mirror of https://github.com/apache/poi.git
Start on HSSFOptimiser, which removes un-needed cell styles and fonts, fixing up references as it does so
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@677041 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
029a5be2ac
commit
9cb74f87fe
|
@ -37,6 +37,7 @@
|
|||
|
||||
<!-- Don't forget to update status.xml too! -->
|
||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
<!-- Don't forget to update changes.xml too! -->
|
||||
<changes>
|
||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">New helper, HSSFOptimiser, which handles removing duplicated font and style records, to avoid going over the limits in Excel</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45322 - Fixed NPE in HSSFSheet.autoSizeColumn() when cell number format was not found</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">45380 - Missing return keyword in ArrayPtg.toFormulaString()</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">44958 - Record level support for Data Tables. (No formula parser support though)</action>
|
||||
|
|
|
@ -439,6 +439,23 @@ public class UnicodeString
|
|||
this.field_5_ext_rst = ext_rst;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Swaps all use in the string of one font index
|
||||
* for use of a different font index.
|
||||
* Normally only called when fonts have been
|
||||
* removed / re-ordered
|
||||
*/
|
||||
public void swapFontUse(short oldFontIndex, short newFontIndex) {
|
||||
Iterator i = field_4_format_runs.iterator();
|
||||
while(i.hasNext()) {
|
||||
FormatRun run = (FormatRun)i.next();
|
||||
if(run.fontIndex == oldFontIndex) {
|
||||
run.fontIndex = newFontIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unlike the real records we return the same as "getString()" rather than debug info
|
||||
* @see #getDebugInfo()
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hssf.usermodel;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.hssf.record.ExtendedFormatRecord;
|
||||
import org.apache.poi.hssf.record.FontRecord;
|
||||
import org.apache.poi.hssf.record.UnicodeString;
|
||||
|
||||
/**
|
||||
* Excel can get cranky if you give it files containing too
|
||||
* many (especially duplicate) objects, and this class can
|
||||
* help to avoid those.
|
||||
* In general, it's much better to make sure you don't
|
||||
* duplicate the objects in your code, as this is likely
|
||||
* to be much faster than creating lots and lots of
|
||||
* excel objects+records, only to optimise them down to
|
||||
* many fewer at a later stage.
|
||||
* However, sometimes this is too hard / tricky to do, which
|
||||
* is where the use of this class comes in.
|
||||
*/
|
||||
public class HSSFOptimiser {
|
||||
/**
|
||||
* Goes through the Workbook, optimising the fonts by
|
||||
* removing duplicate ones.
|
||||
* For now, only works on fonts used in {@link HSSFCellStyle}
|
||||
* and {@link HSSFRichTextString}. Any other font uses
|
||||
* (eg charts, pictures) may well end up broken!
|
||||
* This can be a slow operation, especially if you have
|
||||
* lots of cells, cell styles or rich text strings
|
||||
* @param workbook The workbook in which to optimise the fonts
|
||||
*/
|
||||
public static void optimiseFonts(HSSFWorkbook workbook) {
|
||||
// Where each font has ended up, and if we need to
|
||||
// delete the record for it. Start off with no change
|
||||
short[] newPos =
|
||||
new short[workbook.getWorkbook().getNumberOfFontRecords()+1];
|
||||
boolean[] zapRecords = new boolean[newPos.length];
|
||||
for(int i=0; i<newPos.length; i++) {
|
||||
newPos[i] = (short)i;
|
||||
zapRecords[i] = false;
|
||||
}
|
||||
|
||||
// Get each font record, so we can do deletes
|
||||
// without getting confused
|
||||
FontRecord[] frecs = new FontRecord[newPos.length];
|
||||
for(int i=0; i<newPos.length; i++) {
|
||||
// There is no 4!
|
||||
if(i == 4) continue;
|
||||
|
||||
frecs[i] = workbook.getWorkbook().getFontRecordAt(i);
|
||||
}
|
||||
|
||||
// Loop over each font, seeing if it is the same
|
||||
// as an earlier one. If it is, point users of the
|
||||
// later duplicate copy to the earlier one, and
|
||||
// mark the later one as needing deleting
|
||||
// Note - don't change built in fonts (those before 5)
|
||||
for(int i=5; i<newPos.length; i++) {
|
||||
// Check this one for being a duplicate
|
||||
// of an earlier one
|
||||
int earlierDuplicate = -1;
|
||||
for(int j=0; j<i && earlierDuplicate == -1; j++) {
|
||||
if(j == 4) continue;
|
||||
|
||||
FontRecord frCheck = workbook.getWorkbook().getFontRecordAt(j);
|
||||
if(frCheck.sameProperties(frecs[i])) {
|
||||
earlierDuplicate = j;
|
||||
}
|
||||
}
|
||||
|
||||
// If we got a duplicate, mark it as such
|
||||
if(earlierDuplicate != -1) {
|
||||
newPos[i] = (short)earlierDuplicate;
|
||||
zapRecords[i] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Update the new positions based on
|
||||
// deletes that have occurred between
|
||||
// the start and them
|
||||
// Only need to worry about user fonts
|
||||
for(int i=5; i<newPos.length; i++) {
|
||||
// Find the number deleted to that
|
||||
// point, and adjust
|
||||
short preDeletePos = newPos[i];
|
||||
short newPosition = preDeletePos;
|
||||
for(int j=0; j<preDeletePos; j++) {
|
||||
if(zapRecords[j]) newPosition--;
|
||||
}
|
||||
|
||||
// Update the new position
|
||||
newPos[i] = newPosition;
|
||||
}
|
||||
|
||||
// Zap the un-needed user font records
|
||||
for(int i=5; i<newPos.length; i++) {
|
||||
if(zapRecords[i]) {
|
||||
workbook.getWorkbook().removeFontRecord(
|
||||
frecs[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Tell HSSFWorkbook that it needs to
|
||||
// re-start its HSSFFontCache
|
||||
workbook.resetFontCache();
|
||||
|
||||
// Update the cell styles to point at the
|
||||
// new locations of the fonts
|
||||
for(int i=0; i<workbook.getWorkbook().getNumExFormats(); i++) {
|
||||
ExtendedFormatRecord xfr = workbook.getWorkbook().getExFormatAt(i);
|
||||
xfr.setFontIndex(
|
||||
newPos[ xfr.getFontIndex() ]
|
||||
);
|
||||
}
|
||||
|
||||
// Update the rich text strings to point at
|
||||
// the new locations of the fonts
|
||||
// Remember that one underlying unicode string
|
||||
// may be shared by multiple RichTextStrings!
|
||||
HashSet doneUnicodeStrings = new HashSet();
|
||||
for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) {
|
||||
HSSFSheet s = workbook.getSheetAt(sheetNum);
|
||||
Iterator rIt = s.rowIterator();
|
||||
while(rIt.hasNext()) {
|
||||
HSSFRow row = (HSSFRow)rIt.next();
|
||||
Iterator cIt = row.cellIterator();
|
||||
while(cIt.hasNext()) {
|
||||
HSSFCell cell = (HSSFCell)cIt.next();
|
||||
if(cell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
|
||||
HSSFRichTextString rtr = cell.getRichStringCellValue();
|
||||
UnicodeString u = rtr.getRawUnicodeString();
|
||||
|
||||
// Have we done this string already?
|
||||
if(! doneUnicodeStrings.contains(u)) {
|
||||
// Update for each new position
|
||||
for(short i=5; i<newPos.length; i++) {
|
||||
if(i != newPos[i]) {
|
||||
u.swapFontUse(i, newPos[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Mark as done
|
||||
doneUnicodeStrings.add(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Goes through the Wokrbook, optimising the cell styles
|
||||
* by removing duplicate ones.
|
||||
* For best results, optimise the fonts via a call to
|
||||
* {@link #optimiseFonts(HSSFWorkbook)} first.
|
||||
* @param workbook The workbook in which to optimise the cell styles
|
||||
*/
|
||||
public static void optimiseCellStyles(HSSFWorkbook workbook) {
|
||||
|
||||
}
|
||||
}
|
|
@ -67,7 +67,7 @@ public class HSSFRichTextString
|
|||
|
||||
/** Called whenever the unicode string is modified. When it is modified
|
||||
* we need to create a new SST index, so that other LabelSSTRecords will not
|
||||
* be affected by changes tat we make to this string.
|
||||
* be affected by changes that we make to this string.
|
||||
*/
|
||||
private UnicodeString cloneStringIfRequired() {
|
||||
if (book == null)
|
||||
|
@ -167,10 +167,25 @@ public class HSSFRichTextString
|
|||
return string.getString();
|
||||
}
|
||||
|
||||
/** Used internally by the HSSFCell to get the internal string value*/
|
||||
/**
|
||||
* Used internally by the HSSFCell to get the internal
|
||||
* string value.
|
||||
* Will ensure the string is not shared
|
||||
*/
|
||||
UnicodeString getUnicodeString() {
|
||||
return cloneStringIfRequired();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the raw, probably shared Unicode String.
|
||||
* Used when tweaking the styles, eg updating font
|
||||
* positions.
|
||||
* Changes to this string may well effect
|
||||
* other RichTextStrings too!
|
||||
*/
|
||||
UnicodeString getRawUnicodeString() {
|
||||
return string;
|
||||
}
|
||||
|
||||
/** Used internally by the HSSFCell to set the internal string value*/
|
||||
void setUnicodeString(UnicodeString str) {
|
||||
|
|
|
@ -1073,6 +1073,16 @@ public class HSSFWorkbook extends POIDocument
|
|||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset the fonts cache, causing all new calls
|
||||
* to getFontAt() to create new objects.
|
||||
* Should only be called after deleting fonts,
|
||||
* and that's not something you should normally do
|
||||
*/
|
||||
protected void resetFontCache() {
|
||||
fonts = new Hashtable();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new Cell style and add it to the workbook's style table
|
||||
|
|
|
@ -47,6 +47,7 @@ public class AllUserModelTests {
|
|||
result.addTestSuite(TestHSSFDateUtil.class);
|
||||
result.addTestSuite(TestHSSFHeaderFooter.class);
|
||||
result.addTestSuite(TestHSSFHyperlink.class);
|
||||
result.addTestSuite(TestHSSFOptimiser.class);
|
||||
result.addTestSuite(TestHSSFPalette.class);
|
||||
result.addTestSuite(TestHSSFPatriarch.class);
|
||||
result.addTestSuite(TestHSSFPicture.class);
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hssf.usermodel;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public class TestHSSFOptimiser extends TestCase {
|
||||
public void testDoesNoHarmIfNothingToDo() throws Exception {
|
||||
HSSFWorkbook wb = new HSSFWorkbook();
|
||||
|
||||
HSSFFont f = wb.createFont();
|
||||
f.setFontName("Testing");
|
||||
HSSFCellStyle s = wb.createCellStyle();
|
||||
s.setFont(f);
|
||||
|
||||
assertEquals(5, wb.getNumberOfFonts());
|
||||
assertEquals(22, wb.getNumCellStyles());
|
||||
|
||||
// Optimise fonts
|
||||
HSSFOptimiser.optimiseFonts(wb);
|
||||
|
||||
assertEquals(5, wb.getNumberOfFonts());
|
||||
assertEquals(22, wb.getNumCellStyles());
|
||||
|
||||
assertEquals(f, s.getFont(wb));
|
||||
|
||||
// Optimise styles
|
||||
// HSSFOptimiser.optimiseCellStyles(wb);
|
||||
|
||||
assertEquals(5, wb.getNumberOfFonts());
|
||||
assertEquals(22, wb.getNumCellStyles());
|
||||
|
||||
assertEquals(f, s.getFont(wb));
|
||||
}
|
||||
|
||||
public void testOptimiseFonts() throws Exception {
|
||||
HSSFWorkbook wb = new HSSFWorkbook();
|
||||
|
||||
// Add 6 fonts, some duplicates
|
||||
HSSFFont f1 = wb.createFont();
|
||||
f1.setFontHeight((short)11);
|
||||
f1.setFontName("Testing");
|
||||
|
||||
HSSFFont f2 = wb.createFont();
|
||||
f2.setFontHeight((short)22);
|
||||
f2.setFontName("Also Testing");
|
||||
|
||||
HSSFFont f3 = wb.createFont();
|
||||
f3.setFontHeight((short)33);
|
||||
f3.setFontName("Unique");
|
||||
|
||||
HSSFFont f4 = wb.createFont();
|
||||
f4.setFontHeight((short)11);
|
||||
f4.setFontName("Testing");
|
||||
|
||||
HSSFFont f5 = wb.createFont();
|
||||
f5.setFontHeight((short)22);
|
||||
f5.setFontName("Also Testing");
|
||||
|
||||
HSSFFont f6 = wb.createFont();
|
||||
f6.setFontHeight((short)66);
|
||||
f6.setFontName("Also Unique");
|
||||
|
||||
|
||||
|
||||
// Use all three of the four in cell styles
|
||||
HSSFCellStyle cs1 = wb.createCellStyle();
|
||||
cs1.setFont(f1);
|
||||
assertEquals(5, cs1.getFontIndex());
|
||||
|
||||
HSSFCellStyle cs2 = wb.createCellStyle();
|
||||
cs2.setFont(f4);
|
||||
assertEquals(8, cs2.getFontIndex());
|
||||
|
||||
HSSFCellStyle cs3 = wb.createCellStyle();
|
||||
cs3.setFont(f5);
|
||||
assertEquals(9, cs3.getFontIndex());
|
||||
|
||||
HSSFCellStyle cs4 = wb.createCellStyle();
|
||||
cs4.setFont(f6);
|
||||
assertEquals(10, cs4.getFontIndex());
|
||||
|
||||
|
||||
// And three in rich text
|
||||
HSSFSheet s = wb.createSheet();
|
||||
HSSFRow r = s.createRow(0);
|
||||
|
||||
HSSFRichTextString rtr1 = new HSSFRichTextString("Test");
|
||||
rtr1.applyFont(0, 2, f1);
|
||||
rtr1.applyFont(3, 4, f2);
|
||||
r.createCell((short)0).setCellValue(rtr1);
|
||||
|
||||
HSSFRichTextString rtr2 = new HSSFRichTextString("AlsoTest");
|
||||
rtr2.applyFont(0, 2, f3);
|
||||
rtr2.applyFont(3, 5, f5);
|
||||
rtr2.applyFont(6, 8, f6);
|
||||
r.createCell((short)1).setCellValue(rtr2);
|
||||
|
||||
|
||||
// Check what we have now
|
||||
assertEquals(10, wb.getNumberOfFonts());
|
||||
assertEquals(25, wb.getNumCellStyles());
|
||||
|
||||
// Optimise
|
||||
HSSFOptimiser.optimiseFonts(wb);
|
||||
|
||||
// Check font count
|
||||
assertEquals(8, wb.getNumberOfFonts());
|
||||
assertEquals(25, wb.getNumCellStyles());
|
||||
|
||||
// Check font use in cell styles
|
||||
assertEquals(5, cs1.getFontIndex());
|
||||
assertEquals(5, cs2.getFontIndex()); // duplicate of 1
|
||||
assertEquals(6, cs3.getFontIndex()); // duplicate of 2
|
||||
assertEquals(8, cs4.getFontIndex()); // two have gone
|
||||
|
||||
|
||||
// And in rich text
|
||||
|
||||
// RTR 1 had f1 and f2, unchanged
|
||||
assertEquals(5, r.getCell(0).getRichStringCellValue().getFontAtIndex(0));
|
||||
assertEquals(5, r.getCell(0).getRichStringCellValue().getFontAtIndex(1));
|
||||
assertEquals(6, r.getCell(0).getRichStringCellValue().getFontAtIndex(3));
|
||||
assertEquals(6, r.getCell(0).getRichStringCellValue().getFontAtIndex(4));
|
||||
|
||||
// RTR 2 had f3 (unchanged), f5 (=f2) and f6 (moved down)
|
||||
assertEquals(7, r.getCell(1).getRichStringCellValue().getFontAtIndex(0));
|
||||
assertEquals(7, r.getCell(1).getRichStringCellValue().getFontAtIndex(1));
|
||||
assertEquals(6, r.getCell(1).getRichStringCellValue().getFontAtIndex(3));
|
||||
assertEquals(6, r.getCell(1).getRichStringCellValue().getFontAtIndex(4));
|
||||
assertEquals(8, r.getCell(1).getRichStringCellValue().getFontAtIndex(6));
|
||||
assertEquals(8, r.getCell(1).getRichStringCellValue().getFontAtIndex(7));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue