diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java index da1d2178d7..5a3d384509 100644 --- a/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java +++ b/src/java/org/apache/poi/hssf/record/ExtSSTInfoSubRecord.java @@ -74,6 +74,7 @@ import org.apache.poi.util.LittleEndian; public class ExtSSTInfoSubRecord extends Record { + public static final int INFO_SIZE = 8; public final static short sid = 0xFFF; // only here for conformance, doesn't really have an sid private int field_1_stream_pos; // stream pointer to the SST record diff --git a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java index 51ce742e24..5a87f1c78c 100644 --- a/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/ExtSSTRecord.java @@ -75,7 +75,10 @@ import java.util.ArrayList; public class ExtSSTRecord extends Record { - private static final int DEFAULT_BUCKET_SIZE = 8; + public static final int DEFAULT_BUCKET_SIZE = 8; + //Can't seem to find this documented but from the biffviewer it is clear that + //Excel only records the indexes for the first 128 buckets. 
+ public static final int MAX_BUCKETS = 128; public final static short sid = 0xff; private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE; private ArrayList field_2_sst_info; @@ -197,17 +200,35 @@ public class ExtSSTRecord for (int k = 0; k < getNumInfoRecords(); k++) { ExtSSTInfoSubRecord rec = getInfoRecordAt(k); - pos += rec.serialize(pos + offset, data); + int length = rec.serialize(pos + offset, data); + pos += length; } return pos; } + /** Returns the size of this record */ public int getRecordSize() { return 6+8*getNumInfoRecords(); } + public static final int getNumberOfInfoRecsForStrings(int numStrings) { + int infoRecs = (numStrings / DEFAULT_BUCKET_SIZE); + if ((numStrings % DEFAULT_BUCKET_SIZE) != 0) + infoRecs ++; + //Excel seems to max out after 128 info records. + //This isn't really documented anywhere... + if (infoRecs > MAX_BUCKETS) + infoRecs = MAX_BUCKETS; + return infoRecs; + } + + /** Given a number of strings (in the SST), returns the size of the ExtSST record. */ + public static final int getRecordSizeForStrings(int numStrings) { + return 4 + 2 + (getNumberOfInfoRecsForStrings(numStrings) * 8); + } + public short getSid() { return sid; diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java index 32c3842abd..13b8205116 100644 --- a/src/java/org/apache/poi/hssf/record/SSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java @@ -586,10 +586,7 @@ public class SSTRecord */ public int calcExtSSTRecordSize() { - int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); - if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0) - infoRecs ++; - return 4 + 2 + (infoRecs * 8); + return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size()); } } diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java index 905770b2ce..f4538bb704 100644 --- 
a/src/java/org/apache/poi/hssf/record/SSTSerializer.java +++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java @@ -82,8 +82,6 @@ class SSTSerializer /** Offsets relative the start of the current SST or continue record */ int[] bucketRelativeOffsets; int startOfSST, startOfRecord; - /** The default bucket size (this is used for ExternSST) */ - final static int DEFAULT_BUCKET_SIZE = 8; public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings ) { @@ -93,9 +91,7 @@ class SSTSerializer this.numUniqueStrings = numUniqueStrings; this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); - int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); - if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0) - infoRecs ++; + int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size()); this.bucketAbsoluteOffsets = new int[infoRecs]; this.bucketRelativeOffsets = new int[infoRecs]; } @@ -157,10 +153,14 @@ class SSTSerializer for ( int k = 0; k < strings.size(); k++ ) { - if (k % DEFAULT_BUCKET_SIZE == 0) + if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0) { - bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos; - bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos; + int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE; + if (index < ExtSSTRecord.MAX_BUCKETS) { + //Excel only indexes the first 128 buckets. 
+ bucketAbsoluteOffsets[index] = pos; + bucketRelativeOffsets[index] = pos; + } } System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() ); pos += getUnicodeString( k ).getRecordSize(); @@ -210,10 +210,15 @@ class SSTSerializer { UnicodeString unistr = getUnicodeString( stringIndex ); - if (stringIndex % DEFAULT_BUCKET_SIZE == 0) + if (stringIndex % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0) { - bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST; - bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord; + int index = stringIndex / ExtSSTRecord.DEFAULT_BUCKET_SIZE; + if (index < ExtSSTRecord.MAX_BUCKETS) { + bucketAbsoluteOffsets[index] = offset + totalWritten + + recordProcessor.getRecordOffset() - startOfSST; + bucketRelativeOffsets[index] = offset + totalWritten + + recordProcessor.getRecordOffset() - startOfRecord; + } } if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )