ExtSST serialization patch to fix corruption when there are a large number of strings in the SST record.

It seems that only 128 buckets can be serialized. This patch addresses this Excel oddity.


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353371 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Height 2003-09-25 07:18:08 +00:00
parent 9c52dae1c2
commit 67950b1584
4 changed files with 41 additions and 17 deletions

View File

@ -74,6 +74,7 @@ import org.apache.poi.util.LittleEndian;
public class ExtSSTInfoSubRecord public class ExtSSTInfoSubRecord
extends Record extends Record
{ {
public static final int INFO_SIZE = 8;
public final static short sid = public final static short sid =
0xFFF; // only here for conformance, doesn't really have an sid 0xFFF; // only here for conformance, doesn't really have an sid
private int field_1_stream_pos; // stream pointer to the SST record private int field_1_stream_pos; // stream pointer to the SST record

View File

@ -75,7 +75,10 @@ import java.util.ArrayList;
public class ExtSSTRecord public class ExtSSTRecord
extends Record extends Record
{ {
private static final int DEFAULT_BUCKET_SIZE = 8; public static final int DEFAULT_BUCKET_SIZE = 8;
//Can't seem to find this documented, but from the biffviewer it is clear that
//Excel only records the indexes for the first 128 buckets.
public static final int MAX_BUCKETS = 128;
public final static short sid = 0xff; public final static short sid = 0xff;
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE; private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
private ArrayList field_2_sst_info; private ArrayList field_2_sst_info;
@ -197,17 +200,35 @@ public class ExtSSTRecord
for (int k = 0; k < getNumInfoRecords(); k++) for (int k = 0; k < getNumInfoRecords(); k++)
{ {
ExtSSTInfoSubRecord rec = getInfoRecordAt(k); ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
pos += rec.serialize(pos + offset, data); int length = rec.serialize(pos + offset, data);
pos += length;
} }
return pos; return pos;
} }
/** Returns the size of this record */
public int getRecordSize() public int getRecordSize()
{ {
return 6+8*getNumInfoRecords(); return 6+8*getNumInfoRecords();
} }
/**
 * Returns how many info records (buckets) are needed to index the given
 * number of strings.
 * <p>
 * One bucket is counted per DEFAULT_BUCKET_SIZE strings, plus one more when
 * the count does not divide evenly. The result is capped at MAX_BUCKETS;
 * the cap comes from observed Excel output rather than from a documented
 * limit.
 *
 * @param numStrings the number of strings in the SST
 * @return the number of info records, never more than MAX_BUCKETS
 */
public static final int getNumberOfInfoRecsForStrings(int numStrings) {
    final int fullBuckets = numStrings / DEFAULT_BUCKET_SIZE;
    final boolean hasPartialBucket = (numStrings % DEFAULT_BUCKET_SIZE) != 0;
    final int bucketCount = hasPartialBucket ? fullBuckets + 1 : fullBuckets;
    // Excel seems to stop after MAX_BUCKETS info records; this is not
    // really documented anywhere.
    return Math.min(bucketCount, MAX_BUCKETS);
}
/**
 * Given a number of strings (in the SST), returns the size of the ExtSST
 * record that indexes them.
 * <p>
 * The size is a fixed 4 + 2 bytes plus 8 bytes per info record.
 * NOTE(review): the 4 is presumably the record header and the 2 the
 * strings-per-bucket field - confirm against the serialize code.
 *
 * @param numStrings the number of strings in the SST
 * @return the ExtSST record size in bytes
 */
public static final int getRecordSizeForStrings(int numStrings) {
    final int infoRecordCount = getNumberOfInfoRecsForStrings(numStrings);
    return 4 + 2 + (infoRecordCount * 8);
}
public short getSid() public short getSid()
{ {
return sid; return sid;

View File

@ -586,10 +586,7 @@ public class SSTRecord
*/ */
public int calcExtSSTRecordSize() public int calcExtSSTRecordSize()
{ {
int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
return 4 + 2 + (infoRecs * 8);
} }
} }

View File

@ -82,8 +82,6 @@ class SSTSerializer
/** Offsets relative the start of the current SST or continue record */ /** Offsets relative the start of the current SST or continue record */
int[] bucketRelativeOffsets; int[] bucketRelativeOffsets;
int startOfSST, startOfRecord; int startOfSST, startOfRecord;
/** The default bucket size (this is used for ExternSST) */
final static int DEFAULT_BUCKET_SIZE = 8;
public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings ) public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
{ {
@ -93,9 +91,7 @@ class SSTSerializer
this.numUniqueStrings = numUniqueStrings; this.numUniqueStrings = numUniqueStrings;
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE); int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
this.bucketAbsoluteOffsets = new int[infoRecs]; this.bucketAbsoluteOffsets = new int[infoRecs];
this.bucketRelativeOffsets = new int[infoRecs]; this.bucketRelativeOffsets = new int[infoRecs];
} }
@ -157,10 +153,14 @@ class SSTSerializer
for ( int k = 0; k < strings.size(); k++ ) for ( int k = 0; k < strings.size(); k++ )
{ {
if (k % DEFAULT_BUCKET_SIZE == 0) if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{ {
bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos; int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos; if (index < ExtSSTRecord.MAX_BUCKETS) {
//Excel only indexes the first 128 buckets.
bucketAbsoluteOffsets[index] = pos;
bucketRelativeOffsets[index] = pos;
}
} }
System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() ); System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
pos += getUnicodeString( k ).getRecordSize(); pos += getUnicodeString( k ).getRecordSize();
@ -210,10 +210,15 @@ class SSTSerializer
{ {
UnicodeString unistr = getUnicodeString( stringIndex ); UnicodeString unistr = getUnicodeString( stringIndex );
if (stringIndex % DEFAULT_BUCKET_SIZE == 0) if (stringIndex % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
{ {
bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST; int index = stringIndex / ExtSSTRecord.DEFAULT_BUCKET_SIZE;
bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord; if (index < ExtSSTRecord.MAX_BUCKETS) {
bucketAbsoluteOffsets[index] = offset + totalWritten +
recordProcessor.getRecordOffset() - startOfSST;
bucketRelativeOffsets[index] = offset + totalWritten +
recordProcessor.getRecordOffset() - startOfRecord;
}
} }
if ( unistr.getRecordSize() <= recordProcessor.getAvailable() ) if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )