mirror of https://github.com/apache/poi.git
ExtSST serialization patch to fix corruption when there are a large number of strings in the SST record.
It seems that only 128 buckets can be serialized. This patch addresses this Excel oddity. git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353371 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c52dae1c2
commit
67950b1584
|
@ -74,6 +74,7 @@ import org.apache.poi.util.LittleEndian;
|
|||
public class ExtSSTInfoSubRecord
|
||||
extends Record
|
||||
{
|
||||
public static final int INFO_SIZE = 8;
|
||||
public final static short sid =
|
||||
0xFFF; // only here for conformance, doesn't really have an sid
|
||||
private int field_1_stream_pos; // stream pointer to the SST record
|
||||
|
|
|
@ -75,7 +75,10 @@ import java.util.ArrayList;
|
|||
public class ExtSSTRecord
|
||||
extends Record
|
||||
{
|
||||
private static final int DEFAULT_BUCKET_SIZE = 8;
|
||||
public static final int DEFAULT_BUCKET_SIZE = 8;
|
||||
//Cant seem to find this documented but from the biffviewer it is clear that
|
||||
//Excel only records the indexes for the first 128 buckets.
|
||||
public static final int MAX_BUCKETS = 128;
|
||||
public final static short sid = 0xff;
|
||||
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
|
||||
private ArrayList field_2_sst_info;
|
||||
|
@ -197,17 +200,35 @@ public class ExtSSTRecord
|
|||
for (int k = 0; k < getNumInfoRecords(); k++)
|
||||
{
|
||||
ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
|
||||
pos += rec.serialize(pos + offset, data);
|
||||
int length = rec.serialize(pos + offset, data);
|
||||
pos += length;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
/** Returns the size of this record */
|
||||
public int getRecordSize()
|
||||
{
|
||||
return 6+8*getNumInfoRecords();
|
||||
}
|
||||
|
||||
public static final int getNumberOfInfoRecsForStrings(int numStrings) {
|
||||
int infoRecs = (numStrings / DEFAULT_BUCKET_SIZE);
|
||||
if ((numStrings % DEFAULT_BUCKET_SIZE) != 0)
|
||||
infoRecs ++;
|
||||
//Excel seems to max out after 128 info records.
|
||||
//This isnt really documented anywhere...
|
||||
if (infoRecs > MAX_BUCKETS)
|
||||
infoRecs = MAX_BUCKETS;
|
||||
return infoRecs;
|
||||
}
|
||||
|
||||
/** Given a number of strings (in the sst), returns the size of the extsst record*/
|
||||
public static final int getRecordSizeForStrings(int numStrings) {
|
||||
return 4 + 2 + (getNumberOfInfoRecsForStrings(numStrings) * 8);
|
||||
}
|
||||
|
||||
public short getSid()
|
||||
{
|
||||
return sid;
|
||||
|
|
|
@ -586,10 +586,7 @@ public class SSTRecord
|
|||
*/
|
||||
public int calcExtSSTRecordSize()
|
||||
{
|
||||
int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
|
||||
if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
|
||||
infoRecs ++;
|
||||
return 4 + 2 + (infoRecs * 8);
|
||||
return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -82,8 +82,6 @@ class SSTSerializer
|
|||
/** Offsets relative the start of the current SST or continue record */
|
||||
int[] bucketRelativeOffsets;
|
||||
int startOfSST, startOfRecord;
|
||||
/** The default bucket size (this is used for ExternSST) */
|
||||
final static int DEFAULT_BUCKET_SIZE = 8;
|
||||
|
||||
public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
|
||||
{
|
||||
|
@ -93,9 +91,7 @@ class SSTSerializer
|
|||
this.numUniqueStrings = numUniqueStrings;
|
||||
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
|
||||
|
||||
int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
|
||||
if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
|
||||
infoRecs ++;
|
||||
int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
|
||||
this.bucketAbsoluteOffsets = new int[infoRecs];
|
||||
this.bucketRelativeOffsets = new int[infoRecs];
|
||||
}
|
||||
|
@ -157,10 +153,14 @@ class SSTSerializer
|
|||
|
||||
for ( int k = 0; k < strings.size(); k++ )
|
||||
{
|
||||
if (k % DEFAULT_BUCKET_SIZE == 0)
|
||||
if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
|
||||
{
|
||||
bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
|
||||
bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
|
||||
int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
|
||||
if (index < ExtSSTRecord.MAX_BUCKETS) {
|
||||
//Excel only indexes the first 128 buckets.
|
||||
bucketAbsoluteOffsets[index] = pos;
|
||||
bucketRelativeOffsets[index] = pos;
|
||||
}
|
||||
}
|
||||
System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
|
||||
pos += getUnicodeString( k ).getRecordSize();
|
||||
|
@ -210,10 +210,15 @@ class SSTSerializer
|
|||
{
|
||||
UnicodeString unistr = getUnicodeString( stringIndex );
|
||||
|
||||
if (stringIndex % DEFAULT_BUCKET_SIZE == 0)
|
||||
if (stringIndex % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
|
||||
{
|
||||
bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST;
|
||||
bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord;
|
||||
int index = stringIndex / ExtSSTRecord.DEFAULT_BUCKET_SIZE;
|
||||
if (index < ExtSSTRecord.MAX_BUCKETS) {
|
||||
bucketAbsoluteOffsets[index] = offset + totalWritten +
|
||||
recordProcessor.getRecordOffset() - startOfSST;
|
||||
bucketRelativeOffsets[index] = offset + totalWritten +
|
||||
recordProcessor.getRecordOffset() - startOfRecord;
|
||||
}
|
||||
}
|
||||
|
||||
if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )
|
||||
|
|
Loading…
Reference in New Issue