mirror of https://github.com/apache/poi.git
ExtSST serialization patch to fix corruption when there are a large number of strings in the SST record.
It seems that only 128 buckets can be serialized. This patch addresses this Excel oddity. git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353371 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c52dae1c2
commit
67950b1584
|
@ -74,6 +74,7 @@ import org.apache.poi.util.LittleEndian;
|
||||||
public class ExtSSTInfoSubRecord
|
public class ExtSSTInfoSubRecord
|
||||||
extends Record
|
extends Record
|
||||||
{
|
{
|
||||||
|
public static final int INFO_SIZE = 8;
|
||||||
public final static short sid =
|
public final static short sid =
|
||||||
0xFFF; // only here for conformance, doesn't really have an sid
|
0xFFF; // only here for conformance, doesn't really have an sid
|
||||||
private int field_1_stream_pos; // stream pointer to the SST record
|
private int field_1_stream_pos; // stream pointer to the SST record
|
||||||
|
|
|
@ -75,7 +75,10 @@ import java.util.ArrayList;
|
||||||
public class ExtSSTRecord
|
public class ExtSSTRecord
|
||||||
extends Record
|
extends Record
|
||||||
{
|
{
|
||||||
private static final int DEFAULT_BUCKET_SIZE = 8;
|
public static final int DEFAULT_BUCKET_SIZE = 8;
|
||||||
|
//Cant seem to find this documented but from the biffviewer it is clear that
|
||||||
|
//Excel only records the indexes for the first 128 buckets.
|
||||||
|
public static final int MAX_BUCKETS = 128;
|
||||||
public final static short sid = 0xff;
|
public final static short sid = 0xff;
|
||||||
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
|
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
|
||||||
private ArrayList field_2_sst_info;
|
private ArrayList field_2_sst_info;
|
||||||
|
@ -197,17 +200,35 @@ public class ExtSSTRecord
|
||||||
for (int k = 0; k < getNumInfoRecords(); k++)
|
for (int k = 0; k < getNumInfoRecords(); k++)
|
||||||
{
|
{
|
||||||
ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
|
ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
|
||||||
pos += rec.serialize(pos + offset, data);
|
int length = rec.serialize(pos + offset, data);
|
||||||
|
pos += length;
|
||||||
}
|
}
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the size of this record */
|
||||||
public int getRecordSize()
|
public int getRecordSize()
|
||||||
{
|
{
|
||||||
return 6+8*getNumInfoRecords();
|
return 6+8*getNumInfoRecords();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final int getNumberOfInfoRecsForStrings(int numStrings) {
|
||||||
|
int infoRecs = (numStrings / DEFAULT_BUCKET_SIZE);
|
||||||
|
if ((numStrings % DEFAULT_BUCKET_SIZE) != 0)
|
||||||
|
infoRecs ++;
|
||||||
|
//Excel seems to max out after 128 info records.
|
||||||
|
//This isnt really documented anywhere...
|
||||||
|
if (infoRecs > MAX_BUCKETS)
|
||||||
|
infoRecs = MAX_BUCKETS;
|
||||||
|
return infoRecs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a number of strings (in the sst), returns the size of the extsst record*/
|
||||||
|
public static final int getRecordSizeForStrings(int numStrings) {
|
||||||
|
return 4 + 2 + (getNumberOfInfoRecsForStrings(numStrings) * 8);
|
||||||
|
}
|
||||||
|
|
||||||
public short getSid()
|
public short getSid()
|
||||||
{
|
{
|
||||||
return sid;
|
return sid;
|
||||||
|
|
|
@ -586,10 +586,7 @@ public class SSTRecord
|
||||||
*/
|
*/
|
||||||
public int calcExtSSTRecordSize()
|
public int calcExtSSTRecordSize()
|
||||||
{
|
{
|
||||||
int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
|
return ExtSSTRecord.getRecordSizeForStrings(field_3_strings.size());
|
||||||
if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
|
|
||||||
infoRecs ++;
|
|
||||||
return 4 + 2 + (infoRecs * 8);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,8 +82,6 @@ class SSTSerializer
|
||||||
/** Offsets relative the start of the current SST or continue record */
|
/** Offsets relative the start of the current SST or continue record */
|
||||||
int[] bucketRelativeOffsets;
|
int[] bucketRelativeOffsets;
|
||||||
int startOfSST, startOfRecord;
|
int startOfSST, startOfRecord;
|
||||||
/** The default bucket size (this is used for ExternSST) */
|
|
||||||
final static int DEFAULT_BUCKET_SIZE = 8;
|
|
||||||
|
|
||||||
public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
|
public SSTSerializer( List recordLengths, BinaryTree strings, int numStrings, int numUniqueStrings )
|
||||||
{
|
{
|
||||||
|
@ -93,9 +91,7 @@ class SSTSerializer
|
||||||
this.numUniqueStrings = numUniqueStrings;
|
this.numUniqueStrings = numUniqueStrings;
|
||||||
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
|
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
|
||||||
|
|
||||||
int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
|
int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size());
|
||||||
if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
|
|
||||||
infoRecs ++;
|
|
||||||
this.bucketAbsoluteOffsets = new int[infoRecs];
|
this.bucketAbsoluteOffsets = new int[infoRecs];
|
||||||
this.bucketRelativeOffsets = new int[infoRecs];
|
this.bucketRelativeOffsets = new int[infoRecs];
|
||||||
}
|
}
|
||||||
|
@ -157,10 +153,14 @@ class SSTSerializer
|
||||||
|
|
||||||
for ( int k = 0; k < strings.size(); k++ )
|
for ( int k = 0; k < strings.size(); k++ )
|
||||||
{
|
{
|
||||||
if (k % DEFAULT_BUCKET_SIZE == 0)
|
if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
|
||||||
{
|
{
|
||||||
bucketAbsoluteOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
|
int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE;
|
||||||
bucketRelativeOffsets[k / DEFAULT_BUCKET_SIZE] = pos;
|
if (index < ExtSSTRecord.MAX_BUCKETS) {
|
||||||
|
//Excel only indexes the first 128 buckets.
|
||||||
|
bucketAbsoluteOffsets[index] = pos;
|
||||||
|
bucketRelativeOffsets[index] = pos;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
|
System.arraycopy( getUnicodeString( k ).serialize(), 0, data, pos + offset, getUnicodeString( k ).getRecordSize() );
|
||||||
pos += getUnicodeString( k ).getRecordSize();
|
pos += getUnicodeString( k ).getRecordSize();
|
||||||
|
@ -210,10 +210,15 @@ class SSTSerializer
|
||||||
{
|
{
|
||||||
UnicodeString unistr = getUnicodeString( stringIndex );
|
UnicodeString unistr = getUnicodeString( stringIndex );
|
||||||
|
|
||||||
if (stringIndex % DEFAULT_BUCKET_SIZE == 0)
|
if (stringIndex % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0)
|
||||||
{
|
{
|
||||||
bucketAbsoluteOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfSST;
|
int index = stringIndex / ExtSSTRecord.DEFAULT_BUCKET_SIZE;
|
||||||
bucketRelativeOffsets[stringIndex / DEFAULT_BUCKET_SIZE] = offset + totalWritten + recordProcessor.getRecordOffset() - startOfRecord;
|
if (index < ExtSSTRecord.MAX_BUCKETS) {
|
||||||
|
bucketAbsoluteOffsets[index] = offset + totalWritten +
|
||||||
|
recordProcessor.getRecordOffset() - startOfSST;
|
||||||
|
bucketRelativeOffsets[index] = offset + totalWritten +
|
||||||
|
recordProcessor.getRecordOffset() - startOfRecord;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )
|
if ( unistr.getRecordSize() <= recordProcessor.getAvailable() )
|
||||||
|
|
Loading…
Reference in New Issue