Fixed ExtSST serialization (length not calculated correctly)

Implemented DBCellRecord and IndexRecord serialization

Can now import into MS Access. Cool! Both of the above fixes were required to make this work.


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353356 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Height 2003-09-18 02:10:50 +00:00
parent 9aebc04aef
commit 5b82dc6574
10 changed files with 290 additions and 242 deletions

View File

@ -288,6 +288,15 @@ public class Sheet implements Model
{
retval.windowTwo = (WindowTwoRecord) rec;
}
else if ( rec.getSid() == DBCellRecord.sid )
{
rec = null;
}
else if ( rec.getSid() == IndexRecord.sid )
{
rec = null;
}
if (rec != null)
{
@ -700,49 +709,6 @@ public class Sheet implements Model
return preoffset;
}
/**
 * Writes every record held by this sheet, in list order, into one newly
 * allocated byte array whose length is taken from {@code getSize()}.
 *
 * @return the array the records were serialized into
 */
public byte [] serialize()
{
    log.log(log.DEBUG, "Sheet.serialize");

    final byte[] buffer = new byte[ getSize() ];
    int writeOffset = 0;
    int recordIndex = 0;

    while (recordIndex < records.size())
    {
        final Record current = ( Record ) records.get(recordIndex);

        // each record returns a byte count; the next record is written right after it
        writeOffset += current.serialize(writeOffset, buffer);
        recordIndex++;
    }

    log.log(log.DEBUG, "Sheet.serialize returning " + buffer);
    return buffer;
}
/**
* Serializes all records in the sheet into one big byte array. Use this to write
* the sheet out.
@ -756,40 +722,69 @@ public class Sheet implements Model
{
log.log(log.DEBUG, "Sheet.serialize using offsets");
// addDBCellRecords();
// ArrayList bytes = new ArrayList(4096);
// int arraysize = getSize(); // 0;
int pos = 0;
// for (int k = 0; k < records.size(); k++)
// {
// bytes.add((( Record ) records.get(k)).serialize());
//
// }
// for (int k = 0; k < bytes.size(); k++)
// {
// arraysize += (( byte [] ) bytes.get(k)).length;
// log.debug((new StringBuffer("arraysize=")).append(arraysize)
// .toString());
// }
int pos = offset;
boolean haveSerializedIndex = false;
for (int k = 0; k < records.size(); k++)
{
// byte[] rec = (( byte [] ) bytes.get(k));
// System.arraycopy(rec, 0, data, offset + pos, rec.length);
Record record = (( Record ) records.get(k));
int startPos = pos;
//Once the rows have been found in the list of records, start
//writing out the blocked row information. This includes the DBCell references
if (record instanceof RowRecordsAggregate) {
pos += ((RowRecordsAggregate)record).serialize(pos, data, cells); // rec.length;
} else if (record instanceof ValueRecordsAggregate) {
//Do nothing here. The records were serialized during the RowRecordAggregate block serialization
} else {
pos += record.serialize(pos, data ); // rec.length;
}
//uncomment to test record sizes
// byte[] data2 = new byte[record.getRecordSize()];
// record.serialize(0, data2 ); // rec.length;
// if (LittleEndian.getUShort(data2, 2) != record.getRecordSize() - 4
// && record instanceof RowRecordsAggregate == false && record instanceof ValueRecordsAggregate == false)
// throw new RuntimeException("Blah!!!");
pos += record.serialize(pos + offset, data ); // rec.length;
//If the BOF record was just serialized then add the IndexRecord
if (record.getSid() == BOFRecord.sid) {
//Can there be more than one BOF for a sheet? If not then we can
//remove this guard. To be safe, it is left here.
if (!haveSerializedIndex) {
haveSerializedIndex = true;
pos += serializeIndexRecord(k, pos, data);
}
}
}
log.log(log.DEBUG, "Sheet.serialize returning ");
return pos;
return pos-offset;
}
/**
 * Builds an IndexRecord for this sheet and serializes it into {@code data}.
 * <p>
 * The record carries the first row number, the last row number plus one, and
 * one position per row block. Each position is computed as {@code offset}
 * plus the size of the IndexRecord itself, the records between the BOF and
 * the RowRecordsAggregate, and all row, cell and DBCell bytes that precede
 * that block's DBCell record.
 *
 * @param BOFRecordIndex position in {@code records} of the BOF record that was just written
 * @param offset         position in {@code data} at which the IndexRecord is written; also
 *                       the base the DBCell positions are computed from
 * @param data           destination buffer
 * @return the value returned by {@code index.serialize(offset, data)}, which the caller
 *         adds to its write position
 */
private int serializeIndexRecord(final int BOFRecordIndex, final int offset, byte[] data) {
    IndexRecord index = new IndexRecord();
    index.setFirstRow(rows.getFirstRowNum());
    index.setLastRowAdd1(rows.getLastRowNum()+1);

    //Calculate the size of the records from the end of the BOF
    //and up to the RowRecordsAggregate...
    int sheetRecSize = 0;
    for (int j = BOFRecordIndex+1; j < records.size(); j++)
    {
        Record tmpRec = (( Record ) records.get(j));
        if (tmpRec instanceof RowRecordsAggregate)
            break;
        sheetRecSize+= tmpRec.getRecordSize();
    }

    //Add the references to the DBCells in the IndexRecord (one for each block)
    int blockCount = rows.getRowBlockCount();
    //Calculate the size of this IndexRecord
    int indexRecSize = index.getRecordSizeForBlockCount(blockCount);

    int rowBlockOffset = 0;
    int cellBlockOffset = 0;
    int dbCellOffset = 0;
    for (int block=0;block<blockCount;block++) {
        final int startRow = rows.getStartRowNumberForBlock(block);
        final int endRow = rows.getEndRowNumberForBlock(block);

        rowBlockOffset += rows.getRowBlockSize(block);
        cellBlockOffset += cells.getRowCellBlockSize(startRow, endRow);

        //Note: The offsets are relative to the Workbook BOF. Assume that this is
        //0 for now.....
        index.addDbcell(offset + indexRecSize + sheetRecSize + dbCellOffset + rowBlockOffset + cellBlockOffset);

        //Add space required to write the dbcell record whose reference was just added.
        //A DBCell only receives a 2 byte cell offset for rows that actually have cells,
        //so count those rows rather than every row in the block; otherwise the
        //positions recorded for later blocks drift past the real DBCell records.
        int rowsWithCells = 0;
        for (int row = startRow; row <= endRow; row++) {
            if (cells.rowHasCells(row))
                rowsWithCells++;
        }
        dbCellOffset += (8 + (rowsWithCells * 2));
    }
    return index.serialize(offset, data);
}
/**
@ -1369,84 +1364,6 @@ public class Sheet implements Model
return this.cells.getRowCellIterator(row);
}
/**
* Not currently used method to calculate and add dbcell records.
* <p>
* Walks the record list twice: first to total the serialized length of everything
* ahead of the first RowRecord (remembering the IndexRecord if one is seen), then
* to insert DBCellRecords built by createDBCell into the record list.
* The record list is modified in place while it is being scanned.
*/
public void addDBCellRecords()
{
// running total of serialized bytes for the records visited so far
int offset = 0;
// current position in the record list; shared by both loops below
int recnum = 0;
// number of RowRecords encountered so far
int rownum = 0;
//int lastrow = 0;
//long lastrowoffset = 0;
// stays null when the list holds no IndexRecord ahead of the first RowRecord
// NOTE(review): createDBCell passes this on to addDbCellToIndex, which dereferences it - confirm a null is impossible
IndexRecord index = null;
// ArrayList rowOffsets = new ArrayList();
// value of offset at the moment each RowRecord was reached
IntList rowOffsets = new IntList();
// Phase 1: measure everything that precedes the first RowRecord
for (recnum = 0; recnum < records.size(); recnum++)
{
Record rec = ( Record ) records.get(recnum);
if (rec.getSid() == IndexRecord.sid)
{
index = ( IndexRecord ) rec;
}
if (rec.getSid() != RowRecord.sid)
{
offset += rec.serialize().length;
}
else
{
// stop on the first RowRecord; recnum is left pointing at it
break;
}
}
// First Row Record
// Phase 2: continue from the first RowRecord
for (; recnum < records.size(); recnum++)
{
Record rec = ( Record ) records.get(recnum);
if (rec.getSid() == RowRecord.sid)
{
rownum++;
rowOffsets.add(offset);
// every 32nd row triggers insertion of a DBCellRecord
if ((rownum % 32) == 0)
{
// if this is the last rec in a dbcell block
// find the next row or last value record
// NOTE(review): the scan starts at recnum itself, whose record is the RowRecord
// just counted, so the RowRecord test below appears to match immediately - confirm intent
for (int rn = recnum; rn < records.size(); rn++)
{
rec = ( Record ) records.get(rn);
if ((!rec.isInValueSection())
|| (rec.getSid() == RowRecord.sid))
{
// here is the next row or last value record
records.add(rn,
createDBCell(offset, rowOffsets,
index));
recnum = rn;
break;
}
}
}
else
{
}
}
// first record that reports it is not in the value section: insert a DBCellRecord and stop
if (!rec.isInValueSection())
{
records.add(recnum, createDBCell(offset, rowOffsets, index));
break;
}
offset += rec.serialize().length;
}
}
/**
 * Reports the first row number as tracked by the sheet's row aggregate.
 *
 * @return the value of {@code rows.getFirstRowNum()}
 */
public int getFirstRow() {
    final int firstRowNumber = rows.getFirstRowNum();
    return firstRowNumber;
}
@ -1455,41 +1372,6 @@ public class Sheet implements Model
return rows.getLastRowNum();
}
/**
 * Not currently used. Creates a DBCellRecord whose row offset is {@code offset}
 * minus the first entry of {@code rowoffsets}, gives it a single cell offset of
 * zero, and registers {@code offset} with the index via addDbCellToIndex.
 *
 * @param offset     byte offset accumulated by the caller
 * @param rowoffsets offsets recorded for the rows seen so far; only entry 0 is read
 * @param index      index record to receive the new DBCell reference
 * @return the newly created record
 */
private DBCellRecord createDBCell(int offset, IntList rowoffsets,
                                  IndexRecord index)
{
    final DBCellRecord dbCell = new DBCellRecord();
    dbCell.setRowOffset(offset - rowoffsets.get(0));

    // test hack: a single zero cell offset stands in for real cell offsets
    final short placeholderCellOffset = ( short ) 0x0;
    dbCell.addCellOffset(placeholderCellOffset);

    addDbCellToIndex(offset, index);
    return dbCell;
}
/**
 * Not currently used. Appends {@code offset + preoffset} to the index as a new
 * DBCell reference, then raises every stored reference (the new one included) by 4.
 *
 * @param offset byte offset of the DBCell, before {@code preoffset} is applied
 * @param index  index record to update
 */
private void addDbCellToIndex(int offset, IndexRecord index)
{
    // count is taken before the add, so +1 accounts for the entry appended next
    final int entryCount = index.getNumDbcells() + 1;

    index.addDbcell(offset + preoffset);

    // stupid but whenever we add an offset that causes everything to be shifted down 4
    int entry = 0;
    while (entry < entryCount)
    {
        final int shifted = index.getDbcellAt(entry) + 4;
        index.setDbcell(entry, shifted);
        entry++;
    }
}
/**
* creates the BOF record
* @see org.apache.poi.hssf.record.BOFRecord
@ -2238,6 +2120,21 @@ public class Sheet implements Model
{
retval += (( Record ) records.get(k)).getRecordSize();
}
//Add space for the IndexRecord
final int blocks = rows.getRowBlockCount();
retval += IndexRecord.getRecordSizeForBlockCount(blocks);
//Add space for the DBCell records
//One DBCell per block.
//8 bytes per DBCell (non variable section)
//2 bytes per row reference
int startRetVal = retval;
retval += (8 * blocks);
for (Iterator itr = rows.getIterator(); itr.hasNext();) {
RowRecord row = (RowRecord)itr.next();
if (cells.rowHasCells(row.getRowNumber()))
retval += 2;
}
return retval;
}

View File

@ -58,16 +58,18 @@ package org.apache.poi.hssf.record;
import org.apache.poi.util.LittleEndian;
/**
* Title: DBCell Record (Currently read only. Not required.)
* Description: Used to find rows in blocks...TODO<P>
* Title: DBCell Record
* Description: Used by Excel and other MS apps to quickly find rows in the sheets.<P>
* REFERENCE: PG 299/440 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Jason Height
* @version 2.0-pre
*/
public class DBCellRecord
extends Record
{
public final static int BLOCK_SIZE = 32;
public final static short sid = 0xd7;
private int field_1_row_offset;
private short[] field_2_cell_offsets;
@ -217,7 +219,7 @@ public class DBCellRecord
LittleEndian.putInt(data, 4 + offset, getRowOffset());
for (int k = 0; k < getNumCellOffsets(); k++)
{
LittleEndian.putShort(data, 8 + k + offset, getCellOffsetAt(k));
LittleEndian.putShort(data, 8 + 2*k + offset, getCellOffsetAt(k));
}
return getRecordSize();
}
@ -227,6 +229,11 @@ public class DBCellRecord
return 8 + (getNumCellOffsets() * 2);
}
/**
 * Computes the size of a DBCellRecord that references the given number of rows:
 * a fixed 8 bytes plus 2 bytes for each row.
 *
 * @param rows number of rows the record must reference
 * @return {@code 8 + rows * 2}
 */
public static int getRecordSizeForRows(int rows) {
    final int fixedPart = 8;
    final int perRowPart = 2;
    return fixedPart + (perRowPart * rows);
}
public short getSid()
{
return this.sid;

View File

@ -65,8 +65,8 @@ import org.apache.poi.util.LittleEndian;
/**
* Extended SST table info subrecord<P>
* contains the elements of "info" in the SST's array field<P>
* WE HAVE VERY LITTLE INFORMATION ON HOW TO IMPLEMENT THIS RECORD! (EXTSSST)<P>
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Jason Height
* @version 2.0-pre
* @see org.apache.poi.hssf.record.ExtSSTRecord
*/

View File

@ -61,13 +61,13 @@ import java.util.ArrayList;
/**
* Title: Extended Static String Table<P>
* Description: I really don't understand this thing... its supposed to be "a hash
* table for optimizing external copy operations" --
*<P>
* This sounds like a job for Marc "BitMaster" Johnson aka the
* "Hawaiian Master Chef".<P>
* Description: This record is used for a quick lookup into the SST record. This
* record breaks the SST table into a set of buckets. The offsets
* to these buckets within the SST record are kept as well as the
* position relative to the start of the SST record.
* REFERENCE: PG 313 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Jason Height
* @version 2.0-pre
* @see org.apache.poi.hssf.record.ExtSSTInfoSubRecord
*/
@ -75,8 +75,9 @@ import java.util.ArrayList;
public class ExtSSTRecord
extends Record
{
private static final int DEFAULT_BUCKET_SIZE = 8;
public final static short sid = 0xff;
private short field_1_strings_per_bucket;
private short field_1_strings_per_bucket = DEFAULT_BUCKET_SIZE;
private ArrayList field_2_sst_info;
@ -120,12 +121,11 @@ public class ExtSSTRecord
}
}
// this probably doesn't work but we don't really care at this point
protected void fillFields(byte [] data, short size, int offset)
{
field_2_sst_info = new ArrayList();
field_1_strings_per_bucket = LittleEndian.getShort(data, 0 + offset);
for (int k = 2; k < ((data.length - offset) - size); k += 8)
for (int k = 2; k < (size-offset); k += 8)
{
byte[] tempdata = new byte[ 8 + offset ];
@ -196,16 +196,16 @@ public class ExtSSTRecord
for (int k = 0; k < getNumInfoRecords(); k++)
{
System.arraycopy(getInfoRecordAt(k).serialize(), 0, data,
pos + offset, 8);
pos += getInfoRecordAt(k).getRecordSize();
ExtSSTInfoSubRecord rec = getInfoRecordAt(k);
pos += rec.serialize(pos + offset, data);
}
return getRecordSize();
return pos;
}
public int getRecordSize()
{
return 4 + 2 + field_2_sst_info.size() * 8;
return 6+8*getNumInfoRecords();
}
public short getSid()

View File

@ -222,6 +222,13 @@ public class IndexRecord
return 20 + (getNumDbcells() * 4);
}
/**
 * Computes the size of an IndexRecord that indexes the given number of blocks:
 * a fixed 20 bytes plus 4 bytes for each block.
 *
 * @param blockCount number of blocks the record must index
 * @return {@code 20 + 4 * blockCount}
 */
public static int getRecordSizeForBlockCount(int blockCount) {
    final int fixedPart = 20;
    final int perBlockPart = 4;
    return fixedPart + (perBlockPart * blockCount);
}
public short getSid()
{
return this.sid;

View File

@ -249,7 +249,6 @@ class SSTDeserializer
*/
static public void addToStringTable( BinaryTree strings, Integer integer, UnicodeString string )
{
if ( string.isRichText() )
string.setOptionFlags( (byte) ( string.getOptionFlags() & ( ~8 ) ) );
if ( string.isExtendedText() )

View File

@ -586,7 +586,10 @@ public class SSTRecord
*/
public int calcExtSSTRecordSize()
{
return 4 + 2 + ((field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE) + 1) * 8;
int infoRecs = (field_3_strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
if ((field_3_strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
return 4 + 2 + (infoRecs * 8);
}
}

View File

@ -93,8 +93,11 @@ class SSTSerializer
this.numUniqueStrings = numUniqueStrings;
this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings );
this.bucketAbsoluteOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
this.bucketRelativeOffsets = new int[strings.size()/DEFAULT_BUCKET_SIZE+1];
int infoRecs = (strings.size() / SSTSerializer.DEFAULT_BUCKET_SIZE);
if ((strings.size() % SSTSerializer.DEFAULT_BUCKET_SIZE) != 0)
infoRecs ++;
this.bucketAbsoluteOffsets = new int[infoRecs];
this.bucketRelativeOffsets = new int[infoRecs];
}
/**

View File

@ -57,6 +57,7 @@ package org.apache.poi.hssf.record.aggregates;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.DBCellRecord;
import org.apache.poi.hssf.record.UnknownRecord;
import java.util.Map;
@ -169,6 +170,86 @@ public class RowRecordsAggregate
return k;
}
/**
 * Returns the number of row blocks.
 * <p/>A row block is a grouping of up to {@code DBCellRecord.BLOCK_SIZE} rows;
 * a partially filled final group counts as a whole block.
 *
 * @return the row count divided by the block size, rounded up
 */
public int getRowBlockCount() {
    final int rowCount = records.size();
    final int fullBlocks = rowCount / DBCellRecord.BLOCK_SIZE;
    final boolean hasPartialBlock = (rowCount % DBCellRecord.BLOCK_SIZE) != 0;
    return hasPartialBlock ? fullBlocks + 1 : fullBlocks;
}
/**
 * Returns the size attributed to the rows of one block: 20 for every row that
 * {@code getRowCountForBlock} reports for it.
 *
 * @param block zero-based block number
 * @return {@code 20 * getRowCountForBlock(block)}
 */
public int getRowBlockSize(int block) {
    // presumably 20 is the serialized size of a single row record - confirm against RowRecord
    final int perRowSize = 20;
    final int rowsInBlock = getRowCountForBlock(block);
    return perRowSize * rowsInBlock;
}
/**
 * Returns the number of physical rows within a block. Every block holds
 * {@code DBCellRecord.BLOCK_SIZE} rows except the last one, which ends at the
 * final stored row.
 *
 * @param block zero-based block number
 * @return number of rows between the block's first and last index, inclusive
 */
public int getRowCountForBlock(int block) {
    final int firstIndex = block * DBCellRecord.BLOCK_SIZE;
    final int nominalLastIndex = firstIndex + DBCellRecord.BLOCK_SIZE - 1;
    // clamp to the index of the last stored row
    final int lastIndex = Math.min(nominalLastIndex, records.size() - 1);
    return lastIndex - firstIndex + 1;
}
/**
 * Returns the physical row number of the first row in a block.
 * <p>
 * The rows are only reachable through an iterator, so the method steps through
 * them from the beginning until it reaches the block's first index.
 *
 * @param block zero-based block number
 * @return row number of the row stored at index {@code block * DBCellRecord.BLOCK_SIZE}
 */
public int getStartRowNumberForBlock(int block) {
    final int targetIndex = block * DBCellRecord.BLOCK_SIZE;
    final Iterator walker = records.values().iterator();

    RowRecord found = null;
    // consume targetIndex + 1 entries; the last one consumed is the block's first row
    for (int remaining = targetIndex; remaining >= 0; remaining--) {
        found = (RowRecord)walker.next();
    }
    return found.getRowNumber();
}
/**
 * Returns the physical row number of the last row in a block.
 * <p>
 * The block's nominal last index is clamped to the index of the final stored
 * row, then the rows are stepped through from the beginning up to that index.
 *
 * @param block zero-based block number
 * @return row number of the row stored at the block's last index
 */
public int getEndRowNumberForBlock(int block) {
    final int nominalLastIndex = ((block + 1)*DBCellRecord.BLOCK_SIZE)-1;
    final int targetIndex;
    if (nominalLastIndex >= records.size()) {
        targetIndex = records.size()-1;
    } else {
        targetIndex = nominalLastIndex;
    }

    final Iterator walker = records.values().iterator();
    RowRecord found = null;
    // consume targetIndex + 1 entries; the last one consumed is the block's final row
    for (int remaining = targetIndex; remaining >= 0; remaining--) {
        found = (RowRecord)walker.next();
    }
    return found.getRowNumber();
}
/**
 * Serializes the rows belonging to one block into {@code data}.
 *
 * @param block  zero-based block number
 * @param offset position in {@code data} at which the first row is written
 * @param data   destination buffer
 * @return total of the values returned by the rows' {@code serialize} calls
 */
private int serializeRowBlock(final int block, final int offset, byte[] data) {
    final int firstIndex = block*DBCellRecord.BLOCK_SIZE;
    final int limitIndex = firstIndex + DBCellRecord.BLOCK_SIZE;
    final Iterator walker = records.values().iterator();

    //JMH TBD create an iterator that can start at a specific index.
    // step over the rows that belong to earlier blocks
    int index = 0;
    while (index < firstIndex) {
        walker.next();
        index++;
    }

    // write rows until the block is full or the rows run out
    int cursor = offset;
    for (; walker.hasNext() && index < limitIndex; index++) {
        final RowRecord current = (RowRecord)walker.next();
        cursor += current.serialize(cursor, data);
    }
    return cursor - offset;
}
/**
 * Always fails: rows cannot be written through this overload.
 *
 * @param offset ignored
 * @param data   ignored
 * @return never returns normally
 * @throws RuntimeException on every call
 */
public int serialize(int offset, byte [] data) {
    final String reason = "The serialize method that passes in cells should be used";
    throw new RuntimeException(reason);
}
/**
* called by the class that is responsible for writing this sucker.
* Subclasses should implement this so that their data is passed back in a
@ -179,14 +260,38 @@ public class RowRecordsAggregate
* @return number of bytes written
*/
public int serialize(int offset, byte [] data)
public int serialize(int offset, byte [] data, ValueRecordsAggregate cells)
{
Iterator itr = records.values().iterator();
int pos = offset;
while (itr.hasNext())
{
pos += (( Record ) itr.next()).serialize(pos, data);
//DBCells are serialized before row records.
final int blockCount = getRowBlockCount();
for (int block=0;block<blockCount;block++) {
//Serialize a block of rows.
//Hold onto the position of the first row in the block
final int rowStartPos = pos;
//Hold onto the size of this block that was serialized
final int rowBlockSize = serializeRowBlock(block, pos, data);
pos += rowBlockSize;
//Serialize a block of cells for those rows
final int startRowNumber = getStartRowNumberForBlock(block);
final int endRowNumber = getEndRowNumberForBlock(block);
DBCellRecord cellRecord = new DBCellRecord();
//Note: Cell references start from the second row...
int cellRefOffset = (rowBlockSize-20);
for (int row=startRowNumber;row<=endRowNumber;row++) {
if (cells.rowHasCells(row)) {
final int rowCellSize = cells.serializeCellRow(row, pos, data);
pos += rowCellSize;
//Add the offset to the first cell for the row into the DBCellRecord.
cellRecord.addCellOffset((short)cellRefOffset);
cellRefOffset = rowCellSize;
}
}
//Calculate Offset from the start of a DBCellRecord to the first Row
cellRecord.setRowOffset(pos - rowStartPos);
pos += cellRecord.serialize(pos, data);
}
return pos - offset;
}

View File

@ -115,22 +115,7 @@ public class ValueRecordsAggregate
public int serialize(int offset, byte [] data)
{
//throw new RuntimeException("Not Implemented serialize");
int pos = offset;
Iterator irecs = getIterator();
while (irecs.hasNext()) {
pos += (( Record ) irecs.next()).serialize(pos,data);
}
/* Iterator itr = records.values().iterator();
int pos = offset;
while (itr.hasNext())
{
pos += (( Record ) itr.next()).serialize(pos, data);
}*/
return pos - offset;
throw new RuntimeException("This method shouldnt be called. ValueRecordsAggregate.serializeCellRow() should be called from RowRecordsAggregate.");
}
public ValueRecordsAggregate() {
@ -147,6 +132,42 @@ public class ValueRecordsAggregate
return new VRAIterator(this);
}
/**
 * Totals the record sizes of the cell records attached to the rows in the
 * given range.
 *
 * @param startRow first row of the range, inclusive
 * @param endRow   last row of the range, inclusive
 * @return sum of {@code getRecordSize()} over the cells whose row lies in the range
 */
public int getRowCellBlockSize(int startRow, int endRow) {
    int total = 0;
    for (Iterator walker = new VRAIterator(this, startRow, endRow); walker.hasNext(); ) {
        final CellValueRecordInterface candidate = (CellValueRecordInterface)walker.next();
        final int candidateRow = candidate.getRow();
        // ignore anything the iterator hands back from outside the requested range
        if ((candidateRow < startRow) || (candidateRow > endRow)) {
            continue;
        }
        total += ((Record)candidate).getRecordSize();
    }
    return total;
}
/**
 * Tells whether any cells are attached to a row.
 *
 * @param row row number to look up in {@code celltype}
 * @return true when the row's entry exists and is not empty
 */
public boolean rowHasCells(int row) {
    final IntList typesForRow = (IntList) celltype.get(row);
    if (typesForRow == null) {
        return false;
    }
    return typesForRow.size() > 0;
}
/**
 * Serializes the cells that a row-limited VRAIterator yields for a single row.
 *
 * @param row    row whose cells are written
 * @param offset position in {@code data} at which the first cell is written
 * @param data   destination buffer
 * @return total of the values returned by the cells' {@code serialize} calls
 */
public int serializeCellRow(final int row, int offset, byte [] data)
{
    int cursor = offset;
    for (Iterator rowCells = new VRAIterator(this, row); rowCells.hasNext(); )
    {
        final CellValueRecordInterface current = (CellValueRecordInterface)rowCells.next();
        final Record asRecord = ( Record ) current;
        cursor += asRecord.serialize(cursor, data);
    }
    return cursor - offset;
}
public int construct(int offset, List records)
{
@ -512,30 +533,33 @@ public class ValueRecordsAggregate
class VRAIterator implements Iterator {
private boolean hasNext;
private ValueRecordsAggregate vra;
int popindex;
int row;
int rowlimit;
int col;
private int popindex;
private int row;
private int rowlimit;
private int col;
CellValueRecordInterface current = null;
CellValueRecordInterface next = null;
public VRAIterator(ValueRecordsAggregate vra) {
this.vra = vra;
this.rowlimit = -1;
popindex = 0;
if (vra.getPhysicalNumberOfCells() > 0) {
hasNext = true;
next = findNextCell(null);
}
this(vra, 0, -1);
}
public VRAIterator(ValueRecordsAggregate vra, int row) {
this(vra);
rowlimit = row;
this.row = row;
this.popindex = vra.populatedRows.indexOf(row);
this(vra, row, row);
}
public VRAIterator(ValueRecordsAggregate vra, int startRow, int endRow) {
this.vra = vra;
this.row = startRow;
this.rowlimit = endRow;
this.popindex = vra.populatedRows.indexOf(row);
if (vra.getPhysicalNumberOfCells() > 0) {
next = findNextCell(null);
hasNext = (next != null);
}
}
public boolean hasNext() {
return hasNext;
}
@ -575,7 +599,7 @@ class VRAIterator implements Iterator {
rowNum = vra.populatedRows.get(popindex);
ctRow = (IntList)vra.celltype.get(rowNum);
if (ctRow.size() == 0) {
if (rowlimit == -1) {
if ((rowlimit == -1)||(rowNum<=rowlimit)) {
popindex++;
} else {
this.hasNext = false;
@ -592,8 +616,11 @@ class VRAIterator implements Iterator {
colNum = newCol;
if (colNum == -1) { //end of row, forward one row
popindex++;
if (popindex < vra.populatedRows.size() && rowlimit == -1) {
if (popindex < vra.populatedRows.size() && ((rowlimit == -1)||(rowNum<=rowlimit))) {
rowNum = vra.populatedRows.get(popindex);
//Return null if the row is out of range
if ((rowlimit != -1) &&( rowNum > rowlimit))
return null;
} else {
return null;
}