From 6ac844079be2347da6013fcc61bb154bd6f4a18e Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Thu, 6 Nov 2008 01:12:41 +0000 Subject: [PATCH] Introduced ContinuableRecord to help fix serialization of StringRecords with large data. Fixed TextObjectRecord to only write 16bit unicode when needed. Simplification in UnicodeString. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@711749 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hssf/record/RecordFactory.java | 2 - .../org/apache/poi/hssf/record/SSTRecord.java | 89 +-- .../poi/hssf/record/SSTRecordHeader.java | 76 --- .../hssf/record/SSTRecordSizeCalculator.java | 51 -- .../apache/poi/hssf/record/SSTSerializer.java | 61 +- .../apache/poi/hssf/record/StringRecord.java | 122 +--- .../poi/hssf/record/TextObjectRecord.java | 128 +---- .../apache/poi/hssf/record/UnicodeString.java | 537 ++++-------------- .../hssf/record/cont/ContinuableRecord.java | 69 +++ .../record/cont/ContinuableRecordOutput.java | 257 +++++++++ .../cont/UnknownLengthRecordOutput.java | 114 ++++ .../poi/util/DelayableLittleEndianOutput.java | 34 ++ .../LittleEndianByteArrayOutputStream.java | 8 +- .../poi/hssf/record/TestRecordFactory.java | 49 +- .../record/TestSSTRecordSizeCalculator.java | 7 +- .../poi/hssf/record/TestStringRecord.java | 83 ++- .../hssf/record/TestTextObjectBaseRecord.java | 8 +- .../poi/hssf/record/TestTextObjectRecord.java | 66 ++- .../poi/hssf/record/TestUnicodeString.java | 25 +- 19 files changed, 827 insertions(+), 959 deletions(-) delete mode 100644 src/java/org/apache/poi/hssf/record/SSTRecordHeader.java delete mode 100644 src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java create mode 100644 src/java/org/apache/poi/hssf/record/cont/ContinuableRecord.java create mode 100644 src/java/org/apache/poi/hssf/record/cont/ContinuableRecordOutput.java create mode 100644 src/java/org/apache/poi/hssf/record/cont/UnknownLengthRecordOutput.java create mode 100644 
src/java/org/apache/poi/util/DelayableLittleEndianOutput.java diff --git a/src/java/org/apache/poi/hssf/record/RecordFactory.java b/src/java/org/apache/poi/hssf/record/RecordFactory.java index e548029053..bae867e512 100644 --- a/src/java/org/apache/poi/hssf/record/RecordFactory.java +++ b/src/java/org/apache/poi/hssf/record/RecordFactory.java @@ -379,8 +379,6 @@ public final class RecordFactory { records.add(record); } else if (lastRecord instanceof DrawingGroupRecord) { ((DrawingGroupRecord)lastRecord).processContinueRecord(contRec.getData()); - } else if (lastRecord instanceof StringRecord) { - ((StringRecord)lastRecord).processContinueRecord(contRec.getData()); } else if (lastRecord instanceof UnknownRecord) { //Gracefully handle records that we don't know about, //that happen to be continued diff --git a/src/java/org/apache/poi/hssf/record/SSTRecord.java b/src/java/org/apache/poi/hssf/record/SSTRecord.java index 7c56d955ec..98bd075e34 100644 --- a/src/java/org/apache/poi/hssf/record/SSTRecord.java +++ b/src/java/org/apache/poi/hssf/record/SSTRecord.java @@ -17,14 +17,16 @@ package org.apache.poi.hssf.record; +import java.util.Iterator; + +import org.apache.poi.hssf.record.cont.ContinuableRecord; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; import org.apache.poi.util.IntMapper; import org.apache.poi.util.LittleEndianConsts; -import java.util.Iterator; - /** - * Title: Static String Table Record - *

+ * Title: Static String Table Record (0x00FC)

+ * * Description: This holds all the strings for LabelSSTRecords. *

* REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN: @@ -37,27 +39,20 @@ import java.util.Iterator; * @see org.apache.poi.hssf.record.LabelSSTRecord * @see org.apache.poi.hssf.record.ContinueRecord */ -public final class SSTRecord extends Record { +public final class SSTRecord extends ContinuableRecord { public static final short sid = 0x00FC; - private static UnicodeString EMPTY_STRING = new UnicodeString(""); - - /** how big can an SST record be? As big as any record can be: 8228 bytes */ - static final int MAX_RECORD_SIZE = 8228; + private static final UnicodeString EMPTY_STRING = new UnicodeString(""); + // TODO - move these constants to test class (the only consumer) /** standard record overhead: two shorts (record id plus data space size)*/ - static final int STD_RECORD_OVERHEAD = - 2 * LittleEndianConsts.SHORT_SIZE; + static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE; /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */ - static final int SST_RECORD_OVERHEAD = - ( STD_RECORD_OVERHEAD + ( 2 * LittleEndianConsts.INT_SIZE ) ); + static final int SST_RECORD_OVERHEAD = STD_RECORD_OVERHEAD + 2 * LittleEndianConsts.INT_SIZE; /** how much data can we stuff into an SST record? 
That would be _max minus the standard SST record overhead */ - static final int MAX_DATA_SPACE = MAX_RECORD_SIZE - SST_RECORD_OVERHEAD; - - /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */ - static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE + LittleEndianConsts.BYTE_SIZE; + static final int MAX_DATA_SPACE = RecordInputStream.MAX_RECORD_DATA_SIZE - 8; /** union of strings in the SST and EXTSST */ private int field_1_num_strings; @@ -133,37 +128,6 @@ public final class SSTRecord extends Record { return field_2_num_unique_strings; } - /** - * USE THIS METHOD AT YOUR OWN PERIL: THE addString - * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR - * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY - * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS - * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD - * - * @param count number of strings - * - */ - - public void setNumStrings( final int count ) - { - field_1_num_strings = count; - } - - /** - * USE THIS METHOD AT YOUR OWN PERIL: THE addString - * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE - * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT - * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN - * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ - * THE RECORD - * - * @param count number of strings - */ - - public void setNumUniqueStrings( final int count ) - { - field_2_num_unique_strings = count; - } /** * Get a particular string by its index @@ -178,11 +142,6 @@ public final class SSTRecord extends Record { return (UnicodeString) field_3_strings.get( id ); } - public boolean isString16bit( final int id ) - { - UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( id ) ); - return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 ); - } /** * Return a debugging string representation @@ -350,29 
+309,11 @@ public final class SSTRecord extends Record { return field_3_strings.size(); } - /** - * called by the class that is responsible for writing this sucker. - * Subclasses should implement this so that their data is passed back in a - * byte array. - * - * @return size - */ - - public int serialize( int offset, byte[] data ) - { - SSTSerializer serializer = new SSTSerializer( - field_3_strings, getNumStrings(), getNumUniqueStrings() ); - int bytes = serializer.serialize( offset, data ); + protected void serialize(ContinuableRecordOutput out) { + SSTSerializer serializer = new SSTSerializer(field_3_strings, getNumStrings(), getNumUniqueStrings() ); + serializer.serialize(out); bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets(); bucketRelativeOffsets = serializer.getBucketRelativeOffsets(); - return bytes; - } - - - protected int getDataSize() { - SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(field_3_strings); - int recordSize = calculator.getRecordSize(); - return recordSize-4; } SSTDeserializer getDeserializer() diff --git a/src/java/org/apache/poi/hssf/record/SSTRecordHeader.java b/src/java/org/apache/poi/hssf/record/SSTRecordHeader.java deleted file mode 100644 index e5696111f8..0000000000 --- a/src/java/org/apache/poi/hssf/record/SSTRecordHeader.java +++ /dev/null @@ -1,76 +0,0 @@ - -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - - -package org.apache.poi.hssf.record; - -import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.LittleEndianConsts; - -/** - * Write out an SST header record. - * - * @author Glen Stampoultzis (glens at apache.org) - */ -class SSTRecordHeader -{ - int numStrings; - int numUniqueStrings; - - public SSTRecordHeader( int numStrings, int numUniqueStrings ) - { - this.numStrings = numStrings; - this.numUniqueStrings = numUniqueStrings; - } - - /** - * Writes out the SST record. This consists of the sid, the record size, the number of - * strings and the number of unique strings. - * - * @param data The data buffer to write the header to. - * @param bufferIndex The index into the data buffer where the header should be written. - * @param recSize The number of records written. - * - * @return The bufer of bytes modified. 
- */ - public int writeSSTHeader( UnicodeString.UnicodeRecordStats stats, byte[] data, int bufferIndex, int recSize ) - { - int offset = bufferIndex; - - LittleEndian.putShort( data, offset, SSTRecord.sid ); - offset += LittleEndianConsts.SHORT_SIZE; - stats.recordSize += LittleEndianConsts.SHORT_SIZE; - stats.remainingSize -= LittleEndianConsts.SHORT_SIZE; - //Delay writing the length - stats.lastLengthPos = offset; - offset += LittleEndianConsts.SHORT_SIZE; - stats.recordSize += LittleEndianConsts.SHORT_SIZE; - stats.remainingSize -= LittleEndianConsts.SHORT_SIZE; - LittleEndian.putInt( data, offset, numStrings ); - offset += LittleEndianConsts.INT_SIZE; - stats.recordSize += LittleEndianConsts.INT_SIZE; - stats.remainingSize -= LittleEndianConsts.INT_SIZE; - LittleEndian.putInt( data, offset, numUniqueStrings ); - offset += LittleEndianConsts.INT_SIZE; - stats.recordSize += LittleEndianConsts.INT_SIZE; - stats.remainingSize -= LittleEndianConsts.INT_SIZE; - - return offset - bufferIndex; - } - -} diff --git a/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java b/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java deleted file mode 100644 index c10c21d83d..0000000000 --- a/src/java/org/apache/poi/hssf/record/SSTRecordSizeCalculator.java +++ /dev/null @@ -1,51 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - - -package org.apache.poi.hssf.record; - -import org.apache.poi.util.IntMapper; - -/** - * Used to calculate the record sizes for a particular record. This kind of - * sucks because it's similar to the SST serialization code. In general - * the SST serialization code needs to be rewritten. - * - * @author Glen Stampoultzis (glens at apache.org) - * @author Jason Height (jheight at apache.org) - */ -class SSTRecordSizeCalculator -{ - private IntMapper strings; - - public SSTRecordSizeCalculator(IntMapper strings) - { - this.strings = strings; - } - - public int getRecordSize() { - UnicodeString.UnicodeRecordStats rs = new UnicodeString.UnicodeRecordStats(); - rs.remainingSize -= SSTRecord.SST_RECORD_OVERHEAD; - rs.recordSize += SSTRecord.SST_RECORD_OVERHEAD; - for (int i=0; i < strings.size(); i++ ) - { - UnicodeString unistr = ( (UnicodeString) strings.get(i)); - unistr.getRecordSize(rs); - } - return rs.recordSize; - } -} diff --git a/src/java/org/apache/poi/hssf/record/SSTSerializer.java b/src/java/org/apache/poi/hssf/record/SSTSerializer.java index 3f97fa3e3a..78844deb30 100644 --- a/src/java/org/apache/poi/hssf/record/SSTSerializer.java +++ b/src/java/org/apache/poi/hssf/record/SSTSerializer.java @@ -1,4 +1,3 @@ - /* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. 
See the NOTICE file distributed with @@ -15,12 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - package org.apache.poi.hssf.record; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; import org.apache.poi.util.IntMapper; -import org.apache.poi.util.LittleEndian; /** * This class handles serialization of SST records. It utilizes the record processor @@ -28,71 +26,50 @@ import org.apache.poi.util.LittleEndian; * * @author Glen Stampoultzis (glens at apache.org) */ -class SSTSerializer -{ +final class SSTSerializer { - // todo: make private again - private IntMapper strings; + private final int _numStrings; + private final int _numUniqueStrings; - private SSTRecordHeader sstRecordHeader; + private final IntMapper strings; /** Offsets from the beginning of the SST record (even across continuations) */ - int[] bucketAbsoluteOffsets; + private final int[] bucketAbsoluteOffsets; /** Offsets relative the start of the current SST or continue record */ - int[] bucketRelativeOffsets; + private final int[] bucketRelativeOffsets; int startOfSST, startOfRecord; public SSTSerializer( IntMapper strings, int numStrings, int numUniqueStrings ) { this.strings = strings; - this.sstRecordHeader = new SSTRecordHeader( numStrings, numUniqueStrings ); + _numStrings = numStrings; + _numUniqueStrings = numUniqueStrings; int infoRecs = ExtSSTRecord.getNumberOfInfoRecsForStrings(strings.size()); this.bucketAbsoluteOffsets = new int[infoRecs]; this.bucketRelativeOffsets = new int[infoRecs]; } - /** - * Create a byte array consisting of an SST record and any - * required Continue records, ready to be written out. - *

- * If an SST record and any subsequent Continue records are read - * in to create this instance, this method should produce a byte - * array that is identical to the byte array produced by - * concatenating the input records' data. - * - * @return the byte array - */ - public int serialize(int offset, byte[] data ) - { - UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); - sstRecordHeader.writeSSTHeader( stats, data, 0 + offset, 0 ); - int pos = offset + SSTRecord.SST_RECORD_OVERHEAD; + public void serialize(ContinuableRecordOutput out) { + out.writeInt(_numStrings); + out.writeInt(_numUniqueStrings); for ( int k = 0; k < strings.size(); k++ ) { if (k % ExtSSTRecord.DEFAULT_BUCKET_SIZE == 0) { + int rOff = out.getTotalSize(); int index = k/ExtSSTRecord.DEFAULT_BUCKET_SIZE; if (index < ExtSSTRecord.MAX_BUCKETS) { //Excel only indexes the first 128 buckets. - bucketAbsoluteOffsets[index] = pos-offset; - bucketRelativeOffsets[index] = pos-offset; - } + bucketAbsoluteOffsets[index] = rOff; + bucketRelativeOffsets[index] = rOff; + } } UnicodeString s = getUnicodeString(k); - pos += s.serialize(stats, pos, data); - } - //Check to see if there is a hanging continue record length - if (stats.lastLengthPos != -1) { - short lastRecordLength = (short)(pos - stats.lastLengthPos-2); - if (lastRecordLength > 8224) - throw new InternalError(); - - LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength); - } - return pos - offset; - } + s.serialize(out); + } + } private UnicodeString getUnicodeString( int index ) diff --git a/src/java/org/apache/poi/hssf/record/StringRecord.java b/src/java/org/apache/poi/hssf/record/StringRecord.java index 620a07e093..45322bea4b 100644 --- a/src/java/org/apache/poi/hssf/record/StringRecord.java +++ b/src/java/org/apache/poi/hssf/record/StringRecord.java @@ -17,19 +17,23 @@ package org.apache.poi.hssf.record; -import org.apache.poi.util.LittleEndian; +import 
org.apache.poi.hssf.record.cont.ContinuableRecord; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; import org.apache.poi.util.StringUtil; /** - * Supports the STRING record structure. (0x0207) + * STRING (0x0207)

+ * + * Stores the cached result of a text formula * * @author Glen Stampoultzis (glens at apache.org) */ -public class StringRecord extends Record { - public final static short sid = 0x0207; - private int field_1_string_length; - private byte field_2_unicode_flag; - private String field_3_string; +public final class StringRecord extends ContinuableRecord { + + public final static short sid = 0x0207; + + private boolean _is16bitUnicode; + private String _text; public StringRecord() @@ -39,77 +43,24 @@ public class StringRecord extends Record { /** * @param in the RecordInputstream to read the record from */ - public StringRecord( RecordInputStream in) - { - field_1_string_length = in.readShort(); - field_2_unicode_flag = in.readByte(); - byte[] data = in.readRemainder(); - //Why isn't this using the in.readString methods??? - if (isUnCompressedUnicode()) - { - field_3_string = StringUtil.getFromUnicodeLE(data, 0, field_1_string_length ); - } - else - { - field_3_string = StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length); + public StringRecord( RecordInputStream in) { + int field_1_string_length = in.readUShort(); + _is16bitUnicode = in.readByte() != 0x00; + + if (_is16bitUnicode){ + _text = in.readUnicodeLEString(field_1_string_length); + } else { + _text = in.readCompressedUnicode(field_1_string_length); } } - - public void processContinueRecord(byte[] data) { - if(isUnCompressedUnicode()) { - field_3_string += StringUtil.getFromUnicodeLE(data, 0, field_1_string_length - field_3_string.length()); - } else { - field_3_string += StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length - field_3_string.length()); - } + + + protected void serialize(ContinuableRecordOutput out) { + out.writeShort(_text.length()); + out.writeStringData(_text); } - private int getStringByteLength() - { - return isUnCompressedUnicode() ? 
field_1_string_length * 2 : field_1_string_length; - } - protected int getDataSize() { - return 2 + 1 + getStringByteLength(); - } - - /** - * is this uncompressed unicode (16bit)? Or just 8-bit compressed? - * @return isUnicode - True for 16bit- false for 8bit - */ - public boolean isUnCompressedUnicode() - { - return (field_2_unicode_flag == 1); - } - - /** - * called by the class that is responsible for writing this sucker. - * Subclasses should implement this so that their data is passed back in a - * byte array. - * - * @param offset to begin writing at - * @param data byte array containing instance data - * @return number of bytes written - */ - public int serialize( int offset, byte[] data ) - { - LittleEndian.putUShort(data, 0 + offset, sid); - LittleEndian.putUShort(data, 2 + offset, 3 + getStringByteLength()); - LittleEndian.putUShort(data, 4 + offset, field_1_string_length); - data[6 + offset] = field_2_unicode_flag; - if (isUnCompressedUnicode()) - { - StringUtil.putUnicodeLE(field_3_string, data, 7 + offset); - } - else - { - StringUtil.putCompressedUnicode(field_3_string, data, 7 + offset); - } - return getRecordSize(); - } - - /** - * return the non static version of the id for this record. - */ public short getSid() { return sid; @@ -120,26 +71,16 @@ public class StringRecord extends Record { */ public String getString() { - return field_3_string; + return _text; } - /** - * Sets whether the string is compressed or not - * @param unicode_flag 1 = uncompressed, 0 = compressed - */ - public void setCompressedFlag( byte unicode_flag ) - { - this.field_2_unicode_flag = unicode_flag; - } /** * Sets the string represented by this record. */ - public void setString( String string ) - { - this.field_1_string_length = string.length(); - this.field_3_string = string; - setCompressedFlag(StringUtil.hasMultibyte(string) ? 
(byte)1 : (byte)0); + public void setString(String string) { + _text = string; + _is16bitUnicode = StringUtil.hasMultibyte(string); } public String toString() @@ -148,16 +89,15 @@ public class StringRecord extends Record { buffer.append("[STRING]\n"); buffer.append(" .string = ") - .append(field_3_string).append("\n"); + .append(_text).append("\n"); buffer.append("[/STRING]\n"); return buffer.toString(); } public Object clone() { StringRecord rec = new StringRecord(); - rec.field_1_string_length = this.field_1_string_length; - rec.field_2_unicode_flag= this.field_2_unicode_flag; - rec.field_3_string = this.field_3_string; + rec._is16bitUnicode= _is16bitUnicode; + rec._text = _text; return rec; } } diff --git a/src/java/org/apache/poi/hssf/record/TextObjectRecord.java b/src/java/org/apache/poi/hssf/record/TextObjectRecord.java index 0a50e181b2..019aab09e8 100644 --- a/src/java/org/apache/poi/hssf/record/TextObjectRecord.java +++ b/src/java/org/apache/poi/hssf/record/TextObjectRecord.java @@ -17,16 +17,13 @@ package org.apache.poi.hssf.record; -import java.io.UnsupportedEncodingException; - +import org.apache.poi.hssf.record.cont.ContinuableRecord; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; import org.apache.poi.hssf.record.formula.Ptg; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.util.BitField; import org.apache.poi.util.BitFieldFactory; import org.apache.poi.util.HexDump; -import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.LittleEndianByteArrayOutputStream; -import org.apache.poi.util.LittleEndianOutput; /** * The TXO record (0x01B6) is used to define the properties of a text box. 
It is @@ -36,7 +33,7 @@ import org.apache.poi.util.LittleEndianOutput; * * @author Glen Stampoultzis (glens at apache.org) */ -public final class TextObjectRecord extends Record { +public final class TextObjectRecord extends ContinuableRecord { public final static short sid = 0x01B6; private static final int FORMAT_RUN_ENCODED_SIZE = 8; // 2 shorts and 4 bytes reserved @@ -163,30 +160,7 @@ public final class TextObjectRecord extends Record { return sid; } - /** - * Only for the current record. does not include any subsequent Continue - * records - */ - private int getCurrentRecordDataSize() { - int result = 2 + 2 + 2 + 2 + 2 + 2 + 2 + 4; - if (_linkRefPtg != null) { - result += 2 // formula size - + 4 // unknownInt - +_linkRefPtg.getSize(); - if (_unknownPostFormulaByte != null) { - result += 1; - } - } - return result; - } - - private int serializeTXORecord(int offset, byte[] data) { - int dataSize = getCurrentRecordDataSize(); - int recSize = dataSize+4; - LittleEndianOutput out = new LittleEndianByteArrayOutputStream(data, offset, recSize); - - out.writeShort(TextObjectRecord.sid); - out.writeShort(dataSize); + private void serializeTXORecord(ContinuableRecordOutput out) { out.writeShort(field_1_options); out.writeShort(field_2_textOrientation); @@ -206,79 +180,23 @@ public final class TextObjectRecord extends Record { out.writeByte(_unknownPostFormulaByte.byteValue()); } } - return recSize; } - private int serializeTrailingRecords(int offset, byte[] data) { - byte[] textBytes; - try { - textBytes = _text.getString().getBytes("UTF-16LE"); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getMessage(), e); - } - int remainingLength = textBytes.length; - - int countTextBytesWritten = 0; - int pos = offset; - // (regardless what was read, we always serialize double-byte - // unicode characters (UTF-16LE). 
- Byte unicodeFlag = new Byte((byte)1); - while (remainingLength > 0) { - int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, remainingLength); - remainingLength -= chunkSize; - pos += ContinueRecord.write(data, pos, unicodeFlag, textBytes, countTextBytesWritten, chunkSize); - countTextBytesWritten += chunkSize; - } - - byte[] formatData = createFormatData(_text); - pos += ContinueRecord.write(data, pos, null, formatData); - return pos - offset; + private void serializeTrailingRecords(ContinuableRecordOutput out) { + out.writeContinue(); + out.writeStringData(_text.getString()); + out.writeContinue(); + writeFormatData(out, _text); } - private int getTrailingRecordsSize() { - if (_text.length() < 1) { - return 0; - } - int encodedTextSize = 0; - int textBytesLength = _text.length() * LittleEndian.SHORT_SIZE; - while (textBytesLength > 0) { - int chunkSize = Math.min(RecordInputStream.MAX_RECORD_DATA_SIZE - 2, textBytesLength); - textBytesLength -= chunkSize; + protected void serialize(ContinuableRecordOutput out) { - encodedTextSize += 4; // +4 for ContinueRecord sid+size - encodedTextSize += 1+chunkSize; // +1 for compressed unicode flag, - } - - int encodedFormatSize = (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE - + 4; // +4 for ContinueRecord sid+size - return encodedTextSize + encodedFormatSize; - } - - - public int serialize(int offset, byte[] data) { - - int expectedTotalSize = getRecordSize(); - int totalSize = serializeTXORecord(offset, data); - + serializeTXORecord(out); if (_text.getString().length() > 0) { - totalSize += serializeTrailingRecords(offset+totalSize, data); + serializeTrailingRecords(out); } - - if (totalSize != expectedTotalSize) - throw new RecordFormatException(totalSize - + " bytes written but getRecordSize() reports " + expectedTotalSize); - return totalSize; } - /** - * Note - this total size includes all potential {@link ContinueRecord}s written - * but it is not the "ushort size" value to be written at 
the start of the first BIFF record - */ - protected int getDataSize() { - return getCurrentRecordDataSize() + getTrailingRecordsSize(); - } - - private int getFormattingDataLength() { if (_text.length() < 1) { // important - no formatting data if text is empty @@ -287,25 +205,17 @@ public final class TextObjectRecord extends Record { return (_text.numFormattingRuns() + 1) * FORMAT_RUN_ENCODED_SIZE; } - private static byte[] createFormatData(HSSFRichTextString str) { + private static void writeFormatData(ContinuableRecordOutput out , HSSFRichTextString str) { int nRuns = str.numFormattingRuns(); - byte[] result = new byte[(nRuns + 1) * FORMAT_RUN_ENCODED_SIZE]; - int pos = 0; for (int i = 0; i < nRuns; i++) { - LittleEndian.putUShort(result, pos, str.getIndexOfFormattingRun(i)); - pos += 2; + out.writeShort(str.getIndexOfFormattingRun(i)); int fontIndex = str.getFontOfFormattingRun(i); - LittleEndian.putUShort(result, pos, fontIndex == str.NO_FONT ? 0 : fontIndex); - pos += 2; - pos += 4; // skip reserved + out.writeShort(fontIndex == str.NO_FONT ? 
0 : fontIndex); + out.writeInt(0); // skip reserved } - LittleEndian.putUShort(result, pos, str.length()); - pos += 2; - LittleEndian.putUShort(result, pos, 0); - pos += 2; - pos += 4; // skip reserved - - return result; + out.writeShort(str.length()); + out.writeShort(0); + out.writeInt(0); // skip reserved } /** diff --git a/src/java/org/apache/poi/hssf/record/UnicodeString.java b/src/java/org/apache/poi/hssf/record/UnicodeString.java index 0494aa98ab..fc493d4348 100644 --- a/src/java/org/apache/poi/hssf/record/UnicodeString.java +++ b/src/java/org/apache/poi/hssf/record/UnicodeString.java @@ -17,75 +17,84 @@ package org.apache.poi.hssf.record; -import org.apache.poi.util.BitField; -import org.apache.poi.util.BitFieldFactory; -import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.HexDump; - -import java.util.Iterator; -import java.util.List; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; +import org.apache.poi.util.BitField; +import org.apache.poi.util.BitFieldFactory; +import org.apache.poi.util.HexDump; +import org.apache.poi.util.LittleEndianInput; +import org.apache.poi.util.LittleEndianOutput; /** - * Title: Unicode String

- * Description: Unicode String record. We implement these as a record, although - * they are really just standard fields that are in several records. - * It is considered more desirable then repeating it in all of them.

- * REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

+ * Title: Unicode String

+ * Description: Unicode String - just standard fields that are in several records. + * It is considered more desirable than repeating it in all of them.

+ * REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)

* @author Andrew C. Oliver * @author Marc Johnson (mjohnson at apache dot org) * @author Glen Stampoultzis (glens at apache.org) */ public final class UnicodeString implements Comparable { - private short field_1_charCount; // = 0; - private byte field_2_optionflags; // = 0; - private String field_3_string; // = null; + private short field_1_charCount; + private byte field_2_optionflags; + private String field_3_string; private List field_4_format_runs; private byte[] field_5_ext_rst; - private static final BitField highByte = BitFieldFactory.getInstance(0x1); - private static final BitField extBit = BitFieldFactory.getInstance(0x4); - private static final BitField richText = BitFieldFactory.getInstance(0x8); + private static final BitField highByte = BitFieldFactory.getInstance(0x1); + private static final BitField extBit = BitFieldFactory.getInstance(0x4); + private static final BitField richText = BitFieldFactory.getInstance(0x8); public static class FormatRun implements Comparable { - short character; - short fontIndex; + short character; + short fontIndex; - public FormatRun(short character, short fontIndex) { - this.character = character; - this.fontIndex = fontIndex; - } - - public short getCharacterPos() { - return character; - } - - public short getFontIndex() { - return fontIndex; - } - - public boolean equals(Object o) { - if ((o == null) || (o.getClass() != this.getClass())) - { - return false; + public FormatRun(short character, short fontIndex) { + this.character = character; + this.fontIndex = fontIndex; } - FormatRun other = ( FormatRun ) o; - return ((character == other.character) && (fontIndex == other.fontIndex)); - } + public FormatRun(LittleEndianInput in) { + this(in.readShort(), in.readShort()); + } - public int compareTo(Object obj) { - FormatRun r = (FormatRun)obj; - if ((character == r.character) && (fontIndex == r.fontIndex)) - return 0; - if (character == r.character) - return fontIndex - r.fontIndex; - else return character - 
r.character; - } + public short getCharacterPos() { + return character; + } - public String toString() { - return "character="+character+",fontIndex="+fontIndex; - } + public short getFontIndex() { + return fontIndex; + } + + public boolean equals(Object o) { + if (!(o instanceof FormatRun)) { + return false; + } + FormatRun other = ( FormatRun ) o; + + return character == other.character && fontIndex == other.fontIndex; + } + + public int compareTo(Object obj) { + FormatRun r = (FormatRun)obj; + if ((character == r.character) && (fontIndex == r.fontIndex)) + return 0; + if (character == r.character) + return fontIndex - r.fontIndex; + else return character - r.character; + } + + public String toString() { + return "character="+character+",fontIndex="+fontIndex; + } + + public void serialize(LittleEndianOutput out) { + out.writeShort(character); + out.writeShort(fontIndex); + } } private UnicodeString() { @@ -116,13 +125,12 @@ public final class UnicodeString implements Comparable { */ public boolean equals(Object o) { - if ((o == null) || (o.getClass() != this.getClass())) - { + if (!(o instanceof UnicodeString)) { return false; } - UnicodeString other = ( UnicodeString ) o; + UnicodeString other = (UnicodeString) o; - //Ok lets do this in stages to return a quickly, first check the actual string + //OK lets do this in stages to return a quickly, first check the actual string boolean eq = ((field_1_charCount == other.field_1_charCount) && (field_2_optionflags == other.field_2_optionflags) && field_3_string.equals(other.field_3_string)); @@ -148,7 +156,7 @@ public final class UnicodeString implements Comparable { if (!run1.equals(run2)) return false; - } + } //Well the format runs are equal as well!, better check the ExtRst data //Which by the way we dont know how to decode! 
@@ -194,19 +202,17 @@ public final class UnicodeString implements Comparable { boolean isCompressed = ((field_2_optionflags & 1) == 0); if (isCompressed) { - field_3_string = in.readCompressedUnicode(field_1_charCount); + field_3_string = in.readCompressedUnicode(field_1_charCount); } else { - field_3_string = in.readUnicodeLEString(field_1_charCount); + field_3_string = in.readUnicodeLEString(field_1_charCount); } if (isRichText() && (runCount > 0)) { field_4_format_runs = new ArrayList(runCount); for (int i=0;i 0)) { @@ -372,11 +378,8 @@ public final class UnicodeString implements Comparable { field_2_optionflags = richText.clearByte(field_2_optionflags); } - public byte[] getExtendedRst() { - return this.field_5_ext_rst; - } - public void setExtendedRst(byte[] ext_rst) { + void setExtendedRst(byte[] ext_rst) { if (ext_rst != null) field_2_optionflags = extBit.setByte(field_2_optionflags); else field_2_optionflags = extBit.clearByte(field_2_optionflags); @@ -391,13 +394,13 @@ public final class UnicodeString implements Comparable { * removed / re-ordered */ public void swapFontUse(short oldFontIndex, short newFontIndex) { - Iterator i = field_4_format_runs.iterator(); - while(i.hasNext()) { - FormatRun run = (FormatRun)i.next(); - if(run.fontIndex == oldFontIndex) { - run.fontIndex = newFontIndex; - } - } + Iterator i = field_4_format_runs.iterator(); + while(i.hasNext()) { + FormatRun run = (FormatRun)i.next(); + if(run.fontIndex == oldFontIndex) { + run.fontIndex = newFontIndex; + } + } } /** @@ -442,353 +445,45 @@ public final class UnicodeString implements Comparable { return buffer.toString(); } - private int writeContinueIfRequired(UnicodeRecordStats stats, final int requiredSize, int offset, byte[] data) { - //Basic string overhead - if (stats.remainingSize < requiredSize) { - //Check if be are already in a continue record, if so make sure that - //we go back and write out our length - if (stats.lastLengthPos != -1) { - short lastRecordLength = 
(short)(offset - stats.lastLengthPos - 2); - if (lastRecordLength > 8224) - throw new InternalError(); - LittleEndian.putShort(data, stats.lastLengthPos, lastRecordLength); + public void serialize(ContinuableRecordOutput out) { + int numberOfRichTextRuns = 0; + int extendedDataSize = 0; + if (isRichText() && field_4_format_runs != null) { + numberOfRichTextRuns = field_4_format_runs.size(); + } + if (isExtendedText() && field_5_ext_rst != null) { + extendedDataSize = field_5_ext_rst.length; + } + + out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize); + + if (numberOfRichTextRuns > 0) { + + //This will ensure that a run does not split a continue + for (int i=0;i 0) { + // OK ExtRst is actually not documented, so i am going to hope + // that we can actually continue on byte boundaries - stats.recordSize += 4; - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - } - return offset; - } - - public int serialize(UnicodeRecordStats stats, final int offset, byte [] data) - { - int pos = offset; - - //Basic string overhead - pos = writeContinueIfRequired(stats, 3, pos, data); - LittleEndian.putShort(data, pos, getCharCount()); - pos += 2; - data[ pos ] = getOptionFlags(); - pos += 1; - stats.recordSize += 3; - stats.remainingSize-= 3; - - if (isRichText()) { - if (field_4_format_runs != null) { - pos = writeContinueIfRequired(stats, 2, pos, data); - - LittleEndian.putShort(data, pos, (short) field_4_format_runs.size()); - pos += 2; - stats.recordSize += 2; - stats.remainingSize -= 2; - } - } - if ( isExtendedText() ) - { - if (this.field_5_ext_rst != null) { - pos = writeContinueIfRequired(stats, 4, pos, data); - - LittleEndian.putInt(data, pos, field_5_ext_rst.length); - pos += 4; - stats.recordSize += 4; - stats.remainingSize -= 4; - } - } - - int charsize = isUncompressedUnicode() ? 
2 : 1; - int strSize = (getString().length() * charsize); - - byte[] strBytes = null; - try { - String unicodeString = getString(); - if (!isUncompressedUnicode()) - { - strBytes = unicodeString.getBytes("ISO-8859-1"); - } - else - { - strBytes = unicodeString.getBytes("UTF-16LE"); + int extPos = 0; + while (true) { + int nBytesToWrite = Math.min(extendedDataSize - extPos, out.getAvailableSpace()); + out.write(field_5_ext_rst, extPos, nBytesToWrite); + extPos += nBytesToWrite; + if (extPos >= extendedDataSize) { + break; + } + out.writeContinue(); } } - catch (Exception e) { - throw new InternalError(); - } - if (strSize != strBytes.length) - throw new InternalError("That shouldnt have happened!"); - - //Check to see if the offset occurs mid string, if so then we need to add - //the byte to start with that represents the first byte of the continue record. - if (strSize > stats.remainingSize) { - //OK the offset occurs half way through the string, that means that - //we need an extra byte after the continue record ie we didnt finish - //writing out the string the 1st time through - - //But hang on, how many continue records did we span? What if this is - //a REALLY long string. We need to work this all out. - int amountThatCantFit = strSize; - int strPos = 0; - while (amountThatCantFit > 0) { - int amountWritten = Math.min(stats.remainingSize, amountThatCantFit); - //Make sure that the amount that can't fit takes into account - //whether we are writing double byte unicode - if (isUncompressedUnicode()) { - //We have the '-1' here because whether this is the first record or - //subsequent continue records, there is always the case that the - //number of bytes in a string on double byte boundaries is actually odd. 
- if ( ( (amountWritten ) % 2) == 1) - amountWritten--; - } - System.arraycopy(strBytes, strPos, data, pos, amountWritten); - pos += amountWritten; - strPos += amountWritten; - stats.recordSize += amountWritten; - stats.remainingSize -= amountWritten; - - //Ok lets subtract what we can write - amountThatCantFit -= amountWritten; - - //Each iteration of this while loop is another continue record, unless - //everything now fits. - if (amountThatCantFit > 0) { - //We know that a continue WILL be requied, but use this common method - pos = writeContinueIfRequired(stats, amountThatCantFit, pos, data); - - //The first byte after a continue mid string is the extra byte to - //indicate if this run is compressed or not. - data[pos] = (byte) (isUncompressedUnicode() ? 0x1 : 0x0); - pos++; - stats.recordSize++; - stats.remainingSize --; - } - } - } else { - if (strSize > (data.length-pos)) - System.out.println("Hmm shouldnt happen"); - //Ok the string fits nicely in the remaining size - System.arraycopy(strBytes, 0, data, pos, strSize); - pos += strSize; - stats.recordSize += strSize; - stats.remainingSize -= strSize; - } - - - if (isRichText() && (field_4_format_runs != null)) { - int count = field_4_format_runs.size(); - - //This will ensure that a run does not split a continue - for (int i=0;i 0) { - while (ammountThatCantFit > 0) { - //So for this record we have already written - int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit); - System.arraycopy(field_5_ext_rst, extPos, data, pos, ammountWritten); - pos += ammountWritten; - extPos += ammountWritten; - stats.recordSize += ammountWritten; - stats.remainingSize -= ammountWritten; - - //Ok lets subtract what we can write - ammountThatCantFit -= ammountWritten; - if (ammountThatCantFit > 0) { - pos = writeContinueIfRequired(stats, 1, pos, data); - } - } - } else { - //We can fit wholey in what remains. 
- System.arraycopy(field_5_ext_rst, 0, data, pos, field_5_ext_rst.length); - pos += field_5_ext_rst.length; - stats.remainingSize -= field_5_ext_rst.length; - stats.recordSize += field_5_ext_rst.length; - } - } - - return pos - offset; - } - - - public void setCompressedUnicode() { - field_2_optionflags = highByte.setByte(field_2_optionflags); - } - - public void setUncompressedUnicode() { - field_2_optionflags = highByte.clearByte(field_2_optionflags); - } - - private boolean isUncompressedUnicode() - { - return highByte.isSet(getOptionFlags()); - } - - /** Returns the size of this record, given the amount of record space - * remaining, it will also include the size of writing a continue record. - */ - - public static class UnicodeRecordStats { - public int recordSize; - public int remainingSize = SSTRecord.MAX_RECORD_SIZE; - public int lastLengthPos = -1; - } - public void getRecordSize(UnicodeRecordStats stats) { - //Basic string overhead - if (stats.remainingSize < 3) { - //Needs a continue - stats.recordSize += 4; - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - } - stats.recordSize += 3; - stats.remainingSize-= 3; - - //Read the number of rich runs if rich text. - if ( isRichText() ) - { - //Run count - if (stats.remainingSize < 2) { - //Needs a continue - //Reset the available space. - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - //continue record overhead - stats.recordSize+=4; - } - - stats.recordSize += 2; - stats.remainingSize -=2; - } - //Read the size of extended data if present. - if ( isExtendedText() ) - { - //Needs a continue - //extension length - if (stats.remainingSize < 4) { - //Reset the available space. - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - //continue record overhead - stats.recordSize+=4; - } - - stats.recordSize += 4; - stats.remainingSize -=4; - } - - int charsize = isUncompressedUnicode() ? 
2 : 1; - int strSize = (getString().length() * charsize); - //Check to see if the offset occurs mid string, if so then we need to add - //the byte to start with that represents the first byte of the continue record. - if (strSize > stats.remainingSize) { - //Ok the offset occurs half way through the string, that means that - //we need an extra byte after the continue record ie we didnt finish - //writing out the string the 1st time through - - //But hang on, how many continue records did we span? What if this is - //a REALLY long string. We need to work this all out. - int ammountThatCantFit = strSize; - while (ammountThatCantFit > 0) { - int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit); - //Make sure that the ammount that cant fit takes into account - //whether we are writing double byte unicode - if (isUncompressedUnicode()) { - //We have the '-1' here because whether this is the first record or - //subsequent continue records, there is always the case that the - //number of bytes in a string on doube byte boundaries is actually odd. - if ( ( (ammountWritten) % 2) == 1) - ammountWritten--; - } - stats.recordSize += ammountWritten; - stats.remainingSize -= ammountWritten; - - //Ok lets subtract what we can write - ammountThatCantFit -= ammountWritten; - - //Each iteration of this while loop is another continue record, unless - //everything now fits. - if (ammountThatCantFit > 0) { - //Reset the available space. - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - //continue record overhead - stats.recordSize+=4; - - //The first byte after a continue mid string is the extra byte to - //indicate if this run is compressed or not. 
- stats.recordSize++; - stats.remainingSize --; - } - } - } else { - //Ok the string fits nicely in the remaining size - stats.recordSize += strSize; - stats.remainingSize -= strSize; - } - - if (isRichText() && (field_4_format_runs != null)) { - int count = field_4_format_runs.size(); - - //This will ensure that a run does not split a continue - for (int i=0;i 0) { - while (ammountThatCantFit > 0) { - //So for this record we have already written - int ammountWritten = Math.min(stats.remainingSize, ammountThatCantFit); - stats.recordSize += ammountWritten; - stats.remainingSize -= ammountWritten; - - //Ok lets subtract what we can write - ammountThatCantFit -= ammountWritten; - if (ammountThatCantFit > 0) { - //Each iteration of this while loop is another continue record. - - //Reset the available space. - stats.remainingSize = SSTRecord.MAX_RECORD_SIZE-4; - //continue record overhead - stats.recordSize += 4; - } - } - } else { - //We can fit wholey in what remains. - stats.remainingSize -= field_5_ext_rst.length; - stats.recordSize += field_5_ext_rst.length; - } - } } public int compareTo(Object obj) @@ -801,9 +496,9 @@ public final class UnicodeString implements Comparable { if (result != 0) return result; - //Ok string appears to be equal but now lets compare formatting runs + //OK string appears to be equal but now lets compare formatting runs if ((field_4_format_runs == null) && (str.field_4_format_runs == null)) - //Strings are equal, and there are no formtting runs. + //Strings are equal, and there are no formatting runs. 
return 0; if ((field_4_format_runs == null) && (str.field_4_format_runs != null)) @@ -850,12 +545,12 @@ public final class UnicodeString implements Comparable { return 0; } - public boolean isRichText() + private boolean isRichText() { return richText.isSet(getOptionFlags()); } - public boolean isExtendedText() + private boolean isExtendedText() { return extBit.isSet(getOptionFlags()); } @@ -877,10 +572,8 @@ public final class UnicodeString implements Comparable { str.field_5_ext_rst = new byte[field_5_ext_rst.length]; System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0, field_5_ext_rst.length); - } + } return str; } - - } diff --git a/src/java/org/apache/poi/hssf/record/cont/ContinuableRecord.java b/src/java/org/apache/poi/hssf/record/cont/ContinuableRecord.java new file mode 100644 index 0000000000..135b93ff44 --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/cont/ContinuableRecord.java @@ -0,0 +1,69 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hssf.record.cont; + +import org.apache.poi.hssf.record.ContinueRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.util.LittleEndianByteArrayOutputStream; +import org.apache.poi.util.LittleEndianOutput; + +/** + * Common superclass of all records that can produce {@link ContinueRecord}s while being serialized. + * + * @author Josh Micich + */ +public abstract class ContinuableRecord extends Record { + + protected ContinuableRecord() { + // no fields to initialise + } + /** + * Serializes this record's content to the supplied data output.
+ * The standard BIFF header (ushort sid, ushort size) has been handled by the superclass, so + * only BIFF data should be written by this method. Simple data types can be written with the + * standard {@link LittleEndianOutput} methods. Methods from {@link ContinuableRecordOutput} + * can be used to serialize strings (with {@link ContinueRecord}s being written as required). + * If necessary, implementors can explicitly start {@link ContinueRecord}s (regardless of the + * amount of remaining space). + * + * @param out a data output stream + */ + protected abstract void serialize(ContinuableRecordOutput out); + + + /** + * @return four less than the total length of the encoded record(s) + * (in the case when no {@link ContinueRecord} is needed, this is the + * same ushort value that gets encoded after the record sid + */ + protected final int getDataSize() { + ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly(); + serialize(out); + out.terminate(); + return out.getTotalSize() - 4; + } + + public final int serialize(int offset, byte[] data) { + + LittleEndianOutput leo = new LittleEndianByteArrayOutputStream(data, offset); + ContinuableRecordOutput out = new ContinuableRecordOutput(leo, getSid()); + serialize(out); + out.terminate(); + return out.getTotalSize(); + } +} diff --git a/src/java/org/apache/poi/hssf/record/cont/ContinuableRecordOutput.java b/src/java/org/apache/poi/hssf/record/cont/ContinuableRecordOutput.java new file mode 100644 index 0000000000..5d540365b7 --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/cont/ContinuableRecordOutput.java @@ -0,0 +1,257 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.record.cont; + +import org.apache.poi.hssf.record.ContinueRecord; +import org.apache.poi.util.DelayableLittleEndianOutput; +import org.apache.poi.util.LittleEndianOutput; +import org.apache.poi.util.StringUtil; + +/** + * An augmented {@link LittleEndianOutput} used for serialization of {@link ContinuableRecord}s. + * This class keeps track of how much remaining space is available in the current BIFF record and + * can start new {@link ContinueRecord}s as required. 
+ * + * @author Josh Micich + */ +public final class ContinuableRecordOutput implements LittleEndianOutput { + + private final LittleEndianOutput _out; + private UnknownLengthRecordOutput _ulrOutput; + private int _totalPreviousRecordsSize; + + ContinuableRecordOutput(LittleEndianOutput out, int sid) { + _ulrOutput = new UnknownLengthRecordOutput(out, sid); + _out = out; + _totalPreviousRecordsSize = 0; + } + + public static ContinuableRecordOutput createForCountingOnly() { + return new ContinuableRecordOutput(NOPOutput, -777); // fake sid + } + + /** + * @return total number of bytes written so far (including all BIFF headers) + */ + public int getTotalSize() { + return _totalPreviousRecordsSize + _ulrOutput.getTotalSize(); + } + /** + * Terminates the last record (also updates its 'ushort size' field) + */ + void terminate() { + _ulrOutput.terminate(); + } + /** + * @return number of remaining bytes of space in current record + */ + public int getAvailableSpace() { + return _ulrOutput.getAvailableSpace(); + } + + /** + * Terminates the current record and starts a new {@link ContinueRecord} (regardless + * of how much space is still available in the current record). + */ + public void writeContinue() { + _ulrOutput.terminate(); + _totalPreviousRecordsSize += _ulrOutput.getTotalSize(); + _ulrOutput = new UnknownLengthRecordOutput(_out, ContinueRecord.sid); + } + public void writeContinueIfRequired(int requiredContinuousSize) { + if (_ulrOutput.getAvailableSpace() < requiredContinuousSize) { + writeContinue(); + } + } + + /** + * Writes the 'optionFlags' byte and encoded character data of a unicode string. This includes: + *

+ * + * Notes: + * + */ + public void writeStringData(String text) { + boolean is16bitEncoded = StringUtil.hasMultibyte(text); + // calculate total size of the header and first encoded char + int keepTogetherSize = 1 + 1; // ushort len, at least one character byte + int optionFlags = 0x00; + if (is16bitEncoded) { + optionFlags |= 0x01; + keepTogetherSize += 1; // one extra byte for first char + } + writeContinueIfRequired(keepTogetherSize); + writeByte(optionFlags); + writeCharacterData(text, is16bitEncoded); + } + /** + * Writes a unicode string complete with header and character data. This includes: + * + * + * The following bits of the 'optionFlags' byte will be set as appropriate: + * + * + * + * + * + *
MaskDescription
0x01is16bitEncoded
0x04hasExtendedData
0x08isRichText
+ * Notes: + * + */ + public void writeString(String text, int numberOfRichTextRuns, int extendedDataSize) { + boolean is16bitEncoded = StringUtil.hasMultibyte(text); + // calculate total size of the header and first encoded char + int keepTogetherSize = 2 + 1 + 1; // ushort len, byte optionFlags, at least one character byte + int optionFlags = 0x00; + if (is16bitEncoded) { + optionFlags |= 0x01; + keepTogetherSize += 1; // one extra byte for first char + } + if (numberOfRichTextRuns > 0) { + optionFlags |= 0x08; + keepTogetherSize += 2; + } + if (extendedDataSize > 0) { + optionFlags |= 0x04; + keepTogetherSize += 4; + } + writeContinueIfRequired(keepTogetherSize); + writeShort(text.length()); + writeByte(optionFlags); + if (numberOfRichTextRuns > 0) { + writeShort(numberOfRichTextRuns); + } + if (extendedDataSize > 0) { + writeInt(extendedDataSize); + } + writeCharacterData(text, is16bitEncoded); + } + + + private void writeCharacterData(String text, boolean is16bitEncoded) { + int nChars = text.length(); + int i=0; + if (is16bitEncoded) { + while(true) { + int nWritableChars = Math.min(nChars-i, _ulrOutput.getAvailableSpace() / 2); + for ( ; nWritableChars > 0; nWritableChars--) { + _ulrOutput.writeShort(text.charAt(i++)); + } + if (i >= nChars) { + break; + } + writeContinue(); + writeByte(0x01); + } + } else { + while(true) { + int nWritableChars = Math.min(nChars-i, _ulrOutput.getAvailableSpace() / 1); + for ( ; nWritableChars > 0; nWritableChars--) { + _ulrOutput.writeByte(text.charAt(i++)); + } + if (i >= nChars) { + break; + } + writeContinue(); + writeByte(0x00); + } + } + } + + public void write(byte[] b) { + writeContinueIfRequired(b.length); + _ulrOutput.write(b); + } + public void write(byte[] b, int offset, int len) { + writeContinueIfRequired(len); + _ulrOutput.write(b, offset, len); + } + public void writeByte(int v) { + writeContinueIfRequired(1); + _ulrOutput.writeByte(v); + } + public void writeDouble(double v) { + writeContinueIfRequired(8); + 
_ulrOutput.writeDouble(v); + } + public void writeInt(int v) { + writeContinueIfRequired(4); + _ulrOutput.writeInt(v); + } + public void writeLong(long v) { + writeContinueIfRequired(8); + _ulrOutput.writeLong(v); + } + public void writeShort(int v) { + writeContinueIfRequired(2); + _ulrOutput.writeShort(v); + } + + /** + * Allows optimised usage of {@link ContinuableRecordOutput} for sizing purposes only. + */ + private static final LittleEndianOutput NOPOutput = new DelayableLittleEndianOutput() { + + public LittleEndianOutput createDelayedOutput(int size) { + return this; + } + public void write(byte[] b) { + // does nothing + } + public void write(byte[] b, int offset, int len) { + // does nothing + } + public void writeByte(int v) { + // does nothing + } + public void writeDouble(double v) { + // does nothing + } + public void writeInt(int v) { + // does nothing + } + public void writeLong(long v) { + // does nothing + } + public void writeShort(int v) { + // does nothing + } + }; +} diff --git a/src/java/org/apache/poi/hssf/record/cont/UnknownLengthRecordOutput.java b/src/java/org/apache/poi/hssf/record/cont/UnknownLengthRecordOutput.java new file mode 100644 index 0000000000..9209566c23 --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/cont/UnknownLengthRecordOutput.java @@ -0,0 +1,114 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.record.cont; + +import org.apache.poi.hssf.record.RecordInputStream; +import org.apache.poi.util.DelayableLittleEndianOutput; +import org.apache.poi.util.LittleEndianByteArrayOutputStream; +import org.apache.poi.util.LittleEndianOutput; +/** + * Allows the writing of BIFF records when the 'ushort size' header field is not known in advance. + * When the client is finished writing data, it calls {@link #terminate()}, at which point this + * class updates the 'ushort size' with its final value. 
+ * + * @author Josh Micich + */ +final class UnknownLengthRecordOutput implements LittleEndianOutput { + private static final int MAX_DATA_SIZE = RecordInputStream.MAX_RECORD_DATA_SIZE; + + private final LittleEndianOutput _originalOut; + /** for writing the 'ushort size' field once its value is known */ + private final LittleEndianOutput _dataSizeOutput; + private final byte[] _byteBuffer; + private LittleEndianOutput _out; + private int _size; + + public UnknownLengthRecordOutput(LittleEndianOutput out, int sid) { + _originalOut = out; + out.writeShort(sid); + if (out instanceof DelayableLittleEndianOutput) { + // optimisation + DelayableLittleEndianOutput dleo = (DelayableLittleEndianOutput) out; + _dataSizeOutput = dleo.createDelayedOutput(2); + _byteBuffer = null; + _out = out; + } else { + // otherwise temporarily write all subsequent data to a buffer + _dataSizeOutput = out; + _byteBuffer = new byte[RecordInputStream.MAX_RECORD_DATA_SIZE]; + _out = new LittleEndianByteArrayOutputStream(_byteBuffer, 0); + } + } + /** + * includes 4 byte header + */ + public int getTotalSize() { + return 4 + _size; + } + public int getAvailableSpace() { + if (_out == null) { + throw new IllegalStateException("Record already terminated"); + } + return MAX_DATA_SIZE - _size; + } + /** + * Finishes writing the current record and updates 'ushort size' field.
+ * After this method is called, only {@link #getTotalSize()} may be called. + */ + public void terminate() { + if (_out == null) { + throw new IllegalStateException("Record already terminated"); + } + _dataSizeOutput.writeShort(_size); + if (_byteBuffer != null) { + _originalOut.write(_byteBuffer, 0, _size); + _out = null; + return; + } + _out = null; + } + + public void write(byte[] b) { + _out.write(b); + _size += b.length; + } + public void write(byte[] b, int offset, int len) { + _out.write(b, offset, len); + _size += len; + } + public void writeByte(int v) { + _out.writeByte(v); + _size += 1; + } + public void writeDouble(double v) { + _out.writeDouble(v); + _size += 8; + } + public void writeInt(int v) { + _out.writeInt(v); + _size += 4; + } + public void writeLong(long v) { + _out.writeLong(v); + _size += 8; + } + public void writeShort(int v) { + _out.writeShort(v); + _size += 2; + } +} diff --git a/src/java/org/apache/poi/util/DelayableLittleEndianOutput.java b/src/java/org/apache/poi/util/DelayableLittleEndianOutput.java new file mode 100644 index 0000000000..d8e4395e64 --- /dev/null +++ b/src/java/org/apache/poi/util/DelayableLittleEndianOutput.java @@ -0,0 +1,34 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.util; +/** + * Implementors of this interface allow client code to 'delay' writing to a certain section of a + * data output stream.
+ * A typical application is for writing BIFF records when the size is not known until well after + * the header has been written. The client code can call {@link #createDelayedOutput(int)} + * to reserve two bytes of the output for the 'ushort size' header field. The delayed output can + * be written at any stage. + * + * @author Josh Micich + */ +public interface DelayableLittleEndianOutput extends LittleEndianOutput { + /** + * Creates an output stream intended for outputting a sequence of size bytes. + */ + LittleEndianOutput createDelayedOutput(int size); +} diff --git a/src/java/org/apache/poi/util/LittleEndianByteArrayOutputStream.java b/src/java/org/apache/poi/util/LittleEndianByteArrayOutputStream.java index 1b68a348be..b3ded97687 100644 --- a/src/java/org/apache/poi/util/LittleEndianByteArrayOutputStream.java +++ b/src/java/org/apache/poi/util/LittleEndianByteArrayOutputStream.java @@ -24,7 +24,7 @@ package org.apache.poi.util; * * @author Josh Micich */ -public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput { +public final class LittleEndianByteArrayOutputStream implements LittleEndianOutput, DelayableLittleEndianOutput { private final byte[] _buf; private final int _endIndex; private int _writeIndex; @@ -89,4 +89,10 @@ public final class LittleEndianByteArrayOutputStream implements LittleEndianOutp public int getWriteIndex() { return _writeIndex; } + public LittleEndianOutput createDelayedOutput(int size) { + checkPosition(size); + LittleEndianOutput result = new LittleEndianByteArrayOutputStream(_buf, _writeIndex, _writeIndex+size); + _writeIndex += size; + return result; + } } diff --git a/src/testcases/org/apache/poi/hssf/record/TestRecordFactory.java b/src/testcases/org/apache/poi/hssf/record/TestRecordFactory.java index 13cea4189c..b54b4032d6 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestRecordFactory.java +++ b/src/testcases/org/apache/poi/hssf/record/TestRecordFactory.java @@ -48,7 +48,6 @@ public final 
class TestRecordFactory extends TestCase { byte[] data = { 0, 6, 5, 0, -2, 28, -51, 7, -55, 64, 0, 0, 6, 1, 0, 0 }; - short size = 16; Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data)); assertEquals(BOFRecord.class.getName(), @@ -64,7 +63,6 @@ public final class TestRecordFactory extends TestCase { assertEquals(5, bofRecord.getType()); assertEquals(1536, bofRecord.getVersion()); recType = MMSRecord.sid; - size = 2; data = new byte[] { 0, 0 @@ -93,7 +91,6 @@ public final class TestRecordFactory extends TestCase { byte[] data = { 0, 0, 0, 0, 21, 0, 0, 0, 0, 0 }; - short size = 10; Record[] record = RecordFactory.createRecord(TestcaseRecordInputStream.create(recType, data)); assertEquals(NumberRecord.class.getName(), @@ -154,34 +151,34 @@ public final class TestRecordFactory extends TestCase { */ public void testMixedContinue() throws Exception { /** - * Taken from a real test sample file 39512.xls. See Bug 39512 for details. + * Adapted from a real test sample file 39512.xls (Offset 0x4854). + * See Bug 39512 for details. 
*/ String dump = //OBJ - "5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3C, 00, 11, 00, A0, 2E, 03, 01, CC, 42, " + - "CF, 00, 00, 00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, " + - "03, 00, 0B, 00, 06, 00, 28, 01, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, " + - "00, 00, 03, 00, 11, 00, 04, 00, 3D, 00, 00, 00, 00, 00, 00, 00, " + + "5D 00 48 00 15 00 12 00 0C 00 3C 00 11 00 A0 2E 03 01 CC 42 " + + "CF 00 00 00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 " + + "03 00 0B 00 06 00 28 01 03 01 00 00 12 00 08 00 00 00 00 00 " + + "00 00 03 00 11 00 04 00 3D 00 00 00 00 00 00 00 " + //MSODRAWING - "EC, 00, 08, 00, 00, 00, 0D, F0, 00, 00, 00, 00, " + - //TXO - "B6, 01, 12, 00, 22, 02, 00, 00, 00, 00, 00, 00, 00, 00, 10, 00, 10, 00, 00, 00, " + - "00, 00, 3C, 00, 21, 00, 01, 4F, 00, 70, 00, 74, 00, 69, 00, 6F, 00, 6E, 00, 20, " + - "00, 42, 00, 75, 00, 74, 00, 74, 00, 6F, 00, 6E, 00, 20, 00, 33, 00, 39, 00, 3C, " + - "00, 10, 00, 00, 00, 05, 00, 00, 00, 00, 00, 10, 00, 00, 00, 00, 00, 00, 00, " + - //CONTINUE - "3C, 00, 7E, 00, 0F, 00, 04, F0, 7E, 00, 00, 00, 92, 0C, 0A, F0, 08, 00, 00, 00, " + - "3D, 04, 00, 00, 00, 0A, 00, 00, A3, 00, 0B, F0, 3C, 00, 00, 00, 7F, 00, 00, 01, " + - "00, 01, 80, 00, 8C, 01, 03, 01, 85, 00, 01, 00, 00, 00, 8B, 00, 02, 00, 00, 00, " + - "BF, 00, 08, 00, 1A, 00, 7F, 01, 29, 00, 29, 00, 81, 01, 41, 00, 00, 08, BF, 01, " + - "00, 00, 10, 00, C0, 01, 40, 00, 00, 08, FF, 01, 00, 00, 08, 00, 00, 00, 10, F0, " + - "12, 00, 00, 00, 02, 00, 02, 00, A0, 03, 18, 00, B5, 00, 04, 00, 30, 02, 1A, 00, " + - "00, 00, 00, 00, 11, F0, 00, 00, 00, 00, " + + "EC 00 08 00 00 00 0D F0 00 00 00 00 " + + //TXO (and 2 trailing CONTINUE records) + "B6 01 12 00 22 02 00 00 00 00 00 00 00 00 10 00 10 00 00 00 00 00 " + + "3C 00 11 00 00 4F 70 74 69 6F 6E 20 42 75 74 74 6F 6E 20 33 39 " + + "3C 00 10 00 00 00 05 00 00 00 00 00 10 00 00 00 00 00 00 00 " + + // another CONTINUE + "3C 00 7E 00 0F 00 04 F0 7E 00 00 00 92 0C 0A F0 08 00 00 00 " + + 
"3D 04 00 00 00 0A 00 00 A3 00 0B F0 3C 00 00 00 7F 00 00 01 " + + "00 01 80 00 8C 01 03 01 85 00 01 00 00 00 8B 00 02 00 00 00 " + + "BF 00 08 00 1A 00 7F 01 29 00 29 00 81 01 41 00 00 08 BF 01 " + + "00 00 10 00 C0 01 40 00 00 08 FF 01 00 00 08 00 00 00 10 F0 " + + "12 00 00 00 02 00 02 00 A0 03 18 00 B5 00 04 00 30 02 1A 00 " + + "00 00 00 00 11 F0 00 00 00 00 " + //OBJ - "5D, 00, 48, 00, 15, 00, 12, 00, 0C, 00, 3D, 00, 11, 00, 8C, 01, 03, 01, C8, 59, CF, 00, 00, " + - "00, 00, 00, 0A, 00, 0C, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 03, 00, 0B, 00, 06, 00, " + - "7C, 16, 03, 01, 00, 00, 12, 00, 08, 00, 00, 00, 00, 00, 00, 00, 03, 00, 11, 00, 04, 00, 01, " + - "00, 00, 00, 00, 00, 00, 00"; + "5D 00 48 00 15 00 12 00 0C 00 3D 00 11 00 8C 01 03 01 C8 59 CF 00 00 " + + "00 00 00 0A 00 0C 00 00 00 00 00 00 00 00 00 00 00 03 00 0B 00 06 00 " + + "7C 16 03 01 00 00 12 00 08 00 00 00 00 00 00 00 03 00 11 00 04 00 01 " + + "00 00 00 00 00 00 00"; byte[] data = HexRead.readFromString(dump); List records = RecordFactory.createRecords(new ByteArrayInputStream(data)); diff --git a/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java b/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java index d35f2009dc..2a0830ac79 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java +++ b/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java @@ -19,6 +19,7 @@ package org.apache.poi.hssf.record; import junit.framework.TestCase; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; import org.apache.poi.util.IntMapper; /** @@ -35,8 +36,10 @@ public final class TestSSTRecordSizeCalculator extends TestCase { private void confirmSize(int expectedSize) { - SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(strings); - assertEquals(expectedSize, calculator.getRecordSize()); + ContinuableRecordOutput cro = ContinuableRecordOutput.createForCountingOnly(); + SSTSerializer ss = 
new SSTSerializer(strings, 0, 0); + ss.serialize(cro); + assertEquals(expectedSize, cro.getTotalSize()); } public void testBasic() { diff --git a/src/testcases/org/apache/poi/hssf/record/TestStringRecord.java b/src/testcases/org/apache/poi/hssf/record/TestStringRecord.java index 14b708cdc8..ec7b84c693 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestStringRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestStringRecord.java @@ -18,6 +18,12 @@ package org.apache.poi.hssf.record; +import org.apache.poi.util.HexRead; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianByteArrayInputStream; +import org.apache.poi.util.LittleEndianInput; + +import junit.framework.AssertionFailedError; import junit.framework.TestCase; /** @@ -28,29 +34,66 @@ import junit.framework.TestCase; * @author Glen Stampoultzis (glens at apache.org) */ public final class TestStringRecord extends TestCase { - byte[] data = new byte[] { - (byte)0x0B,(byte)0x00, // length - (byte)0x00, // option - // string - (byte)0x46,(byte)0x61,(byte)0x68,(byte)0x72,(byte)0x7A,(byte)0x65,(byte)0x75,(byte)0x67,(byte)0x74,(byte)0x79,(byte)0x70 - }; + private static final byte[] data = HexRead.readFromString( + "0B 00 " + // length + "00 " + // option + // string + "46 61 68 72 7A 65 75 67 74 79 70" + ); - public void testLoad() { + public void testLoad() { - StringRecord record = new StringRecord(TestcaseRecordInputStream.create(0x207, data)); - assertEquals( "Fahrzeugtyp", record.getString()); + StringRecord record = new StringRecord(TestcaseRecordInputStream.create(0x207, data)); + assertEquals( "Fahrzeugtyp", record.getString()); - assertEquals( 18, record.getRecordSize() ); - } + assertEquals( 18, record.getRecordSize() ); + } - public void testStore() - { - StringRecord record = new StringRecord(); - record.setString("Fahrzeugtyp"); + public void testStore() { + StringRecord record = new StringRecord(); + record.setString("Fahrzeugtyp"); - byte [] recordBytes = 
record.serialize(); - assertEquals(recordBytes.length - 4, data.length); - for (int i = 0; i < data.length; i++) - assertEquals("At offset " + i, data[i], recordBytes[i+4]); - } + byte [] recordBytes = record.serialize(); + assertEquals(recordBytes.length - 4, data.length); + for (int i = 0; i < data.length; i++) + assertEquals("At offset " + i, data[i], recordBytes[i+4]); + } + + public void testContinue() { + int MAX_BIFF_DATA = RecordInputStream.MAX_RECORD_DATA_SIZE; + int TEXT_LEN = MAX_BIFF_DATA + 1000; // deliberately over-size + String textChunk = "ABCDEGGHIJKLMNOP"; // 16 chars + StringBuffer sb = new StringBuffer(16384); + while (sb.length() < TEXT_LEN) { + sb.append(textChunk); + } + sb.setLength(TEXT_LEN); + + StringRecord sr = new StringRecord(); + sr.setString(sb.toString()); + byte[] ser = sr.serialize(); + assertEquals(StringRecord.sid, LittleEndian.getUShort(ser, 0)); + if (LittleEndian.getUShort(ser, 2) > MAX_BIFF_DATA) { + throw new AssertionFailedError( + "StringRecord should have been split with a continue record"); + } + // Confirm expected size of first record, and ushort strLen. 
+ assertEquals(MAX_BIFF_DATA, LittleEndian.getUShort(ser, 2)); + assertEquals(TEXT_LEN, LittleEndian.getUShort(ser, 4)); + + // Confirm first few bytes of ContinueRecord + LittleEndianInput crIn = new LittleEndianByteArrayInputStream(ser, (MAX_BIFF_DATA + 4)); + int nCharsInFirstRec = MAX_BIFF_DATA - (2 + 1); // strLen, optionFlags + int nCharsInSecondRec = TEXT_LEN - nCharsInFirstRec; + assertEquals(ContinueRecord.sid, crIn.readUShort()); + assertEquals(1 + nCharsInSecondRec, crIn.readUShort()); + assertEquals(0, crIn.readUByte()); + assertEquals('N', crIn.readUByte()); + assertEquals('O', crIn.readUByte()); + + // re-read and make sure string value is the same + RecordInputStream in = TestcaseRecordInputStream.create(ser); + StringRecord sr2 = new StringRecord(in); + assertEquals(sb.toString(), sr2.getString()); + } } diff --git a/src/testcases/org/apache/poi/hssf/record/TestTextObjectBaseRecord.java b/src/testcases/org/apache/poi/hssf/record/TestTextObjectBaseRecord.java index 9b53cdd339..674279388c 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestTextObjectBaseRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestTextObjectBaseRecord.java @@ -44,9 +44,9 @@ public final class TestTextObjectBaseRecord extends TestCase { "00 00" + "00 00 " + "3C 00 " + // ContinueRecord.sid - "05 00 " + // size 5 - "01 " + // unicode uncompressed - "41 00 42 00 " + // 'AB' + "03 00 " + // size 3 + "00 " + // unicode compressed + "41 42 " + // 'AB' "3C 00 " + // ContinueRecord.sid "10 00 " + // size 16 "00 00 18 00 00 00 00 00 " + @@ -63,7 +63,7 @@ public final class TestTextObjectBaseRecord extends TestCase { assertEquals(true, record.isTextLocked()); assertEquals(TextObjectRecord.TEXT_ORIENTATION_ROT_RIGHT, record.getTextOrientation()); - assertEquals(51, record.getRecordSize() ); + assertEquals(49, record.getRecordSize() ); } public void testStore() diff --git a/src/testcases/org/apache/poi/hssf/record/TestTextObjectRecord.java 
b/src/testcases/org/apache/poi/hssf/record/TestTextObjectRecord.java index 19ec07c810..39ea8ba820 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestTextObjectRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestTextObjectRecord.java @@ -37,16 +37,14 @@ import org.apache.poi.util.LittleEndian; public final class TestTextObjectRecord extends TestCase { private static final byte[] simpleData = HexRead.readFromString( - "B6 01 12 00 " + - "12 02 00 00 00 00 00 00" + - "00 00 0D 00 08 00 00 00" + - "00 00 " + - "3C 00 1B 00 " + - "01 48 00 65 00 6C 00 6C 00 6F 00 " + - "2C 00 20 00 57 00 6F 00 72 00 6C " + - "00 64 00 21 00 " + - "3C 00 08 " + - "00 0D 00 00 00 00 00 00 00" + "B6 01 12 00 " + + "12 02 00 00 00 00 00 00" + + "00 00 0D 00 08 00 00 00" + + "00 00 " + + "3C 00 0E 00 " + + "00 48 65 6C 6C 6F 2C 20 57 6F 72 6C 64 21 " + + "3C 00 08 " + + "00 0D 00 00 00 00 00 00 00" ); @@ -92,12 +90,12 @@ public final class TestTextObjectRecord extends TestCase { record.setStr(str); byte [] ser = record.serialize(); - + int formatDataLen = LittleEndian.getUShort(ser, 16); assertEquals("formatDataLength", 0, formatDataLen); assertEquals(22, ser.length); // just the TXO record - + //read again RecordInputStream is = TestcaseRecordInputStream.create(ser); record = new TextObjectRecord(is); @@ -152,38 +150,38 @@ public final class TestTextObjectRecord extends TestCase { byte[] cln = cloned.serialize(); assertTrue(Arrays.equals(src, cln)); } - - /** similar to {@link #simpleData} but with link formula at end of TXO rec*/ + + /** similar to {@link #simpleData} but with link formula at end of TXO rec*/ private static final byte[] linkData = HexRead.readFromString( - "B6 01 " + // TextObjectRecord.sid - "1E 00 " + // size 18 - "44 02 02 00 00 00 00 00" + - "00 00 " + - "02 00 " + // strLen 2 - "10 00 " + // 16 bytes for 2 format runs - "00 00 00 00 " + + "B6 01 " + // TextObjectRecord.sid + "1E 00 " + // size 30 + "44 02 02 00 00 00 00 00" + + "00 00 " + + "02 00 " 
+ // strLen 2 + "10 00 " + // 16 bytes for 2 format runs + "00 00 00 00 " + "05 00 " + // formula size "D4 F0 8A 03 " + // unknownInt "24 01 00 13 C0 " + //tRef(T2) "13 " + // ?? - "3C 00 " + // ContinueRecord.sid - "05 00 " + // size 5 - "01 " + // unicode uncompressed - "41 00 42 00 " + // 'AB' - "3C 00 " + // ContinueRecord.sid - "10 00 " + // size 16 - "00 00 18 00 00 00 00 00 " + - "02 00 00 00 00 00 00 00 " + "3C 00 " + // ContinueRecord.sid + "03 00 " + // size 3 + "00 " + // unicode compressed + "41 42 " + // 'AB' + "3C 00 " + // ContinueRecord.sid + "10 00 " + // size 16 + "00 00 18 00 00 00 00 00 " + + "02 00 00 00 00 00 00 00 " ); - - + + public void testLinkFormula() { RecordInputStream is = new RecordInputStream(new ByteArrayInputStream(linkData)); is.nextRecord(); TextObjectRecord rec = new TextObjectRecord(is); - + Ptg ptg = rec.getLinkRefPtg(); assertNotNull(ptg); assertEquals(RefPtg.class, ptg.getClass()); @@ -193,6 +191,6 @@ public final class TestTextObjectRecord extends TestCase { byte [] data2 = rec.serialize(); assertEquals(linkData.length, data2.length); assertTrue(Arrays.equals(linkData, data2)); - } - + } + } diff --git a/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java b/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java index 2d64002017..1a80f9e921 100755 --- a/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java +++ b/src/testcases/org/apache/poi/hssf/record/TestUnicodeString.java @@ -19,8 +19,11 @@ package org.apache.poi.hssf.record; import junit.framework.TestCase; +import org.apache.poi.hssf.record.cont.ContinuableRecordOutput; + /** - * Tests that records size calculates correctly. + * Tests that {@link UnicodeString} record size calculates correctly. The record size + * is used when serializing {@link SSTRecord}s. 
* * @author Jason Height (jheight at apache.org) */ @@ -33,11 +36,23 @@ public final class TestUnicodeString extends TestCase { private static void confirmSize(int expectedSize, UnicodeString s) { confirmSize(expectedSize, s, 0); } + /** + * Note - a value of zero for <tt>amountUsedInCurrentRecord</tt> would only ever occur just + * after a {@link ContinueRecord} had been started. In the initial {@link SSTRecord} this + * value starts at 8 (for the first {@link UnicodeString} written). In general, it can be + * any value between 0 and {@link #MAX_DATA_SIZE}. + */ private static void confirmSize(int expectedSize, UnicodeString s, int amountUsedInCurrentRecord) { - UnicodeString.UnicodeRecordStats stats = new UnicodeString.UnicodeRecordStats(); - stats.remainingSize = MAX_DATA_SIZE-amountUsedInCurrentRecord; - s.getRecordSize(stats); - assertEquals(expectedSize, stats.recordSize); + ContinuableRecordOutput out = ContinuableRecordOutput.createForCountingOnly(); + out.writeContinue(); + for(int i=amountUsedInCurrentRecord; i>0; i--) { + out.writeByte(0); + } + int size0 = out.getTotalSize(); + s.serialize(out); + int size1 = out.getTotalSize(); + int actualSize = size1-size0; + assertEquals(expectedSize, actualSize); } public void testSmallStringSize() {