sergei.... if he'd learn to do diff -u patches instead of the nasty kind I'd be
loving his work a lot right now ;-)
PR:
Obtained from:
Submitted by:
Reviewed by:


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352831 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew C. Oliver 2002-09-02 02:11:16 +00:00
parent 8748ed6a08
commit 4c9734a2cb
9 changed files with 347 additions and 89 deletions

View File

@ -624,6 +624,9 @@ public class BiffViewer {
case StringRecord.sid:
retval = new StringRecord( rectype, size, data );
break;
case NameRecord.sid:
retval = new NameRecord( rectype, size, data );
break;
default:
retval = new UnknownRecord( rectype, size, data );
}

View File

@ -185,6 +185,24 @@ public class HSSFEventFactory
{
sid = LittleEndian.getShort(sidbytes);
//
// For some reason the workbook has to be at least 4096 bytes long, so a
// shorter workbook is padded at the end with zeros (many zeros).
//
// That padding is a problem:
// if length( all zero records ) % 4 == 1,
// e.g.: each zero record is read as 4 bytes at once ( 2 - id and 2 - size ),
// then the last 1 byte is read WRONGLY ( the id must be 2 bytes ).
//
// So it is better to check whether the sid is zero and, if so, read no more data.
// A zero sid shows us that the rest of the stream data is filler that only pads
// the workbook to a certain size.
//
if ( sid == 0 )
break;
if ((rec != null) && (sid != ContinueRecord.sid))
{
userCode = req.processRecord(rec);

View File

@ -73,7 +73,8 @@ public class FormatRecord
public final static short sid = 0x41e;
private short field_1_index_code;
private byte field_2_formatstring_len;
private short field_3_zero; // undocumented 2 bytes of 0
private short field_3_unicode_len; // unicode string length
private boolean field_3_unicode_flag; // it is not undocumented - it is unicode flag
private String field_4_formatstring;
public FormatRecord()
@ -118,10 +119,19 @@ public class FormatRecord
protected void fillFields(byte [] data, short size, int offset)
{
field_1_index_code = LittleEndian.getShort(data, 0 + offset);
field_2_formatstring_len = data[ 2 + offset ];
field_3_zero = LittleEndian.getShort(data, 3 + offset);
field_4_formatstring = new String(data, 5 + offset,
LittleEndian.ubyteToInt(field_2_formatstring_len));
// field_2_formatstring_len = data[ 2 + offset ];
field_3_unicode_len = LittleEndian.getShort( data, 2 + offset );
field_3_unicode_flag = ( data[ 4 + offset ] & (byte)0x01 ) != 0;
if ( field_3_unicode_flag ) {
// unicode
field_4_formatstring = StringUtil.getFromUnicodeHigh( data, 5 + offset, field_3_unicode_len );
}
else {
// not unicode
field_4_formatstring = new String(data, 5 + offset, field_3_unicode_len );
}
}
/**
@ -203,11 +213,15 @@ public class FormatRecord
buffer.append("[FORMAT]\n");
buffer.append(" .indexcode = ")
.append(Integer.toHexString(getIndexCode())).append("\n");
/*
buffer.append(" .formatstringlen = ")
.append(Integer.toHexString(getFormatStringLength()))
.append("\n");
buffer.append(" .zero = ")
.append(Integer.toHexString(field_3_zero)).append("\n");
*/
buffer.append(" .unicode length = ")
.append(Integer.toHexString(field_3_unicode_len)).append("\n");
buffer.append(" .isUnicode = ")
.append( field_3_unicode_flag ).append("\n");
buffer.append(" .formatstring = ").append(getFormatString())
.append("\n");
buffer.append("[/FORMAT]\n");
@ -217,20 +231,29 @@ public class FormatRecord
public int serialize(int offset, byte [] data)
{
LittleEndian.putShort(data, 0 + offset, sid);
LittleEndian.putShort(data, 2 + offset,
( short ) (5 + getFormatStringLength()));
// 9 - 4(len/sid) + format string length
LittleEndian.putShort(data, 2 + offset, (short)( 2 + 2 + 1 + ( (field_3_unicode_flag)
? 2 * field_3_unicode_len
: field_3_unicode_len ) ) );
// index + len + flag + format string length
LittleEndian.putShort(data, 4 + offset, getIndexCode());
data[ 6 + offset ] = getFormatStringLength();
LittleEndian.putShort(data, 7 + offset, ( short ) 0);
LittleEndian.putShort(data, 6 + offset, field_3_unicode_len);
data[ 8 + offset ] = (byte)( (field_3_unicode_flag) ? 0x01 : 0x00 );
if ( field_3_unicode_flag ) {
// unicode
StringUtil.putUncompressedUnicode( getFormatString(), data, 9 + offset );
}
else {
// not unicode
StringUtil.putCompressedUnicode( getFormatString(), data, 9 + offset );
}
return getRecordSize();
}
public int getRecordSize()
{
return 9 + getFormatStringLength();
return 9 + ( ( field_3_unicode_flag ) ? 2 * field_3_unicode_len : field_3_unicode_len );
}
public short getSid()

View File

@ -55,6 +55,7 @@
package org.apache.poi.hssf.record;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
import java.util.Stack;
@ -70,6 +71,7 @@ import org.apache.poi.hssf.util.SheetReferences;
* Description: Defines a named range within a workbook. <P>
* REFERENCE: <P>
* @author Libin Roman (Vista Portal LDT. Developer)
* @author Sergei Kozello (sergeikozello at mail.ru)
* @version 1.0-pre
*/
@ -88,9 +90,10 @@ public class NameRecord extends Record {
private byte field_9_length_help_topic_text;
private byte field_10_length_status_bar_text;
private byte field_11_compressed_unicode_flag; // not documented
private byte field_12_builtIn_name;
private String field_12_name_text;
private Stack field_13_name_definition;
private byte[] field_13_raw_name_definition = null; // raw data
private byte[] field_13_raw_name_definition; // raw data
private String field_14_custom_menu_text;
private String field_15_description_text;
private String field_16_help_topic_text;
@ -378,7 +381,7 @@ public class NameRecord extends Record {
*/
public int serialize(int offset, byte[] data) {
LittleEndian.putShort(data, 0 + offset, sid);
LittleEndian.putShort(data, 2 + offset, (short)( 15 + getTextsLength()));
// size defined below
LittleEndian.putShort(data, 4 + offset, getOptionFlag());
data[6 + offset] = getKeyboardShortcut();
data[7 + offset] = getNameTextLength();
@ -391,6 +394,18 @@ public class NameRecord extends Record {
data [17 + offset] = getStatusBarLength();
data [18 + offset] = getCompressedUnicodeFlag();
if ( ( field_1_option_flag & (short)0x20 ) != 0 ) {
LittleEndian.putShort(data, 2 + offset, (short)( 16 + field_13_raw_name_definition.length ));
data [19 + offset] = field_12_builtIn_name;
System.arraycopy( field_13_raw_name_definition, 0, data, 20 + offset, field_13_raw_name_definition.length );
return 20 + field_13_raw_name_definition.length;
}
else {
LittleEndian.putShort(data, 2 + offset, (short)( 15 + getTextsLength()));
StringUtil.putCompressedUnicode(getNameText(), data , 19 + offset);
int start_of_name_definition = 19 + field_3_length_name_text;
@ -413,9 +428,9 @@ public class NameRecord extends Record {
int start_of_status_bar_text = start_of_help_topic_text + field_10_length_status_bar_text;
StringUtil.putCompressedUnicode(getStatusBarText(), data , start_of_status_bar_text + offset);
return getRecordSize();
}
}
private void serializePtgs(byte [] data, int offset) {
int pos = offset;
@ -579,6 +594,29 @@ public class NameRecord extends Record {
field_9_length_help_topic_text = data [12 + offset];
field_10_length_status_bar_text = data [13 + offset];
if ( ( field_1_option_flag & (short)0x20 ) != 0 ) {
// DEBUG
// System.out.println( "Built-in name" );
field_11_compressed_unicode_flag = data[ 14 + offset ];
field_12_builtIn_name = data[ 15 + offset ];
if ( (field_12_builtIn_name & (short)0x07) != 0 ) {
field_12_name_text = "Print_Titles";
// DEBUG
// System.out.println( field_12_name_text );
field_13_raw_name_definition = new byte[ field_4_length_name_definition ];
System.arraycopy( data, 16 + offset, field_13_raw_name_definition, 0, field_13_raw_name_definition.length );
// DEBUG
// System.out.println( HexDump.toHex( field_13_raw_name_definition ) );
}
}
else {
field_11_compressed_unicode_flag= data [14 + offset];
field_12_name_text = new String(data, 15 + offset,
LittleEndian.ubyteToInt(field_3_length_name_text));
@ -602,7 +640,7 @@ public class NameRecord extends Record {
int start_of_status_bar_text = start_of_help_topic_text + field_10_length_status_bar_text;
field_17_status_bar_text = new String(data, start_of_status_bar_text + offset,
LittleEndian.ubyteToInt(field_10_length_status_bar_text));
}
}
private Stack getParsedExpressionTokens(byte [] data, short size,
@ -635,5 +673,101 @@ public class NameRecord extends Record {
public short getSid() {
    // sid is a static constant of this record class, so it is read
    // directly instead of through the instance reference.
    return sid;
}
/*
20 00
00
01
1A 00 // sz = 0x1A = 26
00 00
01 00
00
00
00
00
00 // unicode flag
07 // name
29 17 00 3B 00 00 00 00 FF FF 00 00 02 00 3B 00 //{ 26
00 07 00 07 00 00 00 FF 00 10 // }
20 00
00
01
0B 00 // sz = 0xB = 11
00 00
01 00
00
00
00
00
00 // unicode flag
07 // name
3B 00 00 07 00 07 00 00 00 FF 00 // { 11 }
*/
/*
18, 00,
1B, 00,
20, 00,
00,
01,
0B, 00,
00,
00,
00,
00,
00,
07,
3B 00 00 07 00 07 00 00 00 FF 00 ]
*/
/**
 * Builds a multi-line, human-readable dump of this NAME record: one line
 * per field, wrapped in [NAME] ... [/NAME] markers.
 *
 * The raw formula data is printed as hex. That array is only filled in
 * while parsing a built-in name, so it can still be null here; in that
 * case the text "null" is printed instead of handing a null array to
 * HexDump.toHex.
 *
 * @return the textual dump of this record
 * @see Object#toString()
 */
public String toString() {
    StringBuffer buffer = new StringBuffer();

    buffer.append("[NAME]\n");

    // fixed-size header fields
    buffer.append(" .option flags = ")
        .append( HexDump.toHex( field_1_option_flag ) ).append("\n");
    buffer.append(" .keyboard shortcut = ")
        .append( HexDump.toHex( field_2_keyboard_shortcut ) ).append("\n");
    buffer.append(" .length of the name = ")
        .append( field_3_length_name_text ).append("\n");
    buffer.append(" .size of the formula data = ")
        .append( field_4_length_name_definition ).append("\n");
    buffer.append(" .unused = ")
        .append( field_5_index_to_sheet ).append("\n");
    buffer.append(" .( 0 = Global name, otherwise index to sheet (one-based) ) = ")
        .append( field_6_equals_to_index_to_sheet ).append("\n");
    buffer.append(" .Length of menu text (character count) = ")
        .append( field_7_length_custom_menu ).append("\n");
    buffer.append(" .Length of description text (character count) = ")
        .append( field_8_length_description_text ).append("\n");
    buffer.append(" .Length of help topic text (character count) = ")
        .append( field_9_length_help_topic_text ).append("\n");
    buffer.append(" .Length of status bar text (character count) = ")
        .append( field_10_length_status_bar_text ).append("\n");

    // name and formula
    buffer.append(" .Name (Unicode flag) = ")
        .append( field_11_compressed_unicode_flag ).append("\n");
    buffer.append(" .Name (Unicode text) = ")
        .append( field_12_name_text ).append("\n");
    buffer.append(" .Formula data (RPN token array without size field) = ");
    if ( field_13_raw_name_definition == null ) {
        // no raw definition was captured for this record
        buffer.append( "null" );
    }
    else {
        buffer.append( HexDump.toHex( field_13_raw_name_definition ) );
    }
    buffer.append("\n");

    // optional trailing texts
    buffer.append(" .Menu text (Unicode string without length field) = ")
        .append( field_14_custom_menu_text ).append("\n");
    buffer.append(" .Description text (Unicode string without length field) = ")
        .append( field_15_description_text ).append("\n");
    buffer.append(" .Help topic text (Unicode string without length field) = ")
        .append( field_16_help_topic_text ).append("\n");
    buffer.append(" .Status bar text (Unicode string without length field) = ")
        .append( field_17_status_bar_text ).append("\n");

    buffer.append("[/NAME]\n");

    return buffer.toString();
}
}

View File

@ -202,6 +202,9 @@ public class HSSFWorkbook
// none currently
}
public final static byte ENCODING_COMPRESSED_UNICODE = 0;
public final static byte ENCODING_UTF_16 = 1;
/**
* set the sheet name.
* @param sheet number (0 based)
@ -209,12 +212,28 @@ public class HSSFWorkbook
*/
public void setSheetName(int sheet, String name)
{
    // Delegate to the three-argument overload of this class so that its
    // sheet-index bounds check ("Sheet out of bounds") and its encoding
    // validation also apply here; calling workbook.setSheetName directly
    // skipped both.
    setSheetName( sheet, name, ENCODING_COMPRESSED_UNICODE );
}
public void setSheetName( int sheet, String name, short encoding )
{
if (sheet > (sheets.size() - 1))
{
throw new RuntimeException("Sheet out of bounds");
}
workbook.setSheetName(sheet, name);
switch ( encoding ) {
case ENCODING_COMPRESSED_UNICODE:
case ENCODING_UTF_16:
break;
default:
// TODO java.io.UnsupportedEncodingException
throw new RuntimeException( "Unsupported encoding" );
}
workbook.setSheetName( sheet, name, encoding );
}
/**

View File

@ -63,7 +63,13 @@ import java.text.FieldPosition;
/**
* Title: String Utility Description: Collection of string handling utilities
*
* Note the confusing naming: the method pairs in which one method
* writes data and the other reads the written data back are:
* putUncompressedUnicodeHigh and getFromUnicode
* putUncompressedUnicode and getFromUnicodeHigh
*
*@author Andrew C. Oliver
*@author Sergei Kozello (sergeikozello at mail.ru)
*@created May 10, 2002
*@version 1.0
*/
@ -79,6 +85,8 @@ public class StringUtil {
* given a byte array of 16-bit unicode characters, compress to 8-bit and
* return a string
*
* { 0x16, 0x00 } -> 0x16
*
*@param string the byte array to be converted
*@param offset the initial offset into the
* byte array. it is assumed that string[ offset ] and string[ offset +
@ -103,23 +111,38 @@ public class StringUtil {
if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length");
}
byte[] bstring = new byte[len];
int index = offset;
// start with high bits.
for (int k = 0; k < len; k++) {
bstring[k] = string[index];
index += 2;
}
return new String(bstring);
char[] chars = new char[ len ];
for ( int i = 0; i < chars.length; i++ ) {
chars[i] = (char)( string[ offset + ( 2*i ) ] +
( string[ offset + ( 2*i+1 ) ] << 8 ) );
}
return new String( chars );
}
/**
 * given a byte array of 16-bit unicode characters stored low byte first,
 * decode the whole array and return it as a string
 *
 * { 0x16, 0x00 } -> 0x16
 *
 * Decoding starts at offset 0 and the character count is
 * string.length / 2, so a trailing odd byte is not counted.
 *
 *@param string the byte array to be converted
 *@return the converted string
 */
public static String getFromUnicodeHigh( final byte[] string ) {
    // two bytes per character
    final int charCount = string.length / 2;

    return getFromUnicodeHigh( string, 0, charCount );
}
/**
* given a byte array of 16-bit unicode characters, compress to 8-bit and
* return a string
*
* { 0x00, 0x16 } -> 0x16
*
*@param string the byte array to be converted
*@param offset the initial offset into the
* byte array. it is assumed that string[ offset ] and string[ offset +
@ -144,15 +167,15 @@ public class StringUtil {
if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length");
}
byte[] bstring = new byte[len];
int index = offset + 1;
// start with low bits.
for (int k = 0; k < len; k++) {
bstring[k] = string[index];
index += 2;
char[] chars = new char[ len ];
for ( int i = 0; i < chars.length; i++ ) {
chars[i] = (char)( ( string[ offset + ( 2*i ) ] << 8 ) +
string[ offset + ( 2*i+1 ) ] );
}
return new String(bstring);
return new String( chars );
}
@ -160,6 +183,8 @@ public class StringUtil {
* given a byte array of 16-bit unicode characters, compress to 8-bit and
* return a string
*
* { 0x00, 0x16 } -> 0x16
*
*@param string the byte array to be converted
*@return the converted string
*/

View File

@ -129,7 +129,6 @@ public class TestCellStyle
out.close();
SanityChecker sanityChecker = new SanityChecker();
sanityChecker.checkHSSFWorkbook(wb);
assertEquals("FILE LENGTH == 87040", file.length(), 87040); // remove me
assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());
@ -169,7 +168,6 @@ public class TestCellStyle
SanityChecker sanityChecker = new SanityChecker();
sanityChecker.checkHSSFWorkbook(wb);
assertEquals("FILE LENGTH ", 5632, file.length()); // remove me
assertEquals("LAST ROW ", 0, s.getLastRowNum());
assertEquals("FIRST ROW ", 0, s.getFirstRowNum());
@ -232,7 +230,6 @@ public class TestCellStyle
out.close();
SanityChecker sanityChecker = new SanityChecker();
sanityChecker.checkHSSFWorkbook(wb);
assertEquals("FILE LENGTH == 87040", file.length(), 87040); // remove me
assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());

View File

@ -142,8 +142,6 @@ public class TestWorkbook
wb.write(out);
out.close();
sanityChecker.checkHSSFWorkbook(wb);
assertEquals("FILE LENGTH == 87040", 87040,
file.length()); // remove me
assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());
@ -201,8 +199,6 @@ public class TestWorkbook
out.close();
sanityChecker.checkHSSFWorkbook(wb);
assertEquals("FILE LENGTH == 45568", 45568,
file.length()); // remove
assertEquals("LAST ROW == 74", 74, s.getLastRowNum());
assertEquals("FIRST ROW == 25", 25, s.getFirstRowNum());
}

View File

@ -64,6 +64,7 @@ import java.text.NumberFormat;
*
* @author Marc Johnson (mjohnson at apache dot org
* @author Glen Stampoultzis (glens at apache.org)
* @author Sergei Kozello (sergeikozello at mail.ru)
*/
public class TestStringUtil
@ -99,6 +100,48 @@ public class TestStringUtil
StringUtil.getFromUnicode(test_data));
}
/**
 * Test the simple form of getFromUnicode with characters whose codes lie
 * both below and above 127.
 */
public void testGetFromUnicodeSymbolsWithCodesMoreThan127()
{
    // two bytes per character, high byte first: four Cyrillic letters
    // (high byte 0x04) followed by " test" in plain ASCII (high byte 0x00)
    final byte[] encoded = {
        0x04, 0x22,
        0x04, 0x35,
        0x04, 0x41,
        0x04, 0x42,
        0x00, 0x20,
        0x00, 0x74,
        0x00, 0x65,
        0x00, 0x73,
        0x00, 0x74,
    };
    final String expected = "\u0422\u0435\u0441\u0442 test";

    assertEquals( expected, StringUtil.getFromUnicode( encoded ) );
}
/**
 * Test getFromUnicodeHigh with characters whose codes lie both below and
 * above 127.
 */
public void testGetFromUnicodeHighSymbolsWithCodesMoreThan127()
{
    // two bytes per character, low byte first: four Cyrillic letters
    // (high byte 0x04) followed by " test" in plain ASCII (high byte 0x00)
    final byte[] encoded = {
        0x22, 0x04,
        0x35, 0x04,
        0x41, 0x04,
        0x42, 0x04,
        0x20, 0x00,
        0x74, 0x00,
        0x65, 0x00,
        0x73, 0x00,
        0x74, 0x00,
    };
    final String expected = "\u0422\u0435\u0441\u0442 test";

    assertEquals( expected, StringUtil.getFromUnicodeHigh( encoded ) );
}
/**
* Test more complex form of getFromUnicode
*/