fixed RecordFormatException when reading unicode strings with phonetic data, see Bugzilla 50779

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1080496 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2011-03-11 09:33:22 +00:00
parent 5b8ee9b865
commit dd87e86c44
7 changed files with 1271 additions and 1 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta2" date="2011-??-??">
<action dev="poi-developers" type="fix">50779 - fixed RecordFormatException when reading unicode strings with phonetic data</action>
<action dev="poi-developers" type="fix">50718 - More helpful error message when you try to create a CellReference with #REF!</action>
<action dev="poi-developers" type="fix">50784 - XSSFColors return by XSSFFont now have theme information applied to them</action>
<action dev="poi-developers" type="fix">50846 - Improve how XSSFColor inherits from Themes</action>

View File

@ -22,6 +22,7 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.hssf.record.cont.ContinuableRecordInput;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.BitField;
@ -435,7 +436,7 @@ public class UnicodeString implements Comparable<UnicodeString> { // TODO - make
}
if (isExtendedText() && (extensionLength > 0)) {
field_5_ext_rst = new ExtRst(in, extensionLength);
field_5_ext_rst = new ExtRst(new ContinuableRecordInput(in), extensionLength);
if(field_5_ext_rst.getDataSize()+4 != extensionLength) {
System.err.println("ExtRst was supposed to be " + extensionLength + " bytes long, but seems to actually be " + (field_5_ext_rst.getDataSize()+4));
}

View File

@ -0,0 +1,118 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hssf.record.cont;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.util.LittleEndianInput;
/**
 * A decorated {@link RecordInputStream} that can read primitive data types
 * (short, int, long, etc.) spanned across a {@link ContinueRecord} boundary.
 *
 * <p>
 * Most records construct themselves from {@link RecordInputStream}.
 * {@link RecordInputStream} assumes that a {@link ContinueRecord} break always occurs
 * at a type boundary, however, it is not always so.
 * </p>
 * <p>
 * Two attachments to <a href="https://issues.apache.org/bugzilla/show_bug.cgi?id=50779">Bugzilla 50779</a>
 * demonstrate that a CONTINUE break can appear right in between two bytes of a unicode character
 * or between two bytes of a <code>short</code>. The problematic portion of the data is
 * in an Asian Phonetic Settings Block (ExtRst) of a UnicodeString.
 * </p>
 * <p>
 * {@link RecordInputStream} greedily requests the bytes to be read and stumbles on such files with a
 * "Not enough data (1) to read requested (2) bytes" exception. The <code>ContinuableRecordInput</code>
 * class circumvents this "type boundary" rule and reads data byte-by-byte, rolling over CONTINUE if necessary.
 * </p>
 *
 * <p>
 * YK: For now (March 2011) this class is only used to read
 * {@link org.apache.poi.hssf.record.common.UnicodeString.ExtRst} blocks of a UnicodeString.
 * </p>
 *
 * @author Yegor Kozlov
 */
public class ContinuableRecordInput implements LittleEndianInput {
    /** The wrapped stream; every read ultimately goes through it. */
    private final RecordInputStream _in;

    /**
     * @param in the record stream to decorate
     */
    public ContinuableRecordInput(RecordInputStream in) {
        _in = in;
    }

    /**
     * @return whatever the wrapped stream reports as available
     */
    public int available() {
        return _in.available();
    }

    /**
     * @return the next byte of the wrapped stream, as a signed value
     */
    public byte readByte() {
        return _in.readByte();
    }

    /**
     * @return the next byte of the wrapped stream, as an unsigned value
     */
    public int readUByte() {
        return _in.readUByte();
    }

    /**
     * Reads a little-endian signed 16-bit value one byte at a time.
     * The value is assembled from single-byte reads rather than delegated to the
     * wrapped stream's two-byte read, which is the call that fails when a
     * CONTINUE break falls between the two bytes.
     *
     * @return the signed 16-bit value
     */
    public short readShort() {
        return (short) readUShort();
    }

    /**
     * Reads a little-endian unsigned 16-bit value one byte at a time.
     *
     * @return the value in the range 0..65535
     */
    public int readUShort() {
        int b0 = readUByte();
        int b1 = readUByte();
        return (b1 << 8) + b0;
    }

    /**
     * Reads a little-endian 32-bit value one byte at a time.
     *
     * @return the signed 32-bit value
     */
    public int readInt() {
        int b0 = readUByte();
        int b1 = readUByte();
        int b2 = readUByte();
        int b3 = readUByte();
        return (b3 << 24) + (b2 << 16) + (b1 << 8) + b0;
    }

    /**
     * Reads a little-endian 64-bit value one byte at a time.
     *
     * @return the signed 64-bit value
     */
    public long readLong() {
        int b0 = readUByte();
        int b1 = readUByte();
        int b2 = readUByte();
        int b3 = readUByte();
        int b4 = readUByte();
        int b5 = readUByte();
        int b6 = readUByte();
        int b7 = readUByte();
        // bytes 3..7 are widened to long before shifting so no bits are lost
        // (or sign-extended) in 32-bit int arithmetic
        return (((long) b7 << 56)
                + ((long) b6 << 48)
                + ((long) b5 << 40)
                + ((long) b4 << 32)
                + ((long) b3 << 24)
                + (b2 << 16)
                + (b1 << 8)
                + b0);
    }

    /**
     * Reads a little-endian IEEE 754 double one byte at a time, so that a
     * CONTINUE break inside the eight bytes does not abort the read.
     * NOTE(review): any extra validation performed by the wrapped stream's own
     * <code>readDouble()</code> is bypassed here - confirm that is acceptable.
     *
     * @return the decoded double
     */
    public double readDouble() {
        return Double.longBitsToDouble(readLong());
    }

    /**
     * Fills the whole buffer by delegating to the wrapped stream.
     * NOTE(review): this is a bulk read on the wrapped stream - confirm it
     * tolerates a CONTINUE break inside the requested range.
     *
     * @param buf the buffer to fill
     */
    public void readFully(byte[] buf) {
        _in.readFully(buf);
    }

    /**
     * Fills part of the buffer by delegating to the wrapped stream.
     * NOTE(review): this is a bulk read on the wrapped stream - confirm it
     * tolerates a CONTINUE break inside the requested range.
     *
     * @param buf the buffer to fill
     * @param off the offset in <code>buf</code> of the first byte written
     * @param len the number of bytes to read
     */
    public void readFully(byte[] buf, int off, int len) {
        _in.readFully(buf, off, len);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -2019,4 +2019,13 @@ if(1==2) {
// TODO Identify what excel doesn't like, and check for that
}
/**
 * Bugzilla 50779: opens each of the two sample workbooks attached to the
 * bug and round-trips it through a write-out / read-back cycle.
 */
public void test50779() throws Exception {
    String[] sampleNames = { "50779_1.xls", "50779_2.xls" };
    for (String sampleName : sampleNames) {
        HSSFWorkbook workbook = openSample(sampleName);
        writeOutAndReadBack(workbook);
    }
}
}

Binary file not shown.

Binary file not shown.