bug 50955 -- fix for java 7

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1790130 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2017-04-04 14:41:53 +00:00
parent 6fe3b75bfd
commit 64e8f3b0f4
3 changed files with 39 additions and 6 deletions

View File

@ -18,6 +18,7 @@
package org.apache.poi.hwpf.model;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.util.NotImplemented;
@ -40,11 +41,19 @@ public class OldTextPiece extends TextPiece {
public OldTextPiece(int start, int end, byte[] text, PieceDescriptor pd) {
super(start, end, text, pd);
this.rawBytes = text;
if (end < start) {
throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
}
}
/**
 * Validates the piece length, except for variable-byte charsets.
 *
 * The char/byte length mapping for Big5 is still unreliable (it has been seen
 * to work with Java 8 but not with Java 7), so for now the superclass check is
 * skipped whenever the descriptor's charset is listed in
 * {@link CodePageUtil#VARIABLE_BYTE_CHARSETS}.
 *
 * @param start  start offset of the piece
 * @param end    end offset of the piece
 * @param length length of the decoded text
 * @param pd     descriptor whose charset decides whether the check runs
 */
@Override
protected void validateLengths(int start, int end, int length, PieceDescriptor pd) {
    final boolean variableByteCharset = pd.getCharset() != null
            && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(pd.getCharset());

    // Only run the strict length check when the charset is not variable-byte.
    if (!variableByteCharset) {
        super.validateLengths(start, end, length, pd);
    }
}
/**
* @return nothing, ever. Always throws an UnsupportedOperationException
* @throws UnsupportedOperationException
@ -56,6 +65,7 @@ public class OldTextPiece extends TextPiece {
}
/**
 * @return the {@code _buf} field, cast to a {@link StringBuilder}
 */
@Override
public StringBuilder getStringBuilder() {
    final StringBuilder builder = (StringBuilder) _buf;
    return builder;
}

View File

@ -60,14 +60,17 @@ public class TextPiece extends PropertyNode<TextPiece> {
// Validate
int textLength = ((CharSequence) _buf).length();
if (end - start != textLength) {
throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
}
validateLengths(start, end, textLength, pd);
if (end < start) {
throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
}
}
/**
 * Checks that the span {@code end - start} equals the length of the text.
 * The descriptor is not consulted here; it is part of the signature so that
 * overriding classes can base their decision on it.
 *
 * @param start      start offset of the piece
 * @param end        end offset of the piece
 * @param textLength length of the text
 * @param pd         the piece descriptor (unused in this implementation)
 * @throws IllegalStateException if {@code end - start} differs from {@code textLength}
 */
protected void validateLengths(int start, int end, int textLength, PieceDescriptor pd) {
    final int span = end - start;
    if (span == textLength) {
        return;
    }
    throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
}
/**
* Create the StringBuilder from the text and unicode flag
*/

View File

@ -226,6 +226,26 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
assertContains(txt, "also maintain");//this is at a critical juncture
assertContains(txt, "which are available for");//this too
/*
The bytes for the following test:
170 : 78 : x
171 : 0 :
172 : d : <r>
173 : 35 : 5
174 : 39 : 9
175 : 0 :
176 : 2d : -
177 : 0 :
178 : 35 : 5
179 : 0 :
180 : 35 : 5
Note that we are skipping over the value "5" at offset 173.
This is apparently an invalid sequence in MS's encoding scheme.
When I open the document in MS Word, I also see "\r9-55".
*/
assertContains(txt, "\n9-55 xxxxx block5");
//TODO: figure out why these two aren't passing
// assertContains(txt, "\u2019\u0078 block2");//make sure smart quote is extracted correctly
// assertContains(txt, "We are able to");//not sure if we can get this easily?