mirror of https://github.com/apache/poi.git
Fix bug #50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1067217 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b4d748f242
commit
81620ddd18
|
@ -34,6 +34,7 @@
|
|||
|
||||
<changes>
|
||||
<release version="3.8-beta1" date="2010-??-??">
|
||||
<action dev="poi-developers" type="fix">50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents</action>
|
||||
<action dev="poi-developers" type="add">49928 - allow overridden built-in formats in HSSFCellStyle</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">50607 - Added implementation for CLEAN(), CHAR() and ADDRESS()</action>
|
||||
<action dev="poi-developers" type="add">50587 - Improved documentation on user-defined functions</action>
|
||||
|
|
|
@ -54,16 +54,26 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
|
|||
|
||||
@Override
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
if(spare != null) {
|
||||
// This is risky, but spare is normally only a byte or two...
|
||||
System.arraycopy(spare, 0, b, off, spare.length);
|
||||
int ret = spare.length;
|
||||
spare = null;
|
||||
return ret;
|
||||
// Grab any data left from last time
|
||||
int readA = readFromSpare(b, off, len);
|
||||
|
||||
// Now read from the stream
|
||||
int readB = source.read(b, off+readA, len-readA);
|
||||
|
||||
// Figure out how much we've done
|
||||
int read;
|
||||
if(readB == -1 || readB == 0) {
|
||||
read = readA;
|
||||
} else {
|
||||
read = readA + readB;
|
||||
}
|
||||
|
||||
int read = source.read(b, off, len);
|
||||
read = fixUp(b, off, read);
|
||||
// Fix up our data
|
||||
if(read > 0) {
|
||||
read = fixUp(b, off, read);
|
||||
}
|
||||
|
||||
// All done
|
||||
return read;
|
||||
}
|
||||
|
||||
|
@ -71,11 +81,72 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
|
|||
public int read(byte[] b) throws IOException {
|
||||
return this.read(b, 0, b.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads into the buffer from the spare bytes
|
||||
*/
|
||||
private int readFromSpare(byte[] b, int offset, int len) {
|
||||
if(spare == null) return 0;
|
||||
if(len == 0) throw new IllegalArgumentException("Asked to read 0 bytes");
|
||||
|
||||
if(spare.length <= len) {
|
||||
// All fits, good
|
||||
System.arraycopy(spare, 0, b, offset, spare.length);
|
||||
int read = spare.length;
|
||||
spare = null;
|
||||
return read;
|
||||
} else {
|
||||
// We have more spare than they can copy with...
|
||||
byte[] newspare = new byte[spare.length-len];
|
||||
System.arraycopy(spare, 0, b, offset, len);
|
||||
System.arraycopy(spare, len, newspare, 0, newspare.length);
|
||||
spare = newspare;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
private void addToSpare(byte[] b, int offset, int len, boolean atTheEnd) {
|
||||
if(spare == null) {
|
||||
spare = new byte[len];
|
||||
System.arraycopy(b, offset, spare, 0, len);
|
||||
} else {
|
||||
byte[] newspare = new byte[spare.length+len];
|
||||
if(atTheEnd) {
|
||||
System.arraycopy(spare, 0, newspare, 0, spare.length);
|
||||
System.arraycopy(b, offset, newspare, spare.length, len);
|
||||
} else {
|
||||
System.arraycopy(b, offset, newspare, 0, len);
|
||||
System.arraycopy(spare, 0, newspare, len, spare.length);
|
||||
}
|
||||
spare = newspare;
|
||||
}
|
||||
}
|
||||
|
||||
private int fixUp(byte[] b, int offset, int read) {
|
||||
// Do we have any potential overhanging ones?
|
||||
for(int i=0; i<detect.length-1; i++) {
|
||||
int base = offset+read-1-i;
|
||||
if(base < 0) continue;
|
||||
|
||||
boolean going = true;
|
||||
for(int j=0; j<=i && going; j++) {
|
||||
if(b[base+j] == detect[j]) {
|
||||
// Matches
|
||||
} else {
|
||||
going = false;
|
||||
}
|
||||
}
|
||||
if(going) {
|
||||
// There could be a <br> hanging over the end, eg <br|
|
||||
addToSpare(b, base, i+1, true);
|
||||
read -= 1;
|
||||
read -= i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Find places to fix
|
||||
ArrayList<Integer> fixAt = new ArrayList<Integer>();
|
||||
for(int i=offset; i<offset+read-4; i++) {
|
||||
for(int i=offset; i<=offset+read-detect.length; i++) {
|
||||
boolean going = true;
|
||||
for(int j=0; j<detect.length && going; j++) {
|
||||
if(b[i+j] != detect[j]) {
|
||||
|
@ -91,18 +162,37 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
|
|||
return read;
|
||||
}
|
||||
|
||||
// Save a bit, if needed to fit
|
||||
int overshoot = offset+read+fixAt.size() - b.length;
|
||||
// If there isn't space in the buffer to contain
|
||||
// all the fixes, then save the overshoot for next time
|
||||
int needed = offset+read+fixAt.size();
|
||||
int overshoot = needed - b.length;
|
||||
if(overshoot > 0) {
|
||||
spare = new byte[overshoot];
|
||||
System.arraycopy(b, b.length-overshoot, spare, 0, overshoot);
|
||||
// Make sure we don't lose part of a <br>!
|
||||
int fixes = 0;
|
||||
for(int at : fixAt) {
|
||||
if(at > offset+read-detect.length-overshoot-fixes) {
|
||||
overshoot = needed - at - 1 - fixes;
|
||||
break;
|
||||
}
|
||||
fixes++;
|
||||
}
|
||||
|
||||
addToSpare(b, offset+read-overshoot, overshoot, false);
|
||||
read -= overshoot;
|
||||
}
|
||||
|
||||
// Fix them, in reverse order so the
|
||||
// positions are valid
|
||||
for(int j=fixAt.size()-1; j>=0; j--) {
|
||||
int i = fixAt.get(j);
|
||||
int i = fixAt.get(j);
|
||||
if(i >= read+offset) {
|
||||
// This one has moved into the overshoot
|
||||
continue;
|
||||
}
|
||||
if(i > read-3) {
|
||||
// This one has moved into the overshoot
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] tmp = new byte[read-i-3];
|
||||
System.arraycopy(b, i+3, tmp, 0, tmp.length);
|
||||
|
|
|
@ -70,6 +70,37 @@ public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
|
|||
assertEquals(fixed, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that we can copy with br tags around the buffer boundaries
|
||||
*/
|
||||
public void testBufferSize() throws Exception {
|
||||
byte[] orig = "<p><div>Hello<br> <br>There!</div> <div>Tags!<br><br></div></p>".getBytes("UTF-8");
|
||||
byte[] fixed = "<p><div>Hello<br/> <br/>There!</div> <div>Tags!<br/><br/></div></p>".getBytes("UTF-8");
|
||||
|
||||
// Vary the buffer size, so that we can end up with the br in the
|
||||
// overflow or only part in the buffer
|
||||
for(int i=5; i<orig.length; i++) {
|
||||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
|
||||
new ByteArrayInputStream(orig)
|
||||
);
|
||||
|
||||
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||
boolean going = true;
|
||||
while(going) {
|
||||
byte[] b = new byte[i];
|
||||
int r = inp.read(b);
|
||||
if(r > 0) {
|
||||
bout.write(b, 0, r);
|
||||
} else {
|
||||
going = false;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] result = bout.toByteArray();
|
||||
assertEquals(fixed, result);
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertEquals(byte[] a, byte[] b) {
|
||||
assertEquals(a.length, b.length);
|
||||
for(int i=0; i<a.length; i++) {
|
||||
|
|
Loading…
Reference in New Issue