From 7ed2c1e85af772be8821524f9a5599be0f8f7bfa Mon Sep 17 00:00:00 2001
From: Andreas Beeker
+ *
+ * If you want to normalize line endings DOS/MAC (\r\n | \r) to UNIX (\n), you can call the following:
+ * {@code new ReplacingInputStream(new ReplacingInputStream(is, "\r\n", "\n"), "\r", "\n")}
+ *
+ * @param in input
+ * @param pattern pattern to replace
+ * @param replacement the replacement or null
+ */
+ public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
+ super(in);
+ if (pattern == null || pattern.length == 0) {
+ throw new IllegalArgumentException("pattern length should be > 0");
+ }
+ this.pattern = pattern;
+ this.replacement = replacement;
+ // we will never match more than the pattern length
+ buf = new int[pattern.length];
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+     // Re-implements the parent's bulk-read logic on top of our own read(): the parent's
+     // version delegates to the wrapped stream instead of calling our read(), which would
+     // bypass the replacement.
+     if (b == null) {
+         throw new NullPointerException();
+     }
+     if (off < 0 || len < 0 || len > b.length - off) {
+         throw new IndexOutOfBoundsException();
+     }
+     if (len == 0) {
+         return 0;
+     }
+
+     // the first byte decides between "end of stream" and "at least one byte delivered"
+     final int first = read();
+     if (first == -1) {
+         return -1;
+     }
+     b[off] = (byte) first;
+
+     // keep filling until the request is satisfied or the stream runs dry
+     int count = 1;
+     while (count < len) {
+         final int value = read();
+         if (value == -1) {
+             break;
+         }
+         b[off + count] = (byte) value;
+         count++;
+     }
+     return count;
+ }
+
+ @Override
+ public int read(byte[] b) throws IOException {
+     // fill the whole array through our own three-argument read so the replacement applies
+     final int capacity = b.length;
+     return read(b, 0, capacity);
+ }
+
+ /**
+  * Reads the next byte of the filtered stream, substituting the replacement for every
+  * complete occurrence of the pattern.
+  *
+  * Pattern and replacement bytes are treated as unsigned values (0..255) so that they
+  * compare correctly against the values delivered by the underlying stream and are never
+  * mistaken for the end-of-stream marker.
+  *
+  * @return the next byte of the filtered stream, or -1 once the underlying stream is exhausted
+  * @throws IOException if reading from the underlying stream fails
+  */
+ @Override
+ public int read() throws IOException {
+     // Simple state machine. A loop is used instead of self-recursion so that long runs of
+     // matches removed with an empty replacement cannot exhaust the call stack.
+     while (true) {
+         int next;
+         switch (state) {
+             default:
+             case NOT_MATCHED:
+                 // we are not currently matching, replacing, or unbuffering
+                 next = super.read();
+                 // mask the pattern byte: a raw (signed) byte >= 0x80 would never equal the
+                 // 0..255 stream value, and 0xFF would wrongly equal the EOF marker -1
+                 if (next == -1 || (pattern[0] & 0xFF) != next) {
+                     return next;
+                 }
+
+                 // clear whatever was there
+                 Arrays.fill(buf, 0);
+                 // make sure we start at 0
+                 matchedIndex = 0;
+                 buf[matchedIndex++] = next;
+
+                 if (pattern.length > 1) {
+                     // more pattern bytes to verify
+                     state = State.MATCHING;
+                 } else if (replacement == null || replacement.length == 0) {
+                     // single-byte pattern with nothing to emit: drop the byte and keep scanning
+                     state = State.NOT_MATCHED;
+                     matchedIndex = 0;
+                 } else {
+                     // single-byte pattern: the match is already complete, start replacing
+                     state = State.REPLACING;
+                     replacedIndex = 0;
+                 }
+                 break;
+
+             case MATCHING:
+                 // the previous bytes matched part of the pattern
+                 next = super.read();
+                 if ((pattern[matchedIndex] & 0xFF) == next) {
+                     buf[matchedIndex++] = next;
+                     if (matchedIndex == pattern.length) {
+                         // we've found a full match!
+                         if (replacement == null || replacement.length == 0) {
+                             // the replacement is empty, go straight to NOT_MATCHED
+                             state = State.NOT_MATCHED;
+                             matchedIndex = 0;
+                         } else {
+                             // start replacing
+                             state = State.REPLACING;
+                             replacedIndex = 0;
+                         }
+                     }
+                 } else {
+                     // mismatch (or end of stream) -> serve the buffered bytes, including this one
+                     buf[matchedIndex++] = next;
+                     state = State.UNBUFFER;
+                     unbufferIndex = 0;
+                 }
+                 break;
+
+             case REPLACING:
+                 // we've fully matched the pattern and are returning bytes from the replacement;
+                 // masked so that a replacement byte of 0xFF is not returned as -1 (EOF)
+                 next = replacement[replacedIndex++] & 0xFF;
+                 if (replacedIndex == replacement.length) {
+                     state = State.NOT_MATCHED;
+                     replacedIndex = 0;
+                 }
+                 return next;
+
+             case UNBUFFER:
+                 // we partially matched the pattern before encountering a non matching byte
+                 // we need to serve up the buffered bytes before we go back to NOT_MATCHED
+                 next = buf[unbufferIndex++];
+                 if (unbufferIndex == matchedIndex) {
+                     state = State.NOT_MATCHED;
+                     matchedIndex = 0;
+                 }
+                 return next;
+         }
+     }
+ }
+
+ @Override
+ public String toString() {
+     // debugging aid: the current state followed by the three cursor positions
+     final StringBuilder text = new StringBuilder(state.name());
+     text.append(" ").append(matchedIndex);
+     text.append(" ").append(replacedIndex);
+     text.append(" ").append(unbufferIndex);
+     return text.toString();
+ }
+
+}
\ No newline at end of file
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
index 352b804b9a..7a7f02f6cd 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
@@ -34,7 +34,7 @@ import javax.xml.namespace.QName;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.util.DocumentHelper;
-import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
+import org.apache.poi.util.ReplacingInputStream;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
@@ -124,7 +124,13 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
protected void read(InputStream is) throws IOException, XmlException {
Document doc;
try {
- doc = DocumentHelper.readDocument(new EvilUnclosedBRFixingInputStream(is));
+ /*
+ * This is a seriously sick fix for the fact that some .xlsx files contain raw bits
+ * of HTML, without being escaped or properly turned into XML.
+ * The result is that they contain things like <br>, which breaks the XML parsing.
+ * This very sick InputStream wrapper attempts to spot these go past, and fix them.
+ */
+ doc = DocumentHelper.readDocument(new ReplacingInputStream(is, "<br>", "<br/>"));
} catch (SAXException e) {
throw new XmlException(e.getMessage(), e);
}
@@ -146,7 +152,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
String id = shape.getId();
if(id != null) {
Matcher m = ptrn_shapeId.matcher(id);
- if(m.find()) _shapeId = Math.max(_shapeId, Integer.parseInt(m.group(1)));
+ if(m.find()) {
+ _shapeId = Math.max(_shapeId, Integer.parseInt(m.group(1)));
+ }
}
_items.add(shape);
} else {
diff --git a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
index 5fae1ea1c6..0ef1aeeb5e 100644
--- a/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
+++ b/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
@@ -16,9 +16,11 @@
==================================================================== */
package org.apache.poi.xssf.util;
-import java.io.IOException;
import java.io.InputStream;
-import java.util.ArrayList;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.Removal;
+import org.apache.poi.util.ReplacingInputStream;
/**
* This is a seriously sick fix for the fact that some .xlsx
@@ -31,179 +33,14 @@ import java.util.ArrayList;
* Only works for UTF-8 and US-ASCII based streams!
* It should only be used where experience shows the problem
* can occur...
+ *
+ * @deprecated 3.16-beta2 - use {@code ReplacingInputStream(source, "<br>", "<br/>")}
*/
-public class EvilUnclosedBRFixingInputStream extends InputStream {
- private InputStream source;
- private byte[] spare;
-
- private static byte[] detect = new byte[] {
- (byte)'<', (byte)'b', (byte)'r', (byte)'>'
- };
-
+@Deprecated
+@Removal(version="3.18")
+@Internal
+public class EvilUnclosedBRFixingInputStream extends ReplacingInputStream {
public EvilUnclosedBRFixingInputStream(InputStream source) {
- this.source = source;
- }
-
- /**
- * Warning - doesn't fix!
- */
- @Override
- public int read() throws IOException {
- return source.read();
- }
-
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- // Grab any data left from last time
- int readA = readFromSpare(b, off, len);
-
- // Now read from the stream
- int readB = source.read(b, off+readA, len-readA);
-
- // Figure out how much we've done
- int read;
- if(readB == -1 || readB == 0) {
- if (readA == 0) {
- return readB;
- }
- read = readA;
- } else {
- read = readA + readB;
- }
-
- // Fix up our data
- if(read > 0) {
- read = fixUp(b, off, read);
- }
-
- // All done
- return read;
- }
-
- @Override
- public int read(byte[] b) throws IOException {
- return this.read(b, 0, b.length);
- }
-
- /**
- * Reads into the buffer from the spare bytes
- */
- private int readFromSpare(byte[] b, int offset, int len) {
- if(spare == null) return 0;
- if(len == 0) throw new IllegalArgumentException("Asked to read 0 bytes");
-
- if(spare.length <= len) {
- // All fits, good
- System.arraycopy(spare, 0, b, offset, spare.length);
- int read = spare.length;
- spare = null;
- return read;
- } else {
- // We have more spare than they can copy with...
- byte[] newspare = new byte[spare.length-len];
- System.arraycopy(spare, 0, b, offset, len);
- System.arraycopy(spare, len, newspare, 0, newspare.length);
- spare = newspare;
- return len;
- }
- }
- private void addToSpare(byte[] b, int offset, int len, boolean atTheEnd) {
- if(spare == null) {
- spare = new byte[len];
- System.arraycopy(b, offset, spare, 0, len);
- } else {
- byte[] newspare = new byte[spare.length+len];
- if(atTheEnd) {
- System.arraycopy(spare, 0, newspare, 0, spare.length);
- System.arraycopy(b, offset, newspare, spare.length, len);
- } else {
- System.arraycopy(b, offset, newspare, 0, len);
- System.arraycopy(spare, 0, newspare, len, spare.length);
- }
- spare = newspare;
- }
- }
-
- private int fixUp(byte[] b, int offset, int read) {
- // Do we have any potential overhanging ones?
- for(int i=0; i
fixAt = new ArrayList
!
- int fixes = 0;
- for(int at : fixAt) {
- if(at > offset+read-detect.length-overshoot-fixes) {
- overshoot = needed - at - 1 - fixes;
- break;
- }
- fixes++;
- }
-
- addToSpare(b, offset+read-overshoot, overshoot, false);
- read -= overshoot;
- }
-
- // Fix them, in reverse order so the
- // positions are valid
- for(int j=fixAt.size()-1; j>=0; j--) {
- int i = fixAt.get(j);
- if(i >= read+offset) {
- // This one has moved into the overshoot
- continue;
- }
- if(i > read-3) {
- // This one has moved into the overshoot
- continue;
- }
-
- byte[] tmp = new byte[read-i-3];
- System.arraycopy(b, i+3, tmp, 0, tmp.length);
- b[i+3] = (byte)'/';
- System.arraycopy(tmp, 0, b, i+4, tmp.length);
- // It got one longer
- read++;
- }
- return read;
+ super(source, "<br>", "<br/>");
}
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
index 4ae2b8b052..b8dbd36a71 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
@@ -16,18 +16,28 @@
==================================================================== */
package org.apache.poi.xssf.usermodel;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.List;
+import java.util.regex.Pattern;
-import com.microsoft.schemas.office.excel.STTrueFalseBlank;
import org.apache.poi.POIDataSamples;
+import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
+import org.junit.Test;
import com.microsoft.schemas.office.excel.CTClientData;
import com.microsoft.schemas.office.excel.STObjectType;
+import com.microsoft.schemas.office.excel.STTrueFalseBlank;
import com.microsoft.schemas.office.office.CTShapeLayout;
import com.microsoft.schemas.office.office.STConnectType;
import com.microsoft.schemas.office.office.STInsetMode;
@@ -37,14 +47,10 @@ import com.microsoft.schemas.vml.CTShapetype;
import com.microsoft.schemas.vml.STExt;
import com.microsoft.schemas.vml.STTrueFalse;
-import junit.framework.TestCase;
+public class TestXSSFVMLDrawing {
-/**
- * @author Yegor Kozlov
- */
-public class TestXSSFVMLDrawing extends TestCase {
-
- public void testNew() throws Exception {
+ @Test
+ public void testNew() throws IOException, XmlException {
XSSFVMLDrawing vml = new XSSFVMLDrawing();
List
");
+ int count = 0;
+ for (XmlObject xo : vml.getItems()) {
+ String split[] = p.split(xo.toString());
+ count += split.length-1;
+ }
+ assertEquals(16, count);
+ }
}
\ No newline at end of file
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
index a15b22c1c1..d9010fbbc2 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
@@ -17,94 +17,78 @@
package org.apache.poi.xssf.util;
+import static org.junit.Assert.assertArrayEquals;
+
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
-import junit.framework.TestCase;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.ReplacingInputStream;
+import org.junit.Test;
-public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
- public void testOK() throws Exception {
- byte[] ok = "
There!