diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
index 756ef39b03..59aa229853 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
@@ -18,6 +18,7 @@
package org.apache.poi.xssf.usermodel;
import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
+import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
import java.io.IOException;
import java.io.InputStream;
@@ -46,7 +47,6 @@ import com.microsoft.schemas.vml.CTShapetype;
import com.microsoft.schemas.vml.STExt;
import com.microsoft.schemas.vml.STStrokeJoinStyle;
import org.apache.poi.ooxml.POIXMLDocumentPart;
-import org.apache.poi.ooxml.util.DocumentHelper;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.schemas.vmldrawing.XmlDocument;
import org.apache.poi.util.ReplacingInputStream;
@@ -55,8 +55,6 @@ import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.officeDocument.x2006.sharedTypes.STTrueFalse;
-import org.w3c.dom.Document;
-import org.xml.sax.SAXException;
/**
* Represents a SpreadsheetML VML drawing.
@@ -129,23 +127,26 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
protected void read(InputStream is) throws IOException, XmlException {
- Document doc;
- try {
- /*
- * This is a seriously sick fix for the fact that some .xlsx files contain raw bits
- * of HTML, without being escaped or properly turned into XML.
- * The result is that they contain things like >br<, which breaks the XML parsing.
- * This very sick InputStream wrapper attempts to spot these go past, and fix them.
- */
- doc = DocumentHelper.readDocument(new ReplacingInputStream(is, "
", "
"));
- } catch (SAXException e) {
- throw new XmlException(e.getMessage(), e);
- }
-
XmlOptions xopt = new XmlOptions(DEFAULT_XML_OPTIONS);
xopt.setLoadSubstituteNamespaces(Collections.singletonMap("", QNAME_VMLDRAWING.getNamespaceURI()));
+ xopt.setDocumentType(XmlDocument.type);
+
+ /*
+ * This is a seriously sick fix for the fact that some .xlsx files contain raw bits
+ * of HTML, without being escaped or properly turned into XML.
+ * The result is that they contain things like <br>, which breaks the XML parsing.
+ * This very sick InputStream wrapper attempts to spot these go past, and fix them.
+ *
+ * Furthermore, some documents declare a default namespace of
+ * http://schemas.openxmlformats.org/spreadsheetml/2006/main for the namespace-less "xml" document type.
+ * That declaration is wrong, so the second wrapper replaces it with an empty string before parsing.
+ */
+ root = XmlDocument.Factory.parse(
+ new ReplacingInputStream(
+ new ReplacingInputStream(is, "<br>", "<br/>"),
+ " xmlns=\""+NS_SPREADSHEETML+"\"", "")
+ , xopt);
- root = XmlDocument.Factory.parse(doc, xopt);
XmlCursor cur = root.getXml().newCursor();
try {
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
index 5e4d94b41a..35c4cfff79 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java
@@ -29,6 +29,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
@@ -42,6 +43,8 @@ import com.microsoft.schemas.vml.CTShadow;
import com.microsoft.schemas.vml.CTShape;
import com.microsoft.schemas.vml.CTShapetype;
import com.microsoft.schemas.vml.STExt;
+import com.microsoft.schemas.vml.STStrokeJoinStyle;
+import com.microsoft.schemas.vml.impl.CTShapetypeImpl;
import org.apache.poi.POIDataSamples;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
@@ -181,4 +184,30 @@ public class TestXSSFVMLDrawing {
}
assertEquals(16, count);
}
+
+ @Test
+ public void bug65061_InvalidXmlns() throws IOException, XmlException {
+ // the input has no <?xml ...?> declaration - as in the sample file - and its root element
+ // carries the invalid default xmlns of the SpreadsheetML main namespace
+ String input =
+ "<xml xmlns:mv=\"http://macVmlSchemaUri\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:x=\"urn:schemas-microsoft-com:office:excel\" xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\n" +
+ "<v:shapetype id=\"_x0000_t202\" path=\"m,l,21600r21600,l21600,xe\" o:spt=\"202\" coordsize=\"21600,21600\">\n" +
+ "<v:stroke joinstyle=\"miter\"/>\n" +
+ "<v:path o:connecttype=\"rect\" gradientshapeok=\"t\"/>\n" +
+ "</v:shapetype>\n" +
+ "</xml>";
+
+ XSSFVMLDrawing vml = new XSSFVMLDrawing();
+ vml.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
+
+ // check that the xml beans parsed correctly
+ assertNotNull(vml.getDocument().getXml());
+
+ // check the parsed child
+ List<XmlObject> objs = vml.getItems();
+ assertEquals(1, objs.size());
+ XmlObject xst = objs.get(0);
+ assertTrue(xst instanceof CTShapetypeImpl);
+ CTShapetype st = (CTShapetype)xst;
+ assertEquals(STStrokeJoinStyle.MITER, st.getStrokeArray(0).getJoinstyle());
+ }
}
\ No newline at end of file