From 4c0d0b138161682b800a9f63eb67c0a7ccfa73b4 Mon Sep 17 00:00:00 2001 From: Andreas Beeker Date: Wed, 6 Jan 2021 12:39:02 +0000 Subject: [PATCH] #65061 - Handle VmlDrawings containing spreadsheet-ml default namespace git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1885197 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/xssf/usermodel/XSSFVMLDrawing.java | 35 ++++++++++--------- .../xssf/usermodel/TestXSSFVMLDrawing.java | 29 +++++++++++++++ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java index 756ef39b03..59aa229853 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java @@ -18,6 +18,7 @@ package org.apache.poi.xssf.usermodel; import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; +import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; import java.io.IOException; import java.io.InputStream; @@ -46,7 +47,6 @@ import com.microsoft.schemas.vml.CTShapetype; import com.microsoft.schemas.vml.STExt; import com.microsoft.schemas.vml.STStrokeJoinStyle; import org.apache.poi.ooxml.POIXMLDocumentPart; -import org.apache.poi.ooxml.util.DocumentHelper; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.schemas.vmldrawing.XmlDocument; import org.apache.poi.util.ReplacingInputStream; @@ -55,8 +55,6 @@ import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlOptions; import org.openxmlformats.schemas.officeDocument.x2006.sharedTypes.STTrueFalse; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; /** * Represents a SpreadsheetML VML drawing. 
@@ -129,23 +127,26 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart { protected void read(InputStream is) throws IOException, XmlException { - Document doc; - try { - /* - * This is a seriously sick fix for the fact that some .xlsx files contain raw bits - * of HTML, without being escaped or properly turned into XML. - * The result is that they contain things like >br<, which breaks the XML parsing. - * This very sick InputStream wrapper attempts to spot these go past, and fix them. - */ - doc = DocumentHelper.readDocument(new ReplacingInputStream(is, "<br>", "<br/>")); - } catch (SAXException e) { - throw new XmlException(e.getMessage(), e); - } - XmlOptions xopt = new XmlOptions(DEFAULT_XML_OPTIONS); xopt.setLoadSubstituteNamespaces(Collections.singletonMap("", QNAME_VMLDRAWING.getNamespaceURI())); + xopt.setDocumentType(XmlDocument.type); + + /* + * This is a seriously sick fix for the fact that some .xlsx files contain raw bits + * of HTML, without being escaped or properly turned into XML. + * The result is that they contain things like >br<, which breaks the XML parsing. + * This very sick InputStream wrapper attempts to spot these go past, and fix them. + * + * Furthermore some documents contain a default namespace of + * http://schemas.openxmlformats.org/spreadsheetml/2006/main for the namespace-less "xml" document type. + * this definition is wrong and removed. + */ + root = XmlDocument.Factory.parse( + new ReplacingInputStream( + new ReplacingInputStream(is, "<br>", "<br/>"), + " xmlns=\""+NS_SPREADSHEETML+"\"", "") + , xopt); - root = XmlDocument.Factory.parse(doc, xopt); XmlCursor cur = root.getXml().newCursor(); try { diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java index 5e4d94b41a..35c4cfff79 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java @@ -29,6 +29,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; @@ -42,6 +43,8 @@ import com.microsoft.schemas.vml.CTShadow; import com.microsoft.schemas.vml.CTShape; import com.microsoft.schemas.vml.CTShapetype; import com.microsoft.schemas.vml.STExt; +import com.microsoft.schemas.vml.STStrokeJoinStyle; +import com.microsoft.schemas.vml.impl.CTShapetypeImpl; import org.apache.poi.POIDataSamples; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; @@ -181,4 +184,30 @@ public class TestXSSFVMLDrawing { } assertEquals(16, count); } + + @Test + public void bug65061_InvalidXmlns() throws IOException, XmlException { + // input hasn't no <?xml... declaration - as in the sample file + String input = + "<xml xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:x=\"urn:schemas-microsoft-com:office:excel\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\n" + + "<v:shapetype id=\"_x0000_t202\" coordsize=\"21600,21600\" path=\"m,l,21600r21600,l21600,xe\" o:spt=\"202\">\n" + + "<v:stroke joinstyle=\"miter\"/>\n" + + "<v:path o:connecttype=\"rect\" gradientshapeok=\"t\"/>\n" + + "</v:shapetype>\n" + + "</xml>"; + + XSSFVMLDrawing vml = new XSSFVMLDrawing(); + vml.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); + + // check that the xml beans parsed correctly + assertNotNull(vml.getDocument().getXml()); + + // check the parsed child + List<XmlObject> objs = vml.getItems(); + assertEquals(1, objs.size()); + XmlObject xst = objs.get(0); + assertTrue(xst instanceof CTShapetypeImpl); + CTShapetype st = (CTShapetype)xst; + assertEquals(STStrokeJoinStyle.MITER, st.getStrokeArray(0).getJoinstyle()); + } } \ No newline at end of file