diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java index 4867810a5c..043fd632f4 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java @@ -24,9 +24,9 @@ import java.util.TreeMap; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; -import org.apache.poi.util.SAXHelper; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -311,7 +311,7 @@ public final class PackageRelationshipCollection implements throws InvalidFormatException { try { logger.log(POILogger.DEBUG, "Parsing relationship: " + relPart.getPartName()); - Document xmlRelationshipsDoc = SAXHelper.readSAXDocument(relPart.getInputStream()); + Document xmlRelationshipsDoc = DocumentHelper.readDocument(relPart.getInputStream()); // Browse default types Element root = xmlRelationshipsDoc.getDocumentElement(); diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ContentTypeManager.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ContentTypeManager.java index 7478590a29..581dabe000 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ContentTypeManager.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ContentTypeManager.java @@ -33,7 +33,6 @@ import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.util.DocumentHelper; -import org.apache.poi.util.SAXHelper; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -371,7 +370,7 @@ public abstract class ContentTypeManager { private void parseContentTypesFile(InputStream in) throws InvalidFormatException { try { - Document xmlContentTypetDoc = SAXHelper.readSAXDocument(in); + Document xmlContentTypetDoc = DocumentHelper.readDocument(in); // Default content types NodeList defaultTypes = xmlContentTypetDoc.getDocumentElement().getElementsByTagName(DEFAULT_TAG_NAME); diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/unmarshallers/PackagePropertiesUnmarshaller.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/unmarshallers/PackagePropertiesUnmarshaller.java index b4e3e83723..eb94f680e6 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/unmarshallers/PackagePropertiesUnmarshaller.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/unmarshallers/PackagePropertiesUnmarshaller.java @@ -31,7 +31,7 @@ import org.apache.poi.openxml4j.opc.ZipPackage; import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; import org.apache.poi.openxml4j.opc.internal.PartUnmarshaller; import org.apache.poi.openxml4j.opc.internal.ZipHelper; -import org.apache.poi.util.SAXHelper; +import org.apache.poi.util.DocumentHelper; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -105,7 +105,7 @@ public final class PackagePropertiesUnmarshaller implements PartUnmarshaller { Document xmlDoc; try { - xmlDoc = SAXHelper.readSAXDocument(in); + xmlDoc = DocumentHelper.readDocument(in); /* Check OPC compliance */ diff --git a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java index 22bdd4e0fd..9ff6e7fb02 100644 --- a/src/ooxml/java/org/apache/poi/util/DocumentHelper.java +++ b/src/ooxml/java/org/apache/poi/util/DocumentHelper.java @@ -17,6 +17,10 @@ package org.apache.poi.util; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Method; + import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -25,20 +29,78 @@ import javax.xml.stream.events.Namespace; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.xml.sax.SAXException; -public class DocumentHelper { +public final class DocumentHelper { + private static POILogger logger = POILogFactory.getLogger(DocumentHelper.class); + + private DocumentHelper() {} - private static final DocumentBuilder newDocumentBuilder; - static { + /** + * Creates a new document builder, with sensible defaults + */ + public static synchronized DocumentBuilder newDocumentBuilder() { try { - newDocumentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); + documentBuilder.setEntityResolver(SAXHelper.IGNORING_ENTITY_RESOLVER); + return documentBuilder; } catch (ParserConfigurationException e) { throw new IllegalStateException("cannot create a DocumentBuilder", e); } } + private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + static { + documentBuilderFactory.setNamespaceAware(true); + documentBuilderFactory.setValidating(false); + trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true); + trySetXercesSecurityManager(documentBuilderFactory); + } + + private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) { + try { + documentBuilderFactory.setFeature(feature, enabled); + } catch (Exception e) { + logger.log(POILogger.INFO, "SAX Feature unsupported", feature, e); + } + } + private static void trySetXercesSecurityManager(DocumentBuilderFactory documentBuilderFactory) { + // Try built-in JVM one first, standalone if not + for (String securityManagerClassName : new String[] { + "com.sun.org.apache.xerces.internal.util.SecurityManager", + "org.apache.xerces.util.SecurityManager" + }) { + try { + Object mgr = Class.forName(securityManagerClassName).newInstance(); + Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); + setLimit.invoke(mgr, 4096); + documentBuilderFactory.setAttribute("http://apache.org/xml/properties/security-manager", mgr); + // Stop once one can be setup without error + return; + } catch (Exception e) { + logger.log(POILogger.INFO, "SAX Security Manager could not be setup", e); + } + } + } + + /** + * Parses the given stream via the default (sensible) + * DocumentBuilder + * @param inp Stream to read the XML data from + * @return the parsed Document + */ + public static Document readDocument(InputStream inp) throws IOException, SAXException { + return newDocumentBuilder().parse(inp); + } + + // must only be used to create empty documents, do not use it for parsing! + private static final DocumentBuilder documentBuilderSingleton = newDocumentBuilder(); + + /** + * Creates a new DOM Document + */ public static synchronized Document createDocument() { - return newDocumentBuilder.newDocument(); + return documentBuilderSingleton.newDocument(); } /** diff --git a/src/ooxml/java/org/apache/poi/util/SAXHelper.java b/src/ooxml/java/org/apache/poi/util/SAXHelper.java index 81049a9a2e..bbc58e5130 100644 --- a/src/ooxml/java/org/apache/poi/util/SAXHelper.java +++ b/src/ooxml/java/org/apache/poi/util/SAXHelper.java @@ -18,19 +18,17 @@ package org.apache.poi.util; import java.io.IOException; -import java.io.InputStream; import java.io.StringReader; import java.lang.reflect.Method; import javax.xml.XMLConstants; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; -import org.w3c.dom.Document; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; /** @@ -39,43 +37,43 @@ import org.xml.sax.SAXException; public final class SAXHelper { private static POILogger logger = POILogFactory.getLogger(SAXHelper.class); - private static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() { + private SAXHelper() {} + + /** + * Creates a new SAX XMLReader, with sensible defaults + */ + public static synchronized XMLReader newXMLReader() throws SAXException, ParserConfigurationException { + XMLReader xmlReader = saxFactory.newSAXParser().getXMLReader(); + xmlReader.setEntityResolver(IGNORING_ENTITY_RESOLVER); + trySetSAXFeature(xmlReader, XMLConstants.FEATURE_SECURE_PROCESSING, true); + trySetXercesSecurityManager(xmlReader); + return xmlReader; + } + + static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() { @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { return new InputSource(new StringReader("")); } }; - - private static final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + + private static final SAXParserFactory saxFactory; static { - documentBuilderFactory.setNamespaceAware(true); - documentBuilderFactory.setValidating(false); - trySetSAXFeature(documentBuilderFactory, XMLConstants.FEATURE_SECURE_PROCESSING, true); - trySetXercesSecurityManager(documentBuilderFactory); + saxFactory = SAXParserFactory.newInstance(); + saxFactory.setValidating(false); + saxFactory.setNamespaceAware(true); } - - /** - * Creates a new document builder, with sensible defaults - */ - public static synchronized DocumentBuilder getDocumentBuilder() { + + private static void trySetSAXFeature(XMLReader xmlReader, String feature, boolean enabled) { try { - DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); - documentBuilder.setEntityResolver(IGNORING_ENTITY_RESOLVER); - return documentBuilder; - } catch (ParserConfigurationException e) { - throw new IllegalStateException("cannot create a DocumentBuilder", e); - } - } - - private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) { - try { - documentBuilderFactory.setFeature(feature, enabled); + xmlReader.setFeature(feature, enabled); } catch (Exception e) { logger.log(POILogger.INFO, "SAX Feature unsupported", feature, e); } } - private static void trySetXercesSecurityManager(DocumentBuilderFactory documentBuilderFactory) { + + private static void trySetXercesSecurityManager(XMLReader xmlReader) { // Try built-in JVM one first, standalone if not for (String securityManagerClassName : new String[] { "com.sun.org.apache.xerces.internal.util.SecurityManager", @@ -85,7 +83,7 @@ public final class SAXHelper { Object mgr = Class.forName(securityManagerClassName).newInstance(); Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE); setLimit.invoke(mgr, 4096); - documentBuilderFactory.setAttribute("http://apache.org/xml/properties/security-manager", mgr); + xmlReader.setProperty("http://apache.org/xml/properties/security-manager", mgr); // Stop once one can be setup without error return; } catch (Exception e) { @@ -93,14 +91,4 @@ public final class SAXHelper { } } } - - /** - * Parses the given stream via the default (sensible) - * SAX Reader - * @param inp Stream to read the XML data from - * @return the SAX processed Document - */ - public static Document readSAXDocument(InputStream inp) throws IOException, SAXException { - return getDocumentBuilder().parse(inp); - } } diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java index a8973dd32e..5263473bca 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java @@ -22,12 +22,11 @@ import java.util.ArrayList; import java.util.List; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.util.SAXHelper; import org.apache.poi.xssf.usermodel.XSSFRelation; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -134,10 +133,8 @@ public class ReadOnlySharedStringsTable extends DefaultHandler { */ public void readFrom(InputStream is) throws IOException, SAXException { InputSource sheetSource = new InputSource(is); - SAXParserFactory saxFactory = SAXParserFactory.newInstance(); try { - SAXParser saxParser = saxFactory.newSAXParser(); - XMLReader sheetParser = saxParser.getXMLReader(); + XMLReader sheetParser = SAXHelper.newXMLReader(); sheetParser.setContentHandler(this); sheetParser.parse(sheetSource); } catch(ParserConfigurationException e) { diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index b70c4abb34..c52bed687d 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -24,8 +24,6 @@ import java.util.Locale; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import org.apache.poi.POIXMLProperties; import org.apache.poi.POIXMLProperties.CoreProperties; @@ -35,6 +33,7 @@ import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.util.SAXHelper; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; @@ -174,10 +173,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor } InputSource sheetSource = new InputSource(sheetInputStream); - SAXParserFactory saxFactory = SAXParserFactory.newInstance(); try { - SAXParser saxParser = saxFactory.newSAXParser(); - XMLReader sheetParser = saxParser.getXMLReader(); + XMLReader sheetParser = SAXHelper.newXMLReader(); ContentHandler handler = new XSSFSheetXMLHandler( styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults); sheetParser.setContentHandler(handler); diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java index e9b75e5365..90b89e36bf 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExportToXml.java @@ -28,8 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Vector; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; @@ -45,7 +43,7 @@ import javax.xml.validation.Validator; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DateUtil; -import org.apache.poi.util.XMLHelper; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFMap; import org.apache.poi.xssf.usermodel.XSSFRow; @@ -106,15 +104,6 @@ public class XSSFExportToXml implements Comparator{ exportToXML(os, "UTF-8", validate); } - private Document getEmptyDocument() throws ParserConfigurationException{ - - DocumentBuilderFactory dbfac = XMLHelper.getDocumentBuilderFactory(); - DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); - Document doc = docBuilder.newDocument(); - - return doc; - } - /** * Exports the data in an XML stream * @@ -132,7 +121,7 @@ public class XSSFExportToXml implements Comparator{ String rootElement = map.getCtMap().getRootElement(); - Document doc = getEmptyDocument(); + Document doc = DocumentHelper.createDocument(); Element root = null; diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFImportFromXML.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFImportFromXML.java index 945f1677ca..534ef0d83e 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFImportFromXML.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFImportFromXML.java @@ -24,16 +24,15 @@ import java.util.List; import javax.xml.namespace.NamespaceContext; import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import org.apache.poi.util.DocumentHelper; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; -import org.apache.poi.util.XMLHelper; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFMap; import org.apache.poi.xssf.usermodel.XSSFRow; @@ -76,11 +75,9 @@ public class XSSFImportFromXML { * @throws ParserConfigurationException if there are problems with XML parser configuration * @throws IOException if there are problems reading the input string */ - public void importFromXML(String xmlInputString) throws SAXException, XPathExpressionException, ParserConfigurationException, IOException { + public void importFromXML(String xmlInputString) throws SAXException, XPathExpressionException, IOException { - DocumentBuilderFactory factory = XMLHelper.getDocumentBuilderFactory(); - factory.setNamespaceAware(true); - DocumentBuilder builder = factory.newDocumentBuilder(); + DocumentBuilder builder = DocumentHelper.newDocumentBuilder(); Document doc = builder.parse(new InputSource(new StringReader(xmlInputString.trim()))); diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index 1c64fd8629..c42ce63d43 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -42,7 +42,6 @@ import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; import org.apache.poi.util.DocumentHelper; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; -import org.apache.poi.util.SAXHelper; import org.apache.poi.util.TempFile; import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -218,7 +217,7 @@ public final class TestPackage extends TestCase { PackagePartName relName = PackagingURIHelper.createPartName(PackageRelationship.getContainerPartRelationship()); PackagePart relPart = pkg.getPart(relName); - Document xmlRelationshipsDoc = SAXHelper.readSAXDocument(relPart.getInputStream()); + Document xmlRelationshipsDoc = DocumentHelper.readDocument(relPart.getInputStream()); Element root = xmlRelationshipsDoc.getDocumentElement(); NodeList nodeList = root.getElementsByTagName(PackageRelationship.RELATIONSHIP_TAG_NAME);