From 7de8c2eaecfdd76ec77104ced0e5ab75c834a7d8 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Thu, 27 Dec 2018 20:51:38 +0000 Subject: [PATCH] Re-instate two dev-tools that we lost in some refactoring, at least OOXMLPrettyPrint is still very useful for comparing ooxml-files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849813 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/ooxml/dev/OOXMLLister.java | 145 ++++++++++++++++++ .../poi/ooxml/dev/OOXMLPrettyPrint.java | 137 +++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java create mode 100644 src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java new file mode 100644 index 0000000000..b585be4b00 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLLister.java @@ -0,0 +1,145 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.*;
+import java.util.ArrayList;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+
+/**
+ * Prints out the contents of an OOXML container.
+ * Useful for seeing what parts are defined, and how
+ * they're all related to each other.
+ */
+public class OOXMLLister implements Closeable {
+    private final OPCPackage container;
+    private final PrintStream disp;
+
+    /** Creates a lister for {@code container} that prints to {@code System.out}. */
+    public OOXMLLister(OPCPackage container) {
+        this(container, System.out);
+    }
+
+    /** Creates a lister for {@code container} that prints to {@code disp}. */
+    public OOXMLLister(OPCPackage container, PrintStream disp) {
+        this.container = container;
+        this.disp = disp;
+    }
+
+    /**
+     * Figures out how big a given PackagePart is by reading its stream to the end.
+     *
+     * @param part the PackagePart
+     * @return the size of the PackagePart in bytes
+     * @throws IOException if the part can't be read
+     */
+    public static long getSize(PackagePart part) throws IOException {
+        try (InputStream in = part.getInputStream()) {
+            byte[] buffer = new byte[8192];
+            long size = 0;
+            int read;
+            while ((read = in.read(buffer)) != -1) {
+                size += read;
+            }
+            return size;
+        }
+    }
+
+    /**
+     * Displays information on all the different
+     * parts of the OOXML file container.
+     * @throws InvalidFormatException if the package relations are invalid
+     * @throws IOException if the package can't be read
+     */
+    public void displayParts() throws InvalidFormatException, IOException {
+        ArrayList<PackagePart> parts = container.getParts();
+        for (PackagePart part : parts) {
+            disp.println(part.getPartName());
+            disp.println("\t" + part.getContentType());
+
+            // the size is printed for every part except /docProps/core.xml
+            if (!part.getPartName().toString().equals("/docProps/core.xml")) {
+                disp.println("\t" + getSize(part) + " bytes");
+            }
+
+            // relationship parts are not asked for relationships of their own
+            if (!part.isRelationshipPart()) {
+                PackageRelationshipCollection rels = part.getRelationships();
+                disp.println("\t" + rels.size() + " relations");
+                for (PackageRelationship rel : rels) {
+                    displayRelation(rel, "\t ");
+                }
+            }
+        }
+    }
+
+    /**
+     * Displays information on all the different
+     * relationships between different parts
+     * of the OOXML file container.
+     */
+    public void displayRelations() {
+        for (PackageRelationship rel : container.getRelationships()) {
+            displayRelation(rel, "");
+        }
+    }
+
+    /** Prints one relationship, prefixing every output line with {@code indent}. */
+    private void displayRelation(PackageRelationship rel, String indent) {
+        disp.println(indent+"Relationship:");
+        disp.println(indent+"\tFrom: "+ rel.getSourceURI());
+        disp.println(indent+"\tTo: " + rel.getTargetURI());
+        disp.println(indent+"\tID: " + rel.getId());
+        disp.println(indent+"\tMode: " + rel.getTargetMode());
+        disp.println(indent+"\tType: " + rel.getRelationshipType());
+    }
+
+    /** Closes the underlying package. */
+    @Override
+    public void close() throws IOException {
+        container.close();
+    }
+
+    /** Lists the file named by the first argument; exits with 1 if no argument is given and 2 if the file does not exist. */
+    public static void main(String[] args) throws IOException, InvalidFormatException {
+        if (args.length == 0) {
+            System.err.println("Use:");
+            System.err.println("\tjava OOXMLLister <filename>");
+            System.exit(1);
+        }
+
+        File f = new File(args[0]);
+        if (!f.exists()) {
+            System.err.println("Error, file not found!");
+            System.err.println("\t" + f);
+            System.exit(2);
+        }
+
+        try (OOXMLLister lister = new OOXMLLister(OPCPackage.open(f.toString(), PackageAccess.READ))) {
+            lister.disp.println(f + "\n");
+            lister.displayParts();
+            lister.disp.println();
+            lister.displayRelations();
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java
new file mode 100644
index 0000000000..e48f65ffb9
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/ooxml/dev/OOXMLPrettyPrint.java
@@ -0,0 +1,137 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.ooxml.dev;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipOutputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.poi.openxml4j.opc.internal.ZipHelper;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+import org.apache.poi.util.IOUtils;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+
+/**
+ * Reads a zipped OOXML file and produces a copy with the included
+ * pretty-printed XML files.
+ *
+ * This is useful for comparing OOXML files produced by different tools as they often
+ * use different formatting of the XML.
+ */
+public class OOXMLPrettyPrint {
+    private final DocumentBuilder documentBuilder;
+
+    /** Creates the XML parser and lowers the minimum inflate ratio of {@link ZipSecureFile}. */
+    public OOXMLPrettyPrint() throws ParserConfigurationException {
+        // allow files with much lower inflation rate here as there is no risk of Zip Bomb attacks in this developer tool
+        ZipSecureFile.setMinInflateRatio(0.00001);
+        documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+    }
+
+    /** Takes pairs of arguments: the file to read followed by the file to write. */
+    public static void main(String[] args) throws Exception {
+        if (args.length <= 1 || args.length % 2 != 0) {
+            System.err.println("Use:");
+            System.err.println("\tjava OOXMLPrettyPrint [<filename> <outfilename>] ...");
+            System.exit(1);
+        }
+
+        for (int i = 0; i < args.length; i += 2) {
+            File f = new File(args[i]);
+            if (!f.exists()) {
+                System.err.println("Error, file not found!");
+                System.err.println("\t" + f);
+                System.exit(2);
+            }
+
+            handleFile(f, new File(args[i+1]));
+        }
+        System.out.println("Done.");
+    }
+
+    /** Copies {@code file} to {@code outFile}, pretty-printing the XML entries on the way. */
+    private static void handleFile(File file, File outFile) throws ZipException,
+            IOException, ParserConfigurationException {
+        System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile);
+        try (ZipSecureFile zipFile = ZipHelper.openZipFile(file);
+             ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))) {
+            new OOXMLPrettyPrint().handle(zipFile, out);
+        } finally {
+            System.out.println();
+        }
+    }
+
+    /** Copies every zip entry to {@code out}; entries named {@code *.xml} or {@code *.rels} are re-serialized with indentation. */
+    private void handle(ZipSecureFile file, ZipOutputStream out) throws IOException {
+        Enumeration<? extends ZipArchiveEntry> entries = file.getEntries();
+        while (entries.hasMoreElements()) {
+            ZipArchiveEntry entry = entries.nextElement();
+            String name = entry.getName();
+            out.putNextEntry(new ZipEntry(name));
+            // try-with-resources so the entry's input stream is closed once consumed
+            try (java.io.InputStream in = file.getInputStream(entry)) {
+                if (name.endsWith(".xml") || name.endsWith(".rels")) {
+                    Document document = documentBuilder.parse(new InputSource(in));
+                    document.setXmlStandalone(true);
+                    pretty(document, out, 2);
+                } else {
+                    System.out.println("Not pretty-printing non-XML file " + name);
+                    IOUtils.copy(in, out);
+                }
+            } catch (Exception e) {
+                throw new IOException("While handling entry " + name, e);
+            } finally {
+                out.closeEntry();
+            }
+            System.out.print(".");
+        }
+    }
+
+    /** Serializes {@code document} as UTF-8 to {@code outputStream}, indented by {@code indent} if positive. */
+    private static void pretty(Document document, OutputStream outputStream, int indent) throws TransformerException {
+        Transformer transformer = TransformerFactory.newInstance().newTransformer();
+        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+        if (indent > 0) {
+            // set properties to indent the resulting XML nicely
+            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", Integer.toString(indent));
+        }
+        Result result = new StreamResult(outputStream);
+        Source source = new DOMSource(document);
+        transformer.transform(source, result);
+    }
+}