From 8c9bade1fcc7bd16c0a29479f795dd95489fd986 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 23 May 2008 15:05:12 +0000 Subject: [PATCH] Patch from Yury from bug #45018 - Support for fetching embedded documents from within an OOXML file git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@659564 13f79535-47bb-0310-9956-ffa450edef68 --- build.xml | 1 + src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../java/org/apache/poi/POIXMLDocument.java | 22 +++++++++++++++++++ .../org/apache/poi/POIXMLTextExtractor.java | 7 ++++++ .../poi/xwpf/extractor/XWPFWordExtractor.java | 2 +- 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 121cbbd2d1..e1cb26c353 100644 --- a/build.xml +++ b/build.xml @@ -562,6 +562,7 @@ under the License. + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 5a90a0463a..f26c6271b9 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45018 - Support for fetching embeded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data Text Extraction support for the new OOXML files (.xlsx, .docx and .pptx) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6709b32719..4fc778a5f9 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45018 - Support for fetching embeded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data Text Extraction support for the new OOXML files (.xlsx, 
.docx and .pptx) diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 9fa4789db0..7be6372759 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -19,6 +19,8 @@ package org.apache.poi; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; +import java.util.LinkedList; +import java.util.List; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.IOUtils; @@ -39,6 +41,8 @@ public abstract class POIXMLDocument { public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; + public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; + /** The OPC Package */ private Package pkg; @@ -50,6 +54,10 @@ public abstract class POIXMLDocument { */ private POIXMLProperties properties; + /** + * The embedded OLE2 files in the OPC package + */ + private List embedds; protected POIXMLDocument() {} @@ -62,6 +70,12 @@ public abstract class POIXMLDocument { // Get core part this.corePart = this.pkg.getPart(coreDocRelationship); + + // Get any embedded OLE2 documents + this.embedds = new LinkedList(); + for(PackageRelationship rel : corePart.getRelationshipsByType(OLE_OBJECT_REL_TYPE)) { + embedds.add(getTargetPart(rel)); + } } catch (OpenXML4JException e) { throw new IOException(e.toString()); } @@ -190,4 +204,12 @@ public abstract class POIXMLDocument { } return properties; } + + /** + * Get the document's embedded files. 
+ */ + public List getAllEmbedds() throws OpenXML4JException + { + return embedds; + } } diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index ae8514c278..8df75d949d 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -47,4 +47,11 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { public ExtendedProperties getExtendedProperties() throws IOException, OpenXML4JException, XmlException { return document.getProperties().getExtendedProperties(); } + + /** + * Returns opened document + */ + public POIXMLDocument getDocument(){ + return document; + } } diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java index 8ca4f0349b..64c8e3f780 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -58,7 +58,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { public static void main(String[] args) throws Exception { if(args.length < 1) { System.err.println("Use:"); - System.err.println(" HXFWordExtractor "); + System.err.println(" HXFWordExtractor "); System.exit(1); } POIXMLTextExtractor extractor =