mirror of https://github.com/apache/poi.git
Improved parsing of OOXML documents, see Bugzilla 47668
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@803667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
58f32a32b5
commit
e5aac25b00
|
@ -33,6 +33,7 @@
|
|||
|
||||
<changes>
|
||||
<release version="3.5-beta7" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">47668 - Improved parsing of OOXML documents</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47652 - Added support for reading encrypted workbooks</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47604 - Implementation of an XML to XLSX Importer using Custom XML Mapping</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47620 - Avoid FormulaParseException in XSSFWorkbook.setRepeatingRowsAndColumns when removing repeated rows and columns</action>
|
||||
|
|
|
@ -41,8 +41,7 @@ public class EmbeddedObjects {
|
|||
}
|
||||
// Excel Workbook – OpenXML file format
|
||||
else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
|
||||
OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
|
||||
XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage);
|
||||
XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());
|
||||
}
|
||||
// Word Document – binary (OLE2CDF) file format
|
||||
else if (contentType.equals("application/msword")) {
|
||||
|
@ -50,8 +49,7 @@ public class EmbeddedObjects {
|
|||
}
|
||||
// Word Document – OpenXML file format
|
||||
else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
|
||||
OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
|
||||
XWPFDocument document = new XWPFDocument(docPackage);
|
||||
XWPFDocument document = new XWPFDocument(pPart.getInputStream());
|
||||
}
|
||||
// PowerPoint Document – binary file format
|
||||
else if (contentType.equals("application/vnd.ms-powerpoint")) {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
package org.apache.poi;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -191,21 +191,33 @@ public abstract class POIXMLDocument extends POIXMLDocumentPart{
|
|||
return pkg;
|
||||
}
|
||||
|
||||
protected final void load(POIXMLFactory factory) throws IOException {
|
||||
Map<PackageRelationship, POIXMLDocumentPart> context = new HashMap<PackageRelationship, POIXMLDocumentPart>();
|
||||
try {
|
||||
read(factory, context);
|
||||
} catch (OpenXML4JException e){
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
onDocumentRead();
|
||||
context.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out this document to an OutputStream.
|
||||
*
|
||||
* @param stream - the java OutputStream you wish to write the XLS to
|
||||
* @param stream - the java OutputStream you wish to write the file to
|
||||
*
|
||||
* @exception IOException if anything can't be written.
|
||||
*/
|
||||
public final void write(OutputStream stream) throws IOException {
|
||||
//force all children to commit their changes into the underlying OOXML Package
|
||||
onSave();
|
||||
Set<PackageRelationship> context = new HashSet<PackageRelationship>();
|
||||
onSave(context);
|
||||
context.clear();
|
||||
|
||||
//save extended and custom properties
|
||||
getProperties().commit();
|
||||
|
||||
getPackage().save(stream);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -17,8 +17,7 @@
|
|||
package org.apache.poi;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.net.URI;
|
||||
|
||||
import org.apache.xmlbeans.XmlOptions;
|
||||
|
@ -75,11 +74,11 @@ public class POIXMLDocumentPart {
|
|||
|
||||
/**
|
||||
* Creates a POIXMLDocumentPart representing the given package part and relationship.
|
||||
* Called by {@link #read(POIXMLFactory)} when reading in an existing file.
|
||||
* Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an existing file.
|
||||
*
|
||||
* @param part - The package part that holds xml data representing this sheet.
|
||||
* @param rel - the relationship of the given package part
|
||||
* @see #read(POIXMLFactory)
|
||||
* @see #read(POIXMLFactory, java.util.Map)
|
||||
*/
|
||||
public POIXMLDocumentPart(PackagePart part, PackageRelationship rel){
|
||||
this.relations = new LinkedList<POIXMLDocumentPart>();
|
||||
|
@ -172,11 +171,14 @@ public class POIXMLDocumentPart {
|
|||
* Save changes in the underlying OOXML package.
|
||||
* Recursively fires {@link #commit()} for each package part
|
||||
*/
|
||||
protected final void onSave() throws IOException{
|
||||
commit();
|
||||
for(POIXMLDocumentPart p : relations){
|
||||
p.onSave();
|
||||
}
|
||||
protected final void onSave(Set<PackageRelationship> alreadySaved) throws IOException{
|
||||
commit();
|
||||
alreadySaved.add(this.getPackageRelationship());
|
||||
for(POIXMLDocumentPart p : relations){
|
||||
if (!alreadySaved.contains(p.getPackageRelationship())) {
|
||||
p.onSave(alreadySaved);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -228,10 +230,10 @@ public class POIXMLDocumentPart {
|
|||
*
|
||||
* @param factory the factory object that creates POIXMLDocumentPart instances
|
||||
*/
|
||||
protected final void read(POIXMLFactory factory) throws OpenXML4JException {
|
||||
PackageRelationshipCollection rels = packagePart.getRelationships();
|
||||
for (PackageRelationship rel : rels) {
|
||||
if(rel.getTargetMode() == TargetMode.INTERNAL){
|
||||
protected void read(POIXMLFactory factory, Map<PackageRelationship, POIXMLDocumentPart> context) throws OpenXML4JException {
|
||||
PackageRelationshipCollection rels = packagePart.getRelationships();
|
||||
for (PackageRelationship rel : rels) {
|
||||
if(rel.getTargetMode() == TargetMode.INTERNAL){
|
||||
URI uri = rel.getTargetURI();
|
||||
|
||||
PackagePart p;
|
||||
|
@ -249,16 +251,22 @@ public class POIXMLDocumentPart {
|
|||
}
|
||||
}
|
||||
|
||||
POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
|
||||
childPart.parent = this;
|
||||
addRelation(childPart);
|
||||
|
||||
if(p != null && p.hasRelationships()) childPart.read(factory);
|
||||
}
|
||||
}
|
||||
if (!context.containsKey(rel)) {
|
||||
POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
|
||||
childPart.parent = this;
|
||||
addRelation(childPart);
|
||||
if(p != null){
|
||||
context.put(rel, childPart);
|
||||
if(p.hasRelationships()) childPart.read(factory, context);
|
||||
}
|
||||
}
|
||||
else {
|
||||
addRelation(context.get(rel));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fired when a new package part is created
|
||||
*/
|
||||
|
|
|
@ -129,8 +129,10 @@ public final class PackageRelationship {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.id.hashCode() + this.relationshipType.hashCode()
|
||||
+ this.source.hashCode() + this.targetMode.hashCode()
|
||||
return this.id.hashCode()
|
||||
+ this.relationshipType.hashCode()
|
||||
+ (this.source == null ? 0 : this.source.hashCode())
|
||||
+ this.targetMode.hashCode()
|
||||
+ this.targetUri.hashCode();
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,9 @@ package org.apache.poi.util;
|
|||
import org.apache.poi.openxml4j.opc.*;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.POIXMLException;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URI;
|
||||
|
@ -41,6 +43,18 @@ public class PackageHelper {
|
|||
return clone(pkg, createTempFile());
|
||||
}
|
||||
|
||||
public static OPCPackage open(InputStream is) throws IOException {
|
||||
File file = TempFile.createTempFile("poi-ooxml-", ".tmp");
|
||||
FileOutputStream out = new FileOutputStream(file);
|
||||
IOUtils.copy(is, out);
|
||||
out.close();
|
||||
try {
|
||||
return OPCPackage.open(file.getAbsolutePath());
|
||||
} catch (InvalidFormatException e){
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clone the specified package.
|
||||
*
|
||||
|
|
|
@ -149,12 +149,14 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
|
|||
super(ensureWriteAccess(pkg));
|
||||
|
||||
//build a tree of POIXMLDocumentParts, this workbook being the root
|
||||
try {
|
||||
read(XSSFFactory.getInstance());
|
||||
} catch (OpenXML4JException e){
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
onDocumentRead();
|
||||
load(XSSFFactory.getInstance());
|
||||
}
|
||||
|
||||
/**
 * Constructs a workbook from the data of the given stream.
 * The stream is turned into a package by <code>PackageHelper.open(InputStream)</code>
 * and the parts of that package are then loaded with the XSSF factory.
 *
 * @param is the stream to read the workbook from
 * @throws IOException if the package cannot be opened from the stream or loaded
 */
public XSSFWorkbook(InputStream is) throws IOException {
|
||||
super(PackageHelper.open(is));
|
||||
|
||||
//build a tree of POIXMLDocumentParts, this workbook being the root
|
||||
load(XSSFFactory.getInstance());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
|
@ -63,12 +64,14 @@ public class XWPFDocument extends POIXMLDocument {
|
|||
super(ensureWriteAccess(pkg));
|
||||
|
||||
//build a tree of POIXMLDocumentParts, this document being the root
|
||||
try {
|
||||
read(XWPFFactory.getInstance());
|
||||
} catch (OpenXML4JException e){
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
onDocumentRead();
|
||||
load(XWPFFactory.getInstance());
|
||||
}
|
||||
|
||||
/**
 * Constructs a document from the data of the given stream.
 * The stream is turned into a package by <code>PackageHelper.open(InputStream)</code>
 * and the parts of that package are then loaded with the XWPF factory.
 *
 * @param is the stream to read the document from
 * @throws IOException if the package cannot be opened from the stream or loaded
 */
public XWPFDocument(InputStream is) throws IOException {
|
||||
super(PackageHelper.open(is));
|
||||
|
||||
//build a tree of POIXMLDocumentParts, this document being the root
|
||||
load(XWPFFactory.getInstance());
|
||||
}
|
||||
|
||||
public XWPFDocument(){
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.TempFile;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Test recursive read and write of OPC packages
|
||||
*/
|
||||
public class TestPOIXMLDocument extends TestCase
|
||||
{
|
||||
private static class OPCParser extends POIXMLDocument {
|
||||
|
||||
public OPCParser(OPCPackage pkg) throws IOException {
|
||||
super(pkg);
|
||||
}
|
||||
|
||||
public List<PackagePart> getAllEmbedds() {
|
||||
throw new RuntimeException("not supported");
|
||||
}
|
||||
|
||||
public void parse(POIXMLFactory factory) throws OpenXML4JException, IOException{
|
||||
load(factory);
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestFactory extends POIXMLFactory {
|
||||
|
||||
public POIXMLDocumentPart createDocumentPart(PackageRelationship rel, PackagePart part){
|
||||
return new POIXMLDocumentPart(part, rel);
|
||||
}
|
||||
|
||||
public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor){
|
||||
throw new RuntimeException("not supported");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void assertReadWrite(String path) throws Exception {
|
||||
|
||||
OPCPackage pkg1 = OPCPackage.open(path);
|
||||
OPCParser doc = new OPCParser(pkg1);
|
||||
doc.parse(new TestFactory());
|
||||
|
||||
File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
|
||||
FileOutputStream out = new FileOutputStream(tmp);
|
||||
doc.write(out);
|
||||
out.close();
|
||||
|
||||
OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
|
||||
|
||||
assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
|
||||
|
||||
ArrayList<PackagePart> l1 = pkg1.getParts();
|
||||
ArrayList<PackagePart> l2 = pkg2.getParts();
|
||||
|
||||
assertEquals(l1.size(), l2.size());
|
||||
for (int i=0; i < l1.size(); i++){
|
||||
PackagePart p1 = l1.get(i);
|
||||
PackagePart p2 = l2.get(i);
|
||||
|
||||
assertEquals(p1.getContentType(), p2.getContentType());
|
||||
assertEquals(p1.hasRelationships(), p2.hasRelationships());
|
||||
if(p1.hasRelationships()){
|
||||
assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
|
||||
}
|
||||
assertEquals(p1.getPartName(), p2.getPartName());
|
||||
}
|
||||
}
|
||||
|
||||
public void testPPTX() throws Exception {
|
||||
File file = new File(System.getProperty("OOXML.testdata.path"), "PPTWithAttachments.pptx");
|
||||
assertReadWrite(file.getAbsolutePath());
|
||||
}
|
||||
|
||||
public void testXLSX() throws Exception {
|
||||
File file = new File(System.getProperty("OOXML.testdata.path"), "ExcelWithAttachments.xlsx");
|
||||
assertReadWrite(file.getAbsolutePath());
|
||||
}
|
||||
|
||||
public void testDOCX() throws Exception {
|
||||
File file = new File(System.getProperty("OOXML.testdata.path"), "WordWithAttachments.docx");
|
||||
assertReadWrite(file.getAbsolutePath());
|
||||
}
|
||||
}
|
|
@ -41,10 +41,7 @@ public class XSSFTestDataSamples {
|
|||
public static final XSSFWorkbook openSampleWorkbook(String sampleName) {
|
||||
InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleName);
|
||||
try {
|
||||
OPCPackage pkg = OPCPackage.open(is);
|
||||
return new XSSFWorkbook(pkg);
|
||||
} catch (InvalidFormatException e) {
|
||||
throw new RuntimeException(e);
|
||||
return new XSSFWorkbook(is);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.poi.xssf.usermodel;
|
|||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.zip.CRC32;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
@ -29,10 +31,7 @@ import org.apache.poi.ss.util.CellRangeAddress;
|
|||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||
import org.apache.poi.xssf.XSSFITestDataProvider;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
import org.apache.poi.openxml4j.opc.ContentTypes;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.openxml4j.opc.*;
|
||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
||||
import org.apache.poi.util.TempFile;
|
||||
import org.apache.poi.POIXMLProperties;
|
||||
|
@ -275,7 +274,41 @@ public final class TestXSSFWorkbook extends BaseTestWorkbook {
|
|||
opcProps = workbook.getProperties().getCoreProperties().getUnderlyingProperties();
|
||||
assertEquals("Testing Bugzilla #47460", opcProps.getTitleProperty().getValue());
|
||||
assertEquals("poi-dev@poi.apache.org", opcProps.getCreatorProperty().getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that the attached test data was not modified. If this test method
|
||||
* fails, the test data is not working properly.
|
||||
*/
|
||||
public void test47668() throws Exception {
|
||||
XSSFWorkbook workbook = XSSFTestDataSamples.openSampleWorkbook("47668.xlsx");
|
||||
List<XSSFPictureData> allPictures = workbook.getAllPictures();
|
||||
assertEquals(2, allPictures.size());
|
||||
|
||||
PackagePartName imagePartName = PackagingURIHelper
|
||||
.createPartName("/xl/media/image1.jpeg");
|
||||
PackagePart imagePart = workbook.getPackage().getPart(imagePartName);
|
||||
assertNotNull(imagePart);
|
||||
|
||||
for (XSSFPictureData pictureData : allPictures) {
|
||||
PackagePart picturePart = pictureData.getPackagePart();
|
||||
assertSame(imagePart, picturePart);
|
||||
}
|
||||
|
||||
XSSFSheet sheet0 = workbook.getSheetAt(0);
|
||||
XSSFDrawing drawing0 = sheet0.createDrawingPatriarch();
|
||||
XSSFPictureData pictureData0 = (XSSFPictureData) drawing0.getRelations().get(0);
|
||||
byte[] data0 = pictureData0.getData();
|
||||
CRC32 crc0 = new CRC32();
|
||||
crc0.update(data0);
|
||||
|
||||
XSSFSheet sheet1 = workbook.getSheetAt(1);
|
||||
XSSFDrawing drawing1 = sheet1.createDrawingPatriarch();
|
||||
XSSFPictureData pictureData1 = (XSSFPictureData) drawing1.getRelations().get(0);
|
||||
byte[] data1 = pictureData1.getData();
|
||||
CRC32 crc1 = new CRC32();
|
||||
crc1.update(data1);
|
||||
|
||||
assertEquals(crc0.getValue(), crc1.getValue());
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue