Improved parsing of OOXML documents, see Bugzilla 47668

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@803667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-08-12 18:59:34 +00:00
parent 58f32a32b5
commit e5aac25b00
12 changed files with 239 additions and 51 deletions

View File

@ -33,6 +33,7 @@
<changes>
<release version="3.5-beta7" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="add">47668 - Improved parsing of OOXML documents</action>
<action dev="POI-DEVELOPERS" type="add">47652 - Added support for reading encrypted workbooks</action>
<action dev="POI-DEVELOPERS" type="add">47604 - Implementation of an XML to XLSX Importer using Custom XML Mapping</action>
<action dev="POI-DEVELOPERS" type="fix">47620 - Avoid FormulaParseException in XSSFWorkbook.setRepeatingRowsAndColumns when removing repeated rows and columns</action>

View File

@ -41,8 +41,7 @@ public class EmbeddedObjects {
}
// Excel Workbook OpenXML file format
else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage);
XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());
}
// Word Document binary (OLE2CDF) file format
else if (contentType.equals("application/msword")) {
@ -50,8 +49,7 @@ public class EmbeddedObjects {
}
// Word Document OpenXML file format
else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
XWPFDocument document = new XWPFDocument(docPackage);
XWPFDocument document = new XWPFDocument(pPart.getInputStream());
}
// PowerPoint Document binary file format
else if (contentType.equals("application/vnd.ms-powerpoint")) {

View File

@ -17,7 +17,7 @@
package org.apache.poi;
import java.io.*;
import java.util.List;
import java.util.*;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
@ -191,21 +191,33 @@ public abstract class POIXMLDocument extends POIXMLDocumentPart{
return pkg;
}
protected final void load(POIXMLFactory factory) throws IOException {
Map<PackageRelationship, POIXMLDocumentPart> context = new HashMap<PackageRelationship, POIXMLDocumentPart>();
try {
read(factory, context);
} catch (OpenXML4JException e){
throw new POIXMLException(e);
}
onDocumentRead();
context.clear();
}
/**
* Write out this document to an Outputstream.
*
* @param stream - the java OutputStream you wish to write the XLS to
* @param stream - the java OutputStream you wish to write the file to
*
* @exception IOException if anything can't be written.
*/
public final void write(OutputStream stream) throws IOException {
//force all children to commit their changes into the underlying OOXML Package
onSave();
Set<PackageRelationship> context = new HashSet<PackageRelationship>();
onSave(context);
context.clear();
//save extended and custom properties
getProperties().commit();
getPackage().save(stream);
}
}

View File

@ -17,8 +17,7 @@
package org.apache.poi;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
import java.net.URI;
import org.apache.xmlbeans.XmlOptions;
@ -75,11 +74,11 @@ public class POIXMLDocumentPart {
/**
* Creates an POIXMLDocumentPart representing the given package part and relationship.
* Called by {@link #read(POIXMLFactory)} when reading in an exisiting file.
* Called by {@link #read(POIXMLFactory, java.util.Map)} when reading in an exisiting file.
*
* @param part - The package part that holds xml data represenring this sheet.
* @param rel - the relationship of the given package part
* @see #read(POIXMLFactory)
* @see #read(POIXMLFactory, java.util.Map)
*/
public POIXMLDocumentPart(PackagePart part, PackageRelationship rel){
this.relations = new LinkedList<POIXMLDocumentPart>();
@ -172,11 +171,14 @@ public class POIXMLDocumentPart {
* Save changes in the underlying OOXML package.
* Recursively fires {@link #commit()} for each package part
*/
protected final void onSave() throws IOException{
commit();
for(POIXMLDocumentPart p : relations){
p.onSave();
}
protected final void onSave(Set<PackageRelationship> alreadySaved) throws IOException{
commit();
alreadySaved.add(this.getPackageRelationship());
for(POIXMLDocumentPart p : relations){
if (!alreadySaved.contains(p.getPackageRelationship())) {
p.onSave(alreadySaved);
}
}
}
/**
@ -228,10 +230,10 @@ public class POIXMLDocumentPart {
*
* @param factory the factory object that creates POIXMLFactory instances
*/
protected final void read(POIXMLFactory factory) throws OpenXML4JException {
PackageRelationshipCollection rels = packagePart.getRelationships();
for (PackageRelationship rel : rels) {
if(rel.getTargetMode() == TargetMode.INTERNAL){
protected void read(POIXMLFactory factory, Map<PackageRelationship, POIXMLDocumentPart> context) throws OpenXML4JException {
PackageRelationshipCollection rels = packagePart.getRelationships();
for (PackageRelationship rel : rels) {
if(rel.getTargetMode() == TargetMode.INTERNAL){
URI uri = rel.getTargetURI();
PackagePart p;
@ -249,16 +251,22 @@ public class POIXMLDocumentPart {
}
}
POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
childPart.parent = this;
addRelation(childPart);
if(p != null && p.hasRelationships()) childPart.read(factory);
}
}
if (!context.containsKey(rel)) {
POIXMLDocumentPart childPart = factory.createDocumentPart(rel, p);
childPart.parent = this;
addRelation(childPart);
if(p != null){
context.put(rel, childPart);
if(p.hasRelationships()) childPart.read(factory, context);
}
}
else {
addRelation(context.get(rel));
}
}
}
}
/**
* Fired when a new package part is created
*/

View File

@ -129,8 +129,10 @@ public final class PackageRelationship {
@Override
public int hashCode() {
return this.id.hashCode() + this.relationshipType.hashCode()
+ this.source.hashCode() + this.targetMode.hashCode()
return this.id.hashCode()
+ this.relationshipType.hashCode()
+ (this.source == null ? 0 : this.source.hashCode())
+ this.targetMode.hashCode()
+ this.targetUri.hashCode();
}

View File

@ -19,7 +19,9 @@ package org.apache.poi.util;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.util.IOUtils;
import org.apache.poi.POIXMLException;
import java.io.*;
import java.net.URI;
@ -41,6 +43,18 @@ public class PackageHelper {
return clone(pkg, createTempFile());
}
public static OPCPackage open(InputStream is) throws IOException {
File file = TempFile.createTempFile("poi-ooxml-", ".tmp");
FileOutputStream out = new FileOutputStream(file);
IOUtils.copy(is, out);
out.close();
try {
return OPCPackage.open(file.getAbsolutePath());
} catch (InvalidFormatException e){
throw new POIXMLException(e);
}
}
/**
* Clone the specified package.
*

View File

@ -149,12 +149,14 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
super(ensureWriteAccess(pkg));
//build a tree of POIXMLDocumentParts, this workbook being the root
try {
read(XSSFFactory.getInstance());
} catch (OpenXML4JException e){
throw new POIXMLException(e);
}
onDocumentRead();
load(XSSFFactory.getInstance());
}
public XSSFWorkbook(InputStream is) throws IOException {
super(PackageHelper.open(is));
//build a tree of POIXMLDocumentParts, this workbook being the root
load(XSSFFactory.getInstance());
}
/**

View File

@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
import java.io.IOException;
import java.io.OutputStream;
import java.io.InputStream;
import java.util.*;
import org.apache.poi.POIXMLDocument;
@ -63,12 +64,14 @@ public class XWPFDocument extends POIXMLDocument {
super(ensureWriteAccess(pkg));
//build a tree of POIXMLDocumentParts, this document being the root
try {
read(XWPFFactory.getInstance());
} catch (OpenXML4JException e){
throw new POIXMLException(e);
}
onDocumentRead();
load(XWPFFactory.getInstance());
}
public XWPFDocument(InputStream is) throws IOException {
super(PackageHelper.open(is));
//build a tree of POIXMLDocumentParts, this workbook being the root
load(XWPFFactory.getInstance());
}
public XWPFDocument(){

View File

@ -0,0 +1,118 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi;
import java.io.File;
import java.io.IOException;
import java.io.FileOutputStream;
import java.util.List;
import java.util.ArrayList;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.TempFile;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import junit.framework.TestCase;
/**
* Test recursive read and write of OPC packages
*/
public class TestPOIXMLDocument extends TestCase
{
private static class OPCParser extends POIXMLDocument {
public OPCParser(OPCPackage pkg) throws IOException {
super(pkg);
}
public List<PackagePart> getAllEmbedds() {
throw new RuntimeException("not supported");
}
public void parse(POIXMLFactory factory) throws OpenXML4JException, IOException{
load(factory);
}
}
private static class TestFactory extends POIXMLFactory {
public POIXMLDocumentPart createDocumentPart(PackageRelationship rel, PackagePart part){
return new POIXMLDocumentPart(part, rel);
}
public POIXMLDocumentPart newDocumentPart(POIXMLRelation descriptor){
throw new RuntimeException("not supported");
}
}
public void assertReadWrite(String path) throws Exception {
OPCPackage pkg1 = OPCPackage.open(path);
OPCParser doc = new OPCParser(pkg1);
doc.parse(new TestFactory());
File tmp = TempFile.createTempFile("poi-ooxml", ".tmp");
FileOutputStream out = new FileOutputStream(tmp);
doc.write(out);
out.close();
OPCPackage pkg2 = OPCPackage.open(tmp.getAbsolutePath());
assertEquals(pkg1.getRelationships().size(), pkg2.getRelationships().size());
ArrayList<PackagePart> l1 = pkg1.getParts();
ArrayList<PackagePart> l2 = pkg2.getParts();
assertEquals(l1.size(), l2.size());
for (int i=0; i < l1.size(); i++){
PackagePart p1 = l1.get(i);
PackagePart p2 = l2.get(i);
assertEquals(p1.getContentType(), p2.getContentType());
assertEquals(p1.hasRelationships(), p2.hasRelationships());
if(p1.hasRelationships()){
assertEquals(p1.getRelationships().size(), p2.getRelationships().size());
}
assertEquals(p1.getPartName(), p2.getPartName());
}
}
public void testPPTX() throws Exception {
File file = new File(System.getProperty("OOXML.testdata.path"), "PPTWithAttachments.pptx");
assertReadWrite(file.getAbsolutePath());
}
public void testXLSX() throws Exception {
File file = new File(System.getProperty("OOXML.testdata.path"), "ExcelWithAttachments.xlsx");
assertReadWrite(file.getAbsolutePath());
}
public void testDOCX() throws Exception {
File file = new File(System.getProperty("OOXML.testdata.path"), "WordWithAttachments.docx");
assertReadWrite(file.getAbsolutePath());
}
}

View File

@ -41,10 +41,7 @@ public class XSSFTestDataSamples {
public static final XSSFWorkbook openSampleWorkbook(String sampleName) {
InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleName);
try {
OPCPackage pkg = OPCPackage.open(is);
return new XSSFWorkbook(pkg);
} catch (InvalidFormatException e) {
throw new RuntimeException(e);
return new XSSFWorkbook(is);
} catch (IOException e) {
throw new RuntimeException(e);
}

View File

@ -20,6 +20,8 @@ package org.apache.poi.xssf.usermodel;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.zip.CRC32;
import java.util.List;
import junit.framework.TestCase;
@ -29,10 +31,7 @@ import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xssf.XSSFITestDataProvider;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.openxml4j.opc.ContentTypes;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.util.TempFile;
import org.apache.poi.POIXMLProperties;
@ -275,7 +274,41 @@ public final class TestXSSFWorkbook extends BaseTestWorkbook {
opcProps = workbook.getProperties().getCoreProperties().getUnderlyingProperties();
assertEquals("Testing Bugzilla #47460", opcProps.getTitleProperty().getValue());
assertEquals("poi-dev@poi.apache.org", opcProps.getCreatorProperty().getValue());
}
/**
* Verify that the attached test data was not modified. If this test method
* fails, the test data is not working properly.
*/
public void test47668() throws Exception {
XSSFWorkbook workbook = XSSFTestDataSamples.openSampleWorkbook("47668.xlsx");
List<XSSFPictureData> allPictures = workbook.getAllPictures();
assertEquals(2, allPictures.size());
PackagePartName imagePartName = PackagingURIHelper
.createPartName("/xl/media/image1.jpeg");
PackagePart imagePart = workbook.getPackage().getPart(imagePartName);
assertNotNull(imagePart);
for (XSSFPictureData pictureData : allPictures) {
PackagePart picturePart = pictureData.getPackagePart();
assertSame(imagePart, picturePart);
}
XSSFSheet sheet0 = workbook.getSheetAt(0);
XSSFDrawing drawing0 = sheet0.createDrawingPatriarch();
XSSFPictureData pictureData0 = (XSSFPictureData) drawing0.getRelations().get(0);
byte[] data0 = pictureData0.getData();
CRC32 crc0 = new CRC32();
crc0.update(data0);
XSSFSheet sheet1 = workbook.getSheetAt(1);
XSSFDrawing drawing1 = sheet1.createDrawingPatriarch();
XSSFPictureData pictureData1 = (XSSFPictureData) drawing1.getRelations().get(0);
byte[] data1 = pictureData1.getData();
CRC32 crc1 = new CRC32();
crc1.update(data1);
assertEquals(crc0.getValue(), crc1.getValue());
}
}

Binary file not shown.