A quick play with OOXML parsing. Uses XmlBeans and OpenXml4J to get at the data. Expect the API to change rapidly in the near future as we discover what works and what doesn't!

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@606923 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-12-26 17:47:27 +00:00
parent 47207dd723
commit bd80c5bc4b
4 changed files with 235 additions and 0 deletions

View File

@ -0,0 +1,89 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackageAccess;
import org.openxml4j.opc.PackagePart;
/**
 * Parent class of the low level interface to
 *  all POI XML (OOXML) implementations.
 * Normal users should probably deal with things that
 *  extend {@link POIXMLDocument}, unless they really
 *  do need to get low level access to the files.
 *
 * WARNING - APIs expected to change rapidly
 */
public abstract class HXFDocument {
    /**
     * File package/container.
     */
    protected Package container;
    /**
     * The Package Part for our base document
     */
    protected PackagePart basePart;
    /**
     * The base document of this instance, eg Workbook for
     *  xlsx
     */
    protected Document baseDocument;

    /**
     * Locates the single part of the package that has the given content
     *  type, remembers it as the base part, and parses it into
     *  {@link #baseDocument}.
     *
     * @param container the package to read from
     * @param baseContentType the content type identifying the base part
     * @throws OpenXML4JException if the package does not hold exactly one
     *  part of the given content type, or if that part cannot be read or
     *  parsed (the underlying exception is attached as the cause)
     */
    protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException {
        this.container = container;

        // Find the base document
        ArrayList<PackagePart> baseParts =
            container.getPartsByContentType(baseContentType);
        if(baseParts.size() != 1) {
            throw new OpenXML4JException("Expecting one entry with content type of " + baseContentType + ", but found " + baseParts.size());
        }
        basePart = baseParts.get(0);

        // And load it up, making sure the part's stream is released
        //  again whether or not the parse succeeds
        try {
            java.io.InputStream in = basePart.getInputStream();
            try {
                SAXReader reader = new SAXReader();
                baseDocument = reader.read(in);
            } finally {
                in.close();
            }
        } catch (DocumentException e) {
            throw wrap(e);
        } catch (IOException ioe) {
            throw wrap(ioe);
        }
    }

    /**
     * Converts a lower level failure into an {@link OpenXML4JException}
     *  carrying the same message, keeping the original exception as the
     *  cause so its stack trace is not lost.
     */
    private static OpenXML4JException wrap(Exception cause) {
        OpenXML4JException wrapped = new OpenXML4JException(cause.getMessage());
        wrapped.initCause(cause);
        return wrapped;
    }

    /**
     * Opens the given file as a package, requesting read/write access.
     *
     * @param f the file to open
     * @return the package returned by {@link Package#open}
     * @throws InvalidFormatException propagated from {@link Package#open}
     */
    public static Package openPackage(File f) throws InvalidFormatException {
        return Package.open(f.toString(), PackageAccess.READ_WRITE);
    }

    /**
     * Get the package container.
     * @return The package associated to this document.
     */
    public Package getPackage() {
        return container;
    }
}

View File

@ -0,0 +1,27 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi;
/**
 * Parent class of all UserModel POI XML (ooxml)
 *  implementations.
 * Provides a similar function to {@link POIDocument},
 *  for the XML based classes.
 *
 * Currently an empty placeholder: it declares no members yet.
 */
public abstract class POIXMLDocument {
// TODO: no fields or methods have been added yet
}

View File

@ -0,0 +1,51 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf;
import java.io.IOException;
import org.apache.poi.HXFDocument;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
/**
 * Experimental class to do low level processing
 *  of xlsx files.
 *
 * WARNING - APIs expected to change rapidly
 */
public class HSSFXML extends HXFDocument {
    /** Content type of the main workbook part; used to find the base part. */
    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
    /** Content type of a worksheet part. */
    public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
    /** Content type of the shared strings part. */
    public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";

    /** The XmlBeans view of the base (workbook) part. */
    private WorkbookDocument workbookDoc;

    /**
     * Locates the part with {@link #MAIN_CONTENT_TYPE} in the package
     *  (via the parent constructor) and parses it with XmlBeans.
     *
     * @param container the package to read the workbook from
     * @throws OpenXML4JException if the parent constructor cannot find or
     *  load the base part
     * @throws IOException if the base part's stream cannot be opened,
     *  read or closed
     * @throws XmlException if the base part cannot be parsed as a
     *  workbook document
     */
    public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
        super(container, MAIN_CONTENT_TYPE);

        // Always release the part's stream, even if parsing fails
        java.io.InputStream in = basePart.getInputStream();
        try {
            workbookDoc = WorkbookDocument.Factory.parse(in);
        } finally {
            in.close();
        }
    }

    /**
     * Returns the workbook element of the parsed workbook document.
     *
     * @return the result of {@link WorkbookDocument#getWorkbook()}
     */
    public CTWorkbook getWorkbook() {
        return workbookDoc.getWorkbook();
    }
}

View File

@ -0,0 +1,68 @@
package org.apache.poi.hssf;
import java.io.File;
import org.apache.poi.HXFDocument;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
import junit.framework.TestCase;
public class TestHSSFXML extends TestCase {
/**
* Uses the old style schemas.microsoft.com schema uri
*/
private File sampleFileBeta;
/**
* Uses the new style schemas.openxmlformats.org schema uri
*/
private File sampleFile;
protected void setUp() throws Exception {
super.setUp();
sampleFile = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "sample.xlsx"
);
sampleFileBeta = new File(
System.getProperty("HSSF.testdata.path") +
File.separator + "sample-beta.xlsx"
);
}
public void testContainsMainContentType() throws Exception {
Package pack = HXFDocument.openPackage(sampleFile);
boolean found = false;
for(PackagePart part : pack.getParts()) {
if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) {
found = true;
}
System.out.println(part);
}
assertTrue(found);
}
public void testOpen() throws Exception {
HXFDocument.openPackage(sampleFile);
HXFDocument.openPackage(sampleFileBeta);
HSSFXML xml;
// With an old-style uri, as found in a file produced
// with the office 2007 beta, will fail, as we don't
// translate things
try {
xml = new HSSFXML(
HXFDocument.openPackage(sampleFileBeta)
);
fail();
} catch(Exception e) {}
// With the finalised uri, should be fine
xml = new HSSFXML(
HXFDocument.openPackage(sampleFile)
);
}
}