mirror of https://github.com/apache/poi.git
Update the word code to the new style ooxml stuff
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635253 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5b902b4562
commit
0a06d325fa
|
@ -124,6 +124,25 @@ public abstract class POIXMLDocument {
|
||||||
return getTargetPart(rel);
|
return getTargetPart(rel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all the PackageParts which are defined as
|
||||||
|
* relationships of the base document with the
|
||||||
|
* specified content type.
|
||||||
|
*/
|
||||||
|
protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
|
||||||
|
PackageRelationshipCollection partsC =
|
||||||
|
getCorePart().getRelationshipsByType(contentType);
|
||||||
|
|
||||||
|
PackagePart[] parts = new PackagePart[partsC.size()];
|
||||||
|
int count = 0;
|
||||||
|
for (PackageRelationship rel : partsC) {
|
||||||
|
parts[count] = getTargetPart(rel);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return parts;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks that the supplied InputStream (which MUST
|
* Checks that the supplied InputStream (which MUST
|
||||||
|
|
|
@ -14,11 +14,11 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf;
|
package org.apache.poi.xwpf;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxml4j.exceptions.InvalidFormatException;
|
import org.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
|
@ -41,7 +41,7 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||||
*
|
*
|
||||||
* WARNING - APIs expected to change rapidly
|
* WARNING - APIs expected to change rapidly
|
||||||
*/
|
*/
|
||||||
public class HWPFXML extends HXFDocument {
|
public class XWPFDocument extends POIXMLDocument {
|
||||||
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
|
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
|
||||||
public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
|
public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
|
||||||
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
||||||
|
@ -50,11 +50,11 @@ public class HWPFXML extends HXFDocument {
|
||||||
|
|
||||||
private DocumentDocument wordDoc;
|
private DocumentDocument wordDoc;
|
||||||
|
|
||||||
public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||||
super(container, MAIN_CONTENT_TYPE);
|
super(container);
|
||||||
|
|
||||||
wordDoc =
|
wordDoc =
|
||||||
DocumentDocument.Factory.parse(basePart.getInputStream());
|
DocumentDocument.Factory.parse(getCorePart().getInputStream());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -14,15 +14,14 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.hwpf.HWPFXML;
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
|
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
|
@ -34,15 +33,13 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
|
||||||
/**
|
/**
|
||||||
* Helper class to extract text from an OOXML Word file
|
* Helper class to extract text from an OOXML Word file
|
||||||
*/
|
*/
|
||||||
public class HXFWordExtractor extends POIXMLTextExtractor {
|
public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||||
private HWPFXMLDocument document;
|
private XWPFDocument document;
|
||||||
|
|
||||||
public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
public XWPFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new HWPFXMLDocument(
|
this(new XWPFDocument(container));
|
||||||
new HWPFXML(container)
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
public HXFWordExtractor(HWPFXMLDocument document) {
|
public XWPFWordExtractor(XWPFDocument document) {
|
||||||
super(document);
|
super(document);
|
||||||
this.document = document;
|
this.document = document;
|
||||||
}
|
}
|
||||||
|
@ -54,14 +51,14 @@ public class HXFWordExtractor extends POIXMLTextExtractor {
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
POIXMLTextExtractor extractor =
|
POIXMLTextExtractor extractor =
|
||||||
new HXFWordExtractor(HXFDocument.openPackage(
|
new XWPFWordExtractor(POIXMLDocument.openPackage(
|
||||||
new File(args[0])
|
args[0]
|
||||||
));
|
));
|
||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getText() {
|
public String getText() {
|
||||||
CTBody body = document._getHWPFXML().getDocumentBody();
|
CTBody body = document.getDocumentBody();
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
|
|
||||||
// Loop over paragraphs
|
// Loop over paragraphs
|
|
@ -14,23 +14,21 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.xwpf.usermodel;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
import org.apache.poi.hwpf.HWPFXML;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* High level representation of a ooxml text document.
|
* High level representation of a ooxml text document.
|
||||||
*/
|
*/
|
||||||
public class HWPFXMLDocument extends POIXMLDocument {
|
public class XMLWordDocument {
|
||||||
private HWPFXML hwpfXML;
|
private XWPFDocument xwpfXML;
|
||||||
|
|
||||||
public HWPFXMLDocument(HWPFXML xml) {
|
public XMLWordDocument(XWPFDocument xml) {
|
||||||
super(xml);
|
this.xwpfXML = xml;
|
||||||
this.hwpfXML = xml;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public HWPFXML _getHWPFXML() {
|
public XWPFDocument _getXWPFXML() {
|
||||||
return hwpfXML;
|
return xwpfXML;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -14,45 +14,47 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hslf.extractor;
|
package org.apache.poi.xslf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
import org.apache.poi.hslf.HSLFXML;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
|
import org.apache.poi.xslf.XSLFSlideShow;
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for HXFPowerPointExtractor
|
* Tests for XSLFPowerPointExtractor
|
||||||
*/
|
*/
|
||||||
public class TestHXFPowerPointExtractor extends TestCase {
|
public class TestXSLFPowerPointExtractor extends TestCase {
|
||||||
/**
|
/**
|
||||||
* A simple file
|
* A simple file
|
||||||
*/
|
*/
|
||||||
private HSLFXML xmlA;
|
private XSLFSlideShow xmlA;
|
||||||
|
private File fileA;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
File fileA = new File(
|
fileA = new File(
|
||||||
System.getProperty("HSLF.testdata.path") +
|
System.getProperty("HSLF.testdata.path") +
|
||||||
File.separator + "sample.pptx"
|
File.separator + "sample.pptx"
|
||||||
);
|
);
|
||||||
|
assertTrue(fileA.exists());
|
||||||
|
|
||||||
xmlA = new HSLFXML(HXFDocument.openPackage(fileA));
|
xmlA = new XSLFSlideShow(fileA.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text out of the simple file
|
* Get text out of the simple file
|
||||||
*/
|
*/
|
||||||
public void testGetSimpleText() throws Exception {
|
public void testGetSimpleText() throws Exception {
|
||||||
new HXFPowerPointExtractor(xmlA.getPackage());
|
new XSLFPowerPointExtractor(xmlA);
|
||||||
new HXFPowerPointExtractor(new XMLSlideShow(xmlA));
|
new XSLFPowerPointExtractor(
|
||||||
|
POIXMLDocument.openPackage(fileA.toString()));
|
||||||
|
|
||||||
HXFPowerPointExtractor extractor =
|
XSLFPowerPointExtractor extractor =
|
||||||
new HXFPowerPointExtractor(xmlA.getPackage());
|
new XSLFPowerPointExtractor(xmlA);
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
|
@ -14,17 +14,17 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf;
|
package org.apache.poi.xwpf;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
public class TestHWPFXML extends TestCase {
|
public class TestXWPFDocument extends TestCase {
|
||||||
private File sampleFile;
|
private File sampleFile;
|
||||||
private File complexFile;
|
private File complexFile;
|
||||||
|
|
||||||
|
@ -39,14 +39,17 @@ public class TestHWPFXML extends TestCase {
|
||||||
System.getProperty("HWPF.testdata.path") +
|
System.getProperty("HWPF.testdata.path") +
|
||||||
File.separator + "IllustrativeCases.docx"
|
File.separator + "IllustrativeCases.docx"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assertTrue(sampleFile.exists());
|
||||||
|
assertTrue(complexFile.exists());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testContainsMainContentType() throws Exception {
|
public void testContainsMainContentType() throws Exception {
|
||||||
Package pack = HXFDocument.openPackage(sampleFile);
|
Package pack = POIXMLDocument.openPackage(sampleFile.toString());
|
||||||
|
|
||||||
boolean found = false;
|
boolean found = false;
|
||||||
for(PackagePart part : pack.getParts()) {
|
for(PackagePart part : pack.getParts()) {
|
||||||
if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
|
if(part.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) {
|
||||||
found = true;
|
found = true;
|
||||||
}
|
}
|
||||||
System.out.println(part);
|
System.out.println(part);
|
||||||
|
@ -55,14 +58,21 @@ public class TestHWPFXML extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOpen() throws Exception {
|
public void testOpen() throws Exception {
|
||||||
HXFDocument.openPackage(sampleFile);
|
POIXMLDocument.openPackage(sampleFile.toString());
|
||||||
HXFDocument.openPackage(complexFile);
|
POIXMLDocument.openPackage(complexFile.toString());
|
||||||
|
|
||||||
HWPFXML xml;
|
new XWPFDocument(
|
||||||
|
POIXMLDocument.openPackage(sampleFile.toString())
|
||||||
|
);
|
||||||
|
new XWPFDocument(
|
||||||
|
POIXMLDocument.openPackage(complexFile.toString())
|
||||||
|
);
|
||||||
|
|
||||||
|
XWPFDocument xml;
|
||||||
|
|
||||||
// Simple file
|
// Simple file
|
||||||
xml = new HWPFXML(
|
xml = new XWPFDocument(
|
||||||
HXFDocument.openPackage(sampleFile)
|
POIXMLDocument.openPackage(sampleFile.toString())
|
||||||
);
|
);
|
||||||
// Check it has key parts
|
// Check it has key parts
|
||||||
assertNotNull(xml.getDocument());
|
assertNotNull(xml.getDocument());
|
||||||
|
@ -70,8 +80,8 @@ public class TestHWPFXML extends TestCase {
|
||||||
assertNotNull(xml.getStyle());
|
assertNotNull(xml.getStyle());
|
||||||
|
|
||||||
// Complex file
|
// Complex file
|
||||||
xml = new HWPFXML(
|
xml = new XWPFDocument(
|
||||||
HXFDocument.openPackage(complexFile)
|
POIXMLDocument.openPackage(complexFile.toString())
|
||||||
);
|
);
|
||||||
assertNotNull(xml.getDocument());
|
assertNotNull(xml.getDocument());
|
||||||
assertNotNull(xml.getDocumentBody());
|
assertNotNull(xml.getDocumentBody());
|
||||||
|
@ -79,8 +89,8 @@ public class TestHWPFXML extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMetadataBasics() throws Exception {
|
public void testMetadataBasics() throws Exception {
|
||||||
HWPFXML xml = new HWPFXML(
|
XWPFDocument xml = new XWPFDocument(
|
||||||
HXFDocument.openPackage(sampleFile)
|
POIXMLDocument.openPackage(sampleFile.toString())
|
||||||
);
|
);
|
||||||
assertNotNull(xml.getCoreProperties());
|
assertNotNull(xml.getCoreProperties());
|
||||||
assertNotNull(xml.getExtendedProperties());
|
assertNotNull(xml.getExtendedProperties());
|
||||||
|
@ -94,8 +104,8 @@ public class TestHWPFXML extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testMetadataComplex() throws Exception {
|
public void testMetadataComplex() throws Exception {
|
||||||
HWPFXML xml = new HWPFXML(
|
XWPFDocument xml = new XWPFDocument(
|
||||||
HXFDocument.openPackage(complexFile)
|
POIXMLDocument.openPackage(complexFile.toString())
|
||||||
);
|
);
|
||||||
assertNotNull(xml.getCoreProperties());
|
assertNotNull(xml.getCoreProperties());
|
||||||
assertNotNull(xml.getExtendedProperties());
|
assertNotNull(xml.getExtendedProperties());
|
|
@ -14,54 +14,57 @@
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.HWPFXML;
|
import org.apache.poi.POIXMLDocument;
|
||||||
import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for HXFWordExtractor
|
* Tests for XWPFWordExtractor
|
||||||
*/
|
*/
|
||||||
public class TestHXFWordExtractor extends TestCase {
|
public class TestXWPFWordExtractor extends TestCase {
|
||||||
/**
|
/**
|
||||||
* A very simple file
|
* A very simple file
|
||||||
*/
|
*/
|
||||||
private HWPFXML xmlA;
|
private XWPFDocument xmlA;
|
||||||
|
private File fileA;
|
||||||
/**
|
/**
|
||||||
* A fairly complex file
|
* A fairly complex file
|
||||||
*/
|
*/
|
||||||
private HWPFXML xmlB;
|
private XWPFDocument xmlB;
|
||||||
|
private File fileB;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
File fileA = new File(
|
fileA = new File(
|
||||||
System.getProperty("HWPF.testdata.path") +
|
System.getProperty("HWPF.testdata.path") +
|
||||||
File.separator + "sample.docx"
|
File.separator + "sample.docx"
|
||||||
);
|
);
|
||||||
File fileB = new File(
|
fileB = new File(
|
||||||
System.getProperty("HWPF.testdata.path") +
|
System.getProperty("HWPF.testdata.path") +
|
||||||
File.separator + "IllustrativeCases.docx"
|
File.separator + "IllustrativeCases.docx"
|
||||||
);
|
);
|
||||||
|
assertTrue(fileA.exists());
|
||||||
|
assertTrue(fileB.exists());
|
||||||
|
|
||||||
xmlA = new HWPFXML(HXFDocument.openPackage(fileA));
|
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
|
||||||
xmlB = new HWPFXML(HXFDocument.openPackage(fileB));
|
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text out of the simple file
|
* Get text out of the simple file
|
||||||
*/
|
*/
|
||||||
public void testGetSimpleText() throws Exception {
|
public void testGetSimpleText() throws Exception {
|
||||||
new HXFWordExtractor(xmlA.getPackage());
|
new XWPFWordExtractor(xmlA);
|
||||||
new HXFWordExtractor(new HWPFXMLDocument(xmlA));
|
new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
|
||||||
|
|
||||||
HXFWordExtractor extractor =
|
XWPFWordExtractor extractor =
|
||||||
new HXFWordExtractor(xmlA.getPackage());
|
new XWPFWordExtractor(xmlA);
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
|
@ -88,8 +91,8 @@ public class TestHXFWordExtractor extends TestCase {
|
||||||
* Tests getting the text out of a complex file
|
* Tests getting the text out of a complex file
|
||||||
*/
|
*/
|
||||||
public void testGetComplexText() throws Exception {
|
public void testGetComplexText() throws Exception {
|
||||||
HXFWordExtractor extractor =
|
XWPFWordExtractor extractor =
|
||||||
new HXFWordExtractor(xmlB.getPackage());
|
new XWPFWordExtractor(xmlB);
|
||||||
extractor.getText();
|
extractor.getText();
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
Loading…
Reference in New Issue