Fixed ExtractorFactory to support .xltx and .dotx files; see Bugzilla 47517

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-07-18 09:09:11 +00:00
parent eda8d9631c
commit d09ab59ab0
4 changed files with 41 additions and 11 deletions

View File

@ -94,18 +94,27 @@ public class ExtractorFactory {
if(core.size() != 1) { if(core.size() != 1) {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
} }
PackagePart corePart = pkg.getPart(core.getRelationship(0)); PackagePart corePart = pkg.getPart(core.getRelationship(0));
if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) { if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
return new XSSFExcelExtractor(pkg); corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
} corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) { corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
return new XSSFExcelExtractor(pkg);
}
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
return new XWPFWordExtractor(pkg); return new XWPFWordExtractor(pkg);
} }
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) { if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
return new XSLFPowerPointExtractor(pkg); return new XSLFPowerPointExtractor(pkg);
} }
throw new IllegalArgumentException("No supported documents found in the OOXML package"); throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
} }
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {

View File

@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
private File xls; private File xls;
private File xlsx; private File xlsx;
private File xltx;
private File doc; private File doc;
private File docx; private File docx;
private File dotx;
private File ppt; private File ppt;
private File pptx; private File pptx;
@ -77,10 +79,12 @@ public class TestExtractorFactory extends TestCase {
xls = new File(excel_dir, "SampleSS.xls"); xls = new File(excel_dir, "SampleSS.xls");
xlsx = new File(excel_dir, "SampleSS.xlsx"); xlsx = new File(excel_dir, "SampleSS.xlsx");
xltx = new File(excel_dir, "test.xltx");
doc = new File(word_dir, "SampleDoc.doc"); doc = new File(word_dir, "SampleDoc.doc");
docx = new File(word_dir, "SampleDoc.docx"); docx = new File(word_dir, "SampleDoc.docx");
dotx = new File(word_dir, "test.dotx");
ppt = new File(powerpoint_dir, "SampleShow.ppt"); ppt = new File(powerpoint_dir, "SampleShow.ppt");
pptx = new File(powerpoint_dir, "SampleShow.pptx"); pptx = new File(powerpoint_dir, "SampleShow.pptx");
@ -104,6 +108,15 @@ public class TestExtractorFactory extends TestCase {
assertTrue( assertTrue(
ExtractorFactory.createExtractor(xlsx).getText().length() > 200 ExtractorFactory.createExtractor(xlsx).getText().length() > 200
); );
assertTrue(
ExtractorFactory.createExtractor(xltx)
instanceof XSSFExcelExtractor
);
assertTrue(
ExtractorFactory.createExtractor(xltx).getText().contains("test")
);
// Word // Word
assertTrue( assertTrue(
@ -121,7 +134,15 @@ public class TestExtractorFactory extends TestCase {
assertTrue( assertTrue(
ExtractorFactory.createExtractor(docx).getText().length() > 120 ExtractorFactory.createExtractor(docx).getText().length() > 120
); );
assertTrue(
ExtractorFactory.createExtractor(dotx)
instanceof XWPFWordExtractor
);
assertTrue(
ExtractorFactory.createExtractor(dotx).getText().contains("Test")
);
// PowerPoint // PowerPoint
assertTrue( assertTrue(
ExtractorFactory.createExtractor(ppt) ExtractorFactory.createExtractor(ppt)

Binary file not shown.

Binary file not shown.