mirror of https://github.com/apache/poi.git
Fixed ExtractorFactory to support .xltx and .dotx files; see Bugzilla 47517.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eda8d9631c
commit
d09ab59ab0
|
@ -94,18 +94,27 @@ public class ExtractorFactory {
|
||||||
if(core.size() != 1) {
|
if(core.size() != 1) {
|
||||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||||
if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) {
|
if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
|
||||||
return new XSSFExcelExtractor(pkg);
|
corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
|
||||||
}
|
corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
|
||||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
|
corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
|
||||||
|
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
|
||||||
|
return new XSSFExcelExtractor(pkg);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
|
||||||
|
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
|
||||||
|
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
|
||||||
|
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
|
||||||
return new XWPFWordExtractor(pkg);
|
return new XWPFWordExtractor(pkg);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
|
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
|
||||||
return new XSLFPowerPointExtractor(pkg);
|
return new XSLFPowerPointExtractor(pkg);
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("No supported documents found in the OOXML package");
|
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
|
|
|
@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
|
||||||
|
|
||||||
private File xls;
|
private File xls;
|
||||||
private File xlsx;
|
private File xlsx;
|
||||||
|
private File xltx;
|
||||||
|
|
||||||
private File doc;
|
private File doc;
|
||||||
private File docx;
|
private File docx;
|
||||||
|
private File dotx;
|
||||||
|
|
||||||
private File ppt;
|
private File ppt;
|
||||||
private File pptx;
|
private File pptx;
|
||||||
|
@ -77,10 +79,12 @@ public class TestExtractorFactory extends TestCase {
|
||||||
|
|
||||||
xls = new File(excel_dir, "SampleSS.xls");
|
xls = new File(excel_dir, "SampleSS.xls");
|
||||||
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
||||||
|
xltx = new File(excel_dir, "test.xltx");
|
||||||
|
|
||||||
doc = new File(word_dir, "SampleDoc.doc");
|
doc = new File(word_dir, "SampleDoc.doc");
|
||||||
docx = new File(word_dir, "SampleDoc.docx");
|
docx = new File(word_dir, "SampleDoc.docx");
|
||||||
|
dotx = new File(word_dir, "test.dotx");
|
||||||
|
|
||||||
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
||||||
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
||||||
|
|
||||||
|
@ -104,6 +108,15 @@ public class TestExtractorFactory extends TestCase {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(xlsx).getText().length() > 200
|
ExtractorFactory.createExtractor(xlsx).getText().length() > 200
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(xltx)
|
||||||
|
instanceof XSSFExcelExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(xltx).getText().contains("test")
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
// Word
|
// Word
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
@ -121,7 +134,15 @@ public class TestExtractorFactory extends TestCase {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(docx).getText().length() > 120
|
ExtractorFactory.createExtractor(docx).getText().length() > 120
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(dotx)
|
||||||
|
instanceof XWPFWordExtractor
|
||||||
|
);
|
||||||
|
assertTrue(
|
||||||
|
ExtractorFactory.createExtractor(dotx).getText().contains("Test")
|
||||||
|
);
|
||||||
|
|
||||||
// PowerPoint
|
// PowerPoint
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(ppt)
|
ExtractorFactory.createExtractor(ppt)
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue