mirror of https://github.com/apache/poi.git
Fixed ExtractorFactory to support .xltx and .dotx files (see Bugzilla 47517)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795327 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eda8d9631c
commit
d09ab59ab0
|
@ -96,16 +96,25 @@ public class ExtractorFactory {
|
|||
}
|
||||
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
if(corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType())) {
|
||||
if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
|
||||
|
||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
|
||||
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
}
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package");
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||
|
|
|
@ -50,9 +50,11 @@ public class TestExtractorFactory extends TestCase {
|
|||
|
||||
private File xls;
|
||||
private File xlsx;
|
||||
private File xltx;
|
||||
|
||||
private File doc;
|
||||
private File docx;
|
||||
private File dotx;
|
||||
|
||||
private File ppt;
|
||||
private File pptx;
|
||||
|
@ -77,9 +79,11 @@ public class TestExtractorFactory extends TestCase {
|
|||
|
||||
xls = new File(excel_dir, "SampleSS.xls");
|
||||
xlsx = new File(excel_dir, "SampleSS.xlsx");
|
||||
xltx = new File(excel_dir, "test.xltx");
|
||||
|
||||
doc = new File(word_dir, "SampleDoc.doc");
|
||||
docx = new File(word_dir, "SampleDoc.docx");
|
||||
dotx = new File(word_dir, "test.dotx");
|
||||
|
||||
ppt = new File(powerpoint_dir, "SampleShow.ppt");
|
||||
pptx = new File(powerpoint_dir, "SampleShow.pptx");
|
||||
|
@ -105,6 +109,15 @@ public class TestExtractorFactory extends TestCase {
|
|||
ExtractorFactory.createExtractor(xlsx).getText().length() > 200
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(xltx)
|
||||
instanceof XSSFExcelExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(xltx).getText().contains("test")
|
||||
);
|
||||
|
||||
|
||||
// Word
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(doc)
|
||||
|
@ -122,6 +135,14 @@ public class TestExtractorFactory extends TestCase {
|
|||
ExtractorFactory.createExtractor(docx).getText().length() > 120
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(dotx)
|
||||
instanceof XWPFWordExtractor
|
||||
);
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(dotx).getText().contains("Test")
|
||||
);
|
||||
|
||||
// PowerPoint
|
||||
assertTrue(
|
||||
ExtractorFactory.createExtractor(ppt)
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue