diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 3ee469d1b9..db98f2e905 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -55,6 +55,7 @@ import org.apache.poi.util.IOUtils; import org.apache.poi.util.NotImplemented; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; +import org.apache.poi.util.Removal; import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.usermodel.XSLFRelation; @@ -136,42 +137,20 @@ public class ExtractorFactory { POIOLE2TextExtractor extractor = createExtractor(fs); extractor.setFilesystem(fs); return extractor; - } catch (OfficeXmlFileException e) { // ensure file-handle release IOUtils.closeQuietly(fs); return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ)); - } catch (NotOLE2FileException ne) { // ensure file-handle release IOUtils.closeQuietly(fs); throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); - - } catch (OpenXML4JException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - - } catch (XmlException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - - } catch (IOException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - - } catch (RuntimeException e) { - // ensure file-handle release - IOUtils.closeQuietly(fs); - throw e; - } catch (Error e) { + } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // ensure file-handle release IOUtils.closeQuietly(fs); throw e; } - } + } public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { InputStream is = FileMagic.prepareToCheckMagic(inp); @@ -265,27 +244,7 @@ public class ExtractorFactory { throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); - } catch (IOException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } catch (OpenXML4JException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } catch (XmlException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } catch (RuntimeException e) { - // ensure that we close the package again if there is an error opening it, however - // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! - pkg.revert(); - throw e; - } catch (Error e) { + } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) { // ensure that we close the package again if there is an error opening it, however // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! pkg.revert(); @@ -323,8 +282,23 @@ public class ExtractorFactory { * If there are no embedded documents, you'll get back an * empty array. Otherwise, you'll get one open * {@link POITextExtractor} for each embedded file. + * + * @deprecated Use the method with correct "embedded" */ + @Deprecated + @Removal(version="4.2") public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { + return getEmbeddedDocsTextExtractors(ext); + } + + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + */ + public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException { // All the embedded directories we spotted ArrayList dirs = new ArrayList<>(); // For anything else not directly held in as a POIFS directory @@ -392,15 +366,30 @@ public class ExtractorFactory { // Ignore, just means it didn't contain // a format we support as yet logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage()); - } catch (XmlException e) { - throw new IOException(e.getMessage(), e); - } catch (OpenXML4JException e) { + } catch (XmlException | OpenXML4JException e) { throw new IOException(e.getMessage(), e); } } return textExtractors.toArray(new POITextExtractor[textExtractors.size()]); } + /** + * Returns an array of text extractors, one for each of + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an + * empty array. Otherwise, you'll get one open + * {@link POITextExtractor} for each embedded file. + * + * @deprecated Use the method with correct "embedded" + */ + @Deprecated + @Removal(version="4.2") + @NotImplemented + @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) + public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) { + return getEmbeddedDocsTextExtractors(ext); + } + /** * Returns an array of text extractors, one for each of * the embedded documents in the file (if there are any). @@ -409,8 +398,8 @@ public class ExtractorFactory { * {@link POITextExtractor} for each embedded file. */ @NotImplemented - @SuppressWarnings("UnusedParameters") - public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) { + @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"}) + public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { throw new IllegalStateException("Not yet supported"); } diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index fb378df0dd..345762b371 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -48,14 +48,12 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.util.IOUtils; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor; -import org.apache.poi.xssf.usermodel.TestMatrixFormulasFromXMLSpreadsheet; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.junit.BeforeClass; import org.junit.Test; @@ -779,26 +777,40 @@ public class TestExtractorFactory { } /** - * Test embeded docs text extraction. For now, only - * does poifs embeded, but will do ooxml ones + * Test embedded docs text extraction. For now, only + * does poifs embedded, but will do ooxml ones * at some point. */ + @SuppressWarnings("deprecation") @Test - public void testEmbeded() throws Exception { + public void testEmbedded() throws Exception { POIOLE2TextExtractor ext; POITextExtractor[] embeds; - // No embedings + // No embeddings ext = (POIOLE2TextExtractor) ExtractorFactory.createExtractor(xls); embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); assertEquals(0, embeds.length); ext.close(); + // No embeddings + ext = (POIOLE2TextExtractor) + ExtractorFactory.createExtractor(xls); + embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext); + assertEquals(0, embeds.length); + ext.close(); + // Excel ext = (POIOLE2TextExtractor) ExtractorFactory.createExtractor(xlsEmb); embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext); + assertNotNull(embeds); + + // Excel + ext = (POIOLE2TextExtractor) + ExtractorFactory.createExtractor(xlsEmb); + embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext); assertEquals(6, embeds.length); int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX; @@ -1016,6 +1028,7 @@ public class TestExtractorFactory { } } + @SuppressWarnings("deprecation") @Test public void testGetEmbeddedFromXMLExtractor() { try { @@ -1025,6 +1038,14 @@ public class TestExtractorFactory { } catch (IllegalStateException e) { // expected here } + + try { + // currently not implemented + ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null); + fail("Unsupported currently"); + } catch (IllegalStateException e) { + // expected here + } } // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed. @@ -1032,13 +1053,10 @@ public class TestExtractorFactory { // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor @Test(expected=AssertionError.class) public void test45565() throws Exception { - POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls")); - try { + try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) { String text = extractor.getText(); assertContains(text, "testdoc"); assertContains(text, "test phrase"); - } finally { - extractor.close(); } } }