mirror of https://github.com/apache/poi.git
Give a more helpful exception if a Visio VSDX ooxml file is passed to ExtractorFactory
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665929 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7ade0d4617
commit
47a2847cbe
|
@ -68,6 +68,8 @@ import org.apache.xmlbeans.XmlException;
|
||||||
public class ExtractorFactory {
|
public class ExtractorFactory {
|
||||||
public static final String CORE_DOCUMENT_REL =
|
public static final String CORE_DOCUMENT_REL =
|
||||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
|
||||||
|
public static final String VISIO_DOCUMENT_REL =
|
||||||
|
"http://schemas.microsoft.com/visio/2010/relationships/document";
|
||||||
|
|
||||||
|
|
||||||
/** Should this thread prefer event based over usermodel based extractors? */
|
/** Should this thread prefer event based over usermodel based extractors? */
|
||||||
|
@ -158,12 +160,25 @@ public class ExtractorFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||||
|
// Check for the normal Office core document
|
||||||
PackageRelationshipCollection core =
|
PackageRelationshipCollection core =
|
||||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||||
|
|
||||||
|
// If nothing was found, try some of the other OOXML-based core types
|
||||||
|
if (core.size() == 0) {
|
||||||
|
// Could it be a visio one?
|
||||||
|
PackageRelationshipCollection visio =
|
||||||
|
pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
|
||||||
|
if (visio.size() == 1) {
|
||||||
|
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Should just be a single core document, complain if not
|
||||||
if (core.size() != 1) {
|
if (core.size() != 1) {
|
||||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Grab the core document part, and try to identify from that
|
||||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||||
|
|
||||||
// Is it XSSF?
|
// Is it XSSF?
|
||||||
|
|
|
@ -71,6 +71,7 @@ public class TestExtractorFactory extends TestCase {
|
||||||
private File msgEmbMsg;
|
private File msgEmbMsg;
|
||||||
|
|
||||||
private File vsd;
|
private File vsd;
|
||||||
|
private File vsdx;
|
||||||
|
|
||||||
private File pub;
|
private File pub;
|
||||||
|
|
||||||
|
@ -109,6 +110,7 @@ public class TestExtractorFactory extends TestCase {
|
||||||
|
|
||||||
POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
|
POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
|
||||||
vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
|
vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
|
||||||
|
vsdx = getFileAndCheck(dgTests, "test.vsdx");
|
||||||
|
|
||||||
POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
|
POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
|
||||||
pub = getFileAndCheck(pubTests, "Simple.pub");
|
pub = getFileAndCheck(pubTests, "Simple.pub");
|
||||||
|
@ -230,7 +232,7 @@ public class TestExtractorFactory extends TestCase {
|
||||||
);
|
);
|
||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
// Visio
|
// Visio - binary
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(vsd)
|
ExtractorFactory.createExtractor(vsd)
|
||||||
instanceof VisioTextExtractor
|
instanceof VisioTextExtractor
|
||||||
|
@ -238,6 +240,13 @@ public class TestExtractorFactory extends TestCase {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(vsd).getText().length() > 50
|
ExtractorFactory.createExtractor(vsd).getText().length() > 50
|
||||||
);
|
);
|
||||||
|
// Visio - vsdx
|
||||||
|
try {
|
||||||
|
ExtractorFactory.createExtractor(vsdx);
|
||||||
|
fail();
|
||||||
|
} catch(IllegalArgumentException e) {
|
||||||
|
// Good
|
||||||
|
}
|
||||||
|
|
||||||
// Publisher
|
// Publisher
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
@ -342,6 +351,13 @@ public class TestExtractorFactory extends TestCase {
|
||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
|
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
|
||||||
);
|
);
|
||||||
|
// Visio - vsdx
|
||||||
|
try {
|
||||||
|
ExtractorFactory.createExtractor(new FileInputStream(vsdx));
|
||||||
|
fail();
|
||||||
|
} catch(IllegalArgumentException e) {
|
||||||
|
// Good
|
||||||
|
}
|
||||||
|
|
||||||
// Publisher
|
// Publisher
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue