#62886 - Regression extracting text from corrupted docx files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1849252 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2018-12-18 23:55:58 +00:00
parent 5344049a98
commit 6c5a2de640
3 changed files with 17 additions and 3 deletions

View File

@ -20,6 +20,7 @@ package org.apache.poi.openxml4j.util;
import static org.apache.poi.openxml4j.util.ZipSecureFile.MAX_ENTRY_SIZE; import static org.apache.poi.openxml4j.util.ZipSecureFile.MAX_ENTRY_SIZE;
import static org.apache.poi.openxml4j.util.ZipSecureFile.MIN_INFLATE_RATIO; import static org.apache.poi.openxml4j.util.ZipSecureFile.MIN_INFLATE_RATIO;
import java.io.EOFException;
import java.io.FilterInputStream; import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -144,6 +145,8 @@ public class ZipArchiveThresholdInputStream extends FilterInputStream {
"No valid entries or contents found, this is not a valid OOXML (Office Open XML) file", ze); "No valid entries or contents found, this is not a valid OOXML (Office Open XML) file", ze);
} }
throw ze; throw ze;
} catch (EOFException e) {
return null;
} }
} }

View File

@ -43,13 +43,24 @@ import org.junit.Test;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
public class TestXWPFBugs { public class TestXWPFBugs {
private static final POIDataSamples samples = POIDataSamples.getDocumentInstance();
/**
 * Regression test for bug 62886: opening the sample document
 * "truncated62886.docx" and extracting its text must not throw,
 * and the extracted text must not be {@code null}.
 */
@Test
public void truncatedDocx() throws Exception {
    try (InputStream docStream = samples.openResourceAsStream("truncated62886.docx");
         OPCPackage pkg = OPCPackage.open(docStream);
         XWPFWordExtractor extractor = new XWPFWordExtractor(pkg)) {
        // Only the presence of a result is checked; the content itself is not asserted.
        String text = extractor.getText();
        assertNotNull(text);
    }
}
/** /**
* A word document that's encrypted with non-standard * A word document that's encrypted with non-standard
* Encryption options, and no cspname section. See bug 53475 * Encryption options, and no cspname section. See bug 53475
*/ */
@Test @Test
public void bug53475NoCSPName() throws Exception { public void bug53475NoCSPName() throws Exception {
File file = POIDataSamples.getDocumentInstance().getFile("bug53475-password-is-solrcell.docx"); File file = samples.getFile("bug53475-password-is-solrcell.docx");
POIFSFileSystem filesystem = new POIFSFileSystem(file, true); POIFSFileSystem filesystem = new POIFSFileSystem(file, true);
// Check the encryption details // Check the encryption details
@ -84,7 +95,7 @@ public class TestXWPFBugs {
int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES"); int maxKeyLen = Cipher.getMaxAllowedKeyLength("AES");
Assume.assumeTrue("Please install JCE Unlimited Strength Jurisdiction Policy files for AES 256", maxKeyLen == 2147483647); Assume.assumeTrue("Please install JCE Unlimited Strength Jurisdiction Policy files for AES 256", maxKeyLen == 2147483647);
File file = POIDataSamples.getDocumentInstance().getFile("bug53475-password-is-pass.docx"); File file = samples.getFile("bug53475-password-is-pass.docx");
POIFSFileSystem filesystem = new POIFSFileSystem(file, true); POIFSFileSystem filesystem = new POIFSFileSystem(file, true);
// Check the encryption details // Check the encryption details
@ -117,7 +128,7 @@ public class TestXWPFBugs {
public void bug59058() throws IOException, XmlException { public void bug59058() throws IOException, XmlException {
String files[] = { "bug57031.docx", "bug59058.docx" }; String files[] = { "bug57031.docx", "bug59058.docx" };
for (String f : files) { for (String f : files) {
ZipFile zf = new ZipFile(POIDataSamples.getDocumentInstance().getFile(f)); ZipFile zf = new ZipFile(samples.getFile(f));
ZipArchiveEntry entry = zf.getEntry("word/document.xml"); ZipArchiveEntry entry = zf.getEntry("word/document.xml");
DocumentDocument document = DocumentDocument.Factory.parse(zf.getInputStream(entry)); DocumentDocument document = DocumentDocument.Factory.parse(zf.getInputStream(entry));
assertNotNull(document); assertNotNull(document);

Binary file not shown.