Add some more tests for the checks for files that can cause large memory usage.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1713217 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2015-11-08 10:00:34 +00:00
parent e15a3096d5
commit 013583a8a7
5 changed files with 120 additions and 2 deletions

View File

@ -234,6 +234,8 @@ public class TestAllFiles {
EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb");
EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted
EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents
EXPECTED_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx"); // contains xml-entity-expansion
EXPECTED_FAILURES.add("spreadsheet/poc-shared-strings.xlsx"); // contains shared-string-entity-expansion
// old Excel files, which we only support simple text extraction of // old Excel files, which we only support simple text extraction of
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls");

View File

@ -76,6 +76,11 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
sheet.setColumnGroupCollapsed(4, true); sheet.setColumnGroupCollapsed(4, true);
sheet.setColumnGroupCollapsed(4, false); sheet.setColumnGroupCollapsed(4, false);
// don't do this for very large sheets as it will take a long time
if(sheet.getPhysicalNumberOfRows() > 1000) {
continue;
}
for(Row row : sheet) { for(Row row : sheet) {
for(Cell cell : row) { for(Cell cell : row) {
cell.toString(); cell.toString();

View File

@ -17,12 +17,29 @@
package org.apache.poi.openxml4j.opc; package org.apache.poi.openxml4j.opc;
import static org.junit.Assert.*; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLException;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.poi.xwpf.usermodel.XWPFRelation; import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.xmlbeans.XmlException;
import org.junit.Test; import org.junit.Test;
public class TestZipPackage { public class TestZipPackage {
@ -51,4 +68,98 @@ public class TestZipPackage {
assertFalse("Document should not be found in " + p.getParts(), foundDocument); assertFalse("Document should not be found in " + p.getParts(), foundDocument);
assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1); assertFalse("Theme1 should not found in " + p.getParts(), foundTheme1);
} }
/**
 * Opening the "poc-xmlbomb.xlsx" sample as a workbook must not succeed:
 * a {@link POIXMLException} is expected, and its stack trace has to
 * report that the entity limit was hit.
 */
@Test
public void testZipEntityExpansionTerminates() throws IOException {
    final String sampleName = "poc-xmlbomb.xlsx";
    try {
        Workbook workbook = XSSFTestDataSamples.openSampleWorkbook(sampleName);
        // only reached if the sample could be loaded, which is a test failure
        workbook.close();
        fail("Should catch exception due to entity expansion limitations");
    } catch (POIXMLException expected) {
        assertEntityLimitReached(expected);
    }
}
/**
 * Renders the complete stack trace of the given exception as UTF-8 text
 * and asserts that it contains the message
 * "Exceeded Entity dereference bytes limit".
 *
 * @param e the exception whose stack trace is inspected
 * @throws UnsupportedEncodingException if the UTF-8 charset is not available
 */
private void assertEntityLimitReached(Exception e) throws UnsupportedEncodingException {
    final String charset = "UTF-8";
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    PrintWriter traceWriter = new PrintWriter(new OutputStreamWriter(buffer, charset));
    try {
        e.printStackTrace(traceWriter);
    } finally {
        // closing the writer pushes all pending characters into the buffer
        traceWriter.close();
    }

    String trace = buffer.toString(charset);
    boolean limitReported = trace.contains("Exceeded Entity dereference bytes limit");
    assertTrue("Had: " + trace, limitReported);
}
/**
 * Exercises the "poc-xmlbomb.xlsx" sample through two entry points.
 * <ol>
 * <li>{@link WorkbookFactory#create} on the opened package must throw a
 *     {@link POIXMLException} that reports the entity limit.</li>
 * <li>Creating a text extractor for the sample file either fails with such
 *     a {@link POIXMLException}, or yields an extractor whose
 *     {@code getText()} may throw an {@link IllegalStateException},
 *     which is tolerated.</li>
 * </ol>
 */
@Test
public void testZipEntityExpansionExceedsMemory() throws Exception {
    final String sampleName = "poc-xmlbomb.xlsx";

    // step 1: loading via the WorkbookFactory has to be rejected
    try {
        Workbook workbook = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage(sampleName));
        workbook.close();
        fail("Should catch exception due to entity expansion limitations");
    } catch (POIXMLException expected) {
        assertEntityLimitReached(expected);
    }

    // step 2: text extraction on the same file
    try {
        POITextExtractor textExtractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile(sampleName));
        try {
            assertNotNull(textExtractor);
            textExtractor.getText();
        } catch (IllegalStateException ignored) {
            // expected due to shared strings expansion
        } finally {
            textExtractor.close();
        }
    } catch (POIXMLException expected) {
        assertEntityLimitReached(expected);
    }
}
/**
 * Loads the "poc-shared-strings.xlsx" sample as a workbook (which is
 * expected to work and is closed right away) and then runs a text
 * extractor over the same file. An {@link IllegalStateException} raised
 * by {@code getText()} is tolerated; the extractor is always closed.
 */
@Test
public void testZipEntityExpansionSharedStringTable() throws Exception {
    final String sampleName = "poc-shared-strings.xlsx";

    Workbook workbook = WorkbookFactory.create(XSSFTestDataSamples.openSamplePackage(sampleName));
    workbook.close();

    POITextExtractor textExtractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile(sampleName));
    try {
        assertNotNull(textExtractor);
        textExtractor.getText();
    } catch (IllegalStateException ignored) {
        // expected due to shared strings expansion
    } finally {
        textExtractor.close();
    }
}
/**
 * Same extraction check as for the shared-strings sample, but with the
 * thread-level "prefer event extractors" switch of {@link ExtractorFactory}
 * turned on for the duration of the test. The previous value of the switch
 * is restored afterwards, whatever the outcome.
 * <p>
 * Accepted outcomes: extraction runs through, {@code getText()} throws an
 * {@link IllegalStateException}, or an {@link XmlException} is raised whose
 * stack trace reports the entity limit.
 */
@Test
public void testZipEntityExpansionSharedStringTableEvents() throws Exception {
    final boolean previousPreference = ExtractorFactory.getThreadPrefersEventExtractors();
    ExtractorFactory.setThreadPrefersEventExtractors(true);
    try {
        POITextExtractor textExtractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("poc-shared-strings.xlsx"));
        try {
            assertNotNull(textExtractor);
            textExtractor.getText();
        } catch (IllegalStateException ignored) {
            // expected due to shared strings expansion
        } finally {
            textExtractor.close();
        }
    } catch (XmlException limitHit) {
        assertEntityLimitReached(limitHit);
    } finally {
        // put the thread-level setting back to what it was before the test
        ExtractorFactory.setThreadPrefersEventExtractors(previousPreference);
    }
}
} }

Binary file not shown.

Binary file not shown.