#65046 - Simplify integration tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1885538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2021-01-15 23:50:42 +00:00
parent ca71e9dacf
commit 309e657b0d
14 changed files with 397 additions and 721 deletions

View File

@ -1277,6 +1277,7 @@ under the License.
<!-- jvmarg value="-Duser.timezone=UTC"/ --> <!-- jvmarg value="-Duser.timezone=UTC"/ -->
<jvmarg value="${file.leak.detector}" /> <jvmarg value="${file.leak.detector}" />
<jvmarg value="-Djunit.jupiter.execution.parallel.enabled=true" />
<jvmarg value="-Xjit:verbose={compileStart|compileEnd},vlog=build/jit.log${no.jit.sherlock}" if:true="${isIBMVM}"/> <jvmarg value="-Xjit:verbose={compileStart|compileEnd},vlog=build/jit.log${no.jit.sherlock}" if:true="${isIBMVM}"/>
<modulepath refid="@{modulepath-ref}" unless:true="${isJava8}"/> <modulepath refid="@{modulepath-ref}" unless:true="${isJava8}"/>
@ -1285,6 +1286,10 @@ under the License.
<jvmarg line="--add-modules org.junit.jupiter.params" unless:true="${isJava8}"/> <jvmarg line="--add-modules org.junit.jupiter.params" unless:true="${isJava8}"/>
<jvmarg line="--add-modules org.apache.poi.@{module1}" if:set="use_module1"/> <jvmarg line="--add-modules org.apache.poi.@{module1}" if:set="use_module1"/>
<jvmarg line="--add-modules org.apache.poi.@{module2}" if:set="use_module2"/> <jvmarg line="--add-modules org.apache.poi.@{module2}" if:set="use_module2"/>
<!-- mute some notoriously talkative classes -->
<jvmarg line="-Dorg.slf4j.simpleLogger.log.org.apache.poi.hdgf.chunks.Chunk=off"/>
<jvmarg line="-Dorg.slf4j.simpleLogger.log.org.apache.poi=error"/>
</fork> </fork>
<!-- can't use resultfile="status-as-tests-run.txt" here ... it's truncated with every test --> <!-- can't use resultfile="status-as-tests-run.txt" here ... it's truncated with every test -->

View File

@ -22,6 +22,7 @@ import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hpsf.DocumentSummaryInformation;
@ -63,6 +64,8 @@ import org.apache.poi.util.TempFile;
public final class CopyCompare { public final class CopyCompare {
private CopyCompare() {} private CopyCompare() {}
private static final ThreadLocal<PrintStream> out = ThreadLocal.withInitial(() -> System.out);
/** /**
* Runs the example program. The application expects one or two arguments: * Runs the example program. The application expects one or two arguments:
* *
@ -117,10 +120,14 @@ public final class CopyCompare {
POIFSFileSystem cpfs = new POIFSFileSystem(new File(copyFileName))) { POIFSFileSystem cpfs = new POIFSFileSystem(new File(copyFileName))) {
final DirectoryEntry oRoot = opfs.getRoot(); final DirectoryEntry oRoot = opfs.getRoot();
final DirectoryEntry cRoot = cpfs.getRoot(); final DirectoryEntry cRoot = cpfs.getRoot();
System.out.println(EntryUtils.areDirectoriesIdentical(oRoot, cRoot) ? "Equal" : "Not equal"); out.get().println(EntryUtils.areDirectoriesIdentical(oRoot, cRoot) ? "Equal" : "Not equal");
} }
} }
/**
 * Replaces the stream this example prints its comparison result to.
 * The stream is kept in a {@link ThreadLocal}, so the replacement only
 * applies to the calling thread; threads that never call this method keep
 * the initial value, {@code System.out}.
 *
 * @param ps the stream that should receive the output on the current thread
 */
public static void setOut(PrintStream ps) {
out.set(ps);
}
private interface InputStreamSupplier { private interface InputStreamSupplier {
InputStream get() throws IOException, WritingNotSupportedException; InputStream get() throws IOException, WritingNotSupportedException;
} }

View File

@ -34,6 +34,7 @@ import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.extractor.POITextExtractor; import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.ss.extractor.ExcelExtractor; import org.apache.poi.ss.extractor.ExcelExtractor;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
@ -132,6 +133,17 @@ public abstract class AbstractFileHandler implements FileHandler {
text = extractor.getText(); text = extractor.getText();
assertNotNull(text); assertNotNull(text);
} }
} catch (IOException | POIXMLException e) {
Exception prevE = e;
Throwable cause;
while ((cause = prevE.getCause()) instanceof Exception) {
if (cause instanceof IOException || cause instanceof POIXMLException) {
prevE = (Exception)cause;
} else {
throw (Exception)cause;
}
}
throw e;
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) { if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
throw e; throw e;

View File

@ -50,7 +50,7 @@ public class BaseIntegrationTest {
} }
void test() throws Exception { void test() throws Exception {
assertNotNull( handler, "Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file) ); assertNotNull( handler, "Unknown file extension for file: " + file );
testOneFile(new File(rootDir, file)); testOneFile(new File(rootDir, file));
} }
@ -123,29 +123,29 @@ public class BaseIntegrationTest {
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
if(message != null && (message.equals("The document is really a XLS file"))) { if(message != null && (message.equals("The document is really a XLS file"))) {
handler = TestAllFiles.HANDLERS.get(".xls"); handler = new HSSFFileHandler();
} else if(message != null && (message.equals("The document is really a PPT file"))) { } else if(message != null && (message.equals("The document is really a PPT file"))) {
handler = TestAllFiles.HANDLERS.get(".ppt"); handler = new HSLFFileHandler();
} else if(message != null && (message.equals("The document is really a DOC file"))) { } else if(message != null && (message.equals("The document is really a DOC file"))) {
handler = TestAllFiles.HANDLERS.get(".doc"); handler = new HWPFFileHandler();
} else if(message != null && (message.equals("The document is really a VSD file"))) { } else if(message != null && (message.equals("The document is really a VSD file"))) {
handler = TestAllFiles.HANDLERS.get(".vsd"); handler = new HDGFFileHandler();
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
} else if (handler instanceof HWPFFileHandler) { } else if (handler instanceof HWPFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".docx"); handler = new XWPFFileHandler();
} else if (handler instanceof HSSFFileHandler) { } else if (handler instanceof HSSFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".xlsx"); handler = new XSSFFileHandler();
} else if (handler instanceof HSLFFileHandler) { } else if (handler instanceof HSLFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".pptx"); handler = new XSLFFileHandler();
// and the other way around, use HWPF instead of XWPF and so forth // and the other way around, use HWPF instead of XWPF and so forth
} else if(handler instanceof XWPFFileHandler) { } else if(handler instanceof XWPFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".doc"); handler = new HWPFFileHandler();
} else if(handler instanceof XSSFFileHandler) { } else if(handler instanceof XSSFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".xls"); handler = new HSSFFileHandler();
} else if(handler instanceof XSLFFileHandler) { } else if(handler instanceof XSLFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".ppt"); handler = new HSLFFileHandler();
} else { } else {
// nothing matched => throw the exception to the outside // nothing matched => throw the exception to the outside
throw e; throw e;

View File

@ -18,6 +18,7 @@ package org.apache.poi.stress;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeFalse; import static org.junit.jupiter.api.Assumptions.assumeFalse;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
@ -46,7 +47,7 @@ import org.junit.jupiter.api.Test;
class HPSFFileHandler extends POIFSFileHandler { class HPSFFileHandler extends POIFSFileHandler {
private static final String NL = System.getProperty("line.separator"); private static final String NL = System.getProperty("line.separator");
private static File copyOutput; private static final ThreadLocal<File> copyOutput = ThreadLocal.withInitial(HPSFFileHandler::getTempFile);
static final Set<String> EXCLUDES_HANDLE_ADD = unmodifiableHashSet( static final Set<String> EXCLUDES_HANDLE_ADD = unmodifiableHashSet(
"spreadsheet/45290.xls", "spreadsheet/45290.xls",
@ -58,12 +59,6 @@ class HPSFFileHandler extends POIFSFileHandler {
"document/word2.doc" "document/word2.doc"
); );
static final Set<String> EXCLUDES_HANDLE_FILE = unmodifiableHashSet(
"hpsf/Test_Humor-Generation.ppt",
"slideshow/missing-moveto.ppt" // POIFS properties corrupted
);
private static Set<String> unmodifiableHashSet(String... a) { private static Set<String> unmodifiableHashSet(String... a) {
return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(a))); return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(a)));
} }
@ -71,7 +66,6 @@ class HPSFFileHandler extends POIFSFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
assumeFalse(EXCLUDES_HANDLE_FILE.contains(path));
POIFSFileSystem poifs = new POIFSFileSystem(stream); POIFSFileSystem poifs = new POIFSFileSystem(stream);
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs); HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation(); DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
@ -95,24 +89,26 @@ class HPSFFileHandler extends POIFSFileHandler {
} }
} }
/**
 * Creates the temporary file used as copy target and registers it for
 * deletion on JVM exit.
 *
 * @return the newly created temporary file
 */
private static File getTempFile() {
    try {
        File tmp = TempFile.createTempFile("hpsfCopy", "out");
        tmp.deleteOnExit();
        return tmp;
    } catch (IOException e) {
        // fail() always throws, so this return never yields a value
        return fail(e);
    }
}
@Override @Override
public void handleAdditional(File file) throws Exception { public void handleAdditional(File file) throws Exception {
assumeFalse(EXCLUDES_HANDLE_ADD.contains(file.getParentFile().getName()+"/"+file.getName())); assumeFalse(EXCLUDES_HANDLE_ADD.contains(file.getParentFile().getName()+"/"+file.getName()));
if (copyOutput == null) {
copyOutput = TempFile.createTempFile("hpsfCopy", "out");
copyOutput.deleteOnExit();
}
ByteArrayOutputStream bos = new ByteArrayOutputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream();
PrintStream psNew = new PrintStream(bos, true, "ISO-8859-1"); PrintStream psNew = new PrintStream(bos, true, "ISO-8859-1");
PrintStream ps = System.out; CopyCompare.setOut(psNew);
try { CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.get().getAbsolutePath()});
System.setOut(psNew);
CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.getAbsolutePath()});
assertEquals("Equal" + NL, bos.toString(StandardCharsets.UTF_8.name())); assertEquals("Equal" + NL, bos.toString(StandardCharsets.UTF_8.name()));
} finally {
System.setOut(ps);
}
} }

View File

@ -1,75 +0,0 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.stress;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.junit.jupiter.api.Test;
/**
* Needs to be implemented in this package to have access to
* HSSFWorkbook.getWorkbook()
*/
class HSSFRecordsStresser {
    /** Message fragment of the exception raised by records which cannot be copied; those are skipped. */
    private static final String NO_CLONE_MARKER = "needs to define a clone method";

    /**
     * Copies every record of the workbook's internal model and verifies that
     * the copy has the same class and serializes to the same bytes as the original.
     * <p>
     * A {@link RuntimeException} whose message contains {@link #NO_CLONE_MARKER}
     * is ignored. Any other {@link RuntimeException} - including one without a
     * message - is reported as an {@link AssertionError} which keeps the original
     * exception as its cause.
     *
     * @param wb the workbook whose records are checked
     */
    public static void handleWorkbook(HSSFWorkbook wb) {
        List<org.apache.poi.hssf.record.Record> records = wb.getWorkbook().getRecords();
        for (org.apache.poi.hssf.record.Record record : records) {
            try {
                org.apache.poi.hssf.record.Record newRecord = record.copy();

                assertEquals( record.getClass(), newRecord.getClass(), "Expecting the same class back from clone(), but had Record of type " + record.getClass() + " and got back a " + newRecord.getClass() + " from clone()" );

                byte[] origBytes = record.serialize();
                byte[] newBytes = newRecord.serialize();

                assertArrayEquals( origBytes, newBytes, "Record of type " + record.getClass() + " should return the same byte array via the clone() method, but did return a different array" );
            } catch (RuntimeException e) {
                // some Records do not implement clone, ignore those for now;
                // the message may be null, so it must not be dereferenced blindly
                String message = e.getMessage();
                if (message == null || !message.contains(NO_CLONE_MARKER)) {
                    throw new AssertionError("Unexpected failure while copying record of type " + record.getClass(), e);
                }
            }
        }
    }

    // a test-case to test this locally without executing the full TestAllFiles
    @Test
    void test() throws Exception {
        // both resources are closed even if handleWorkbook() fails
        try (InputStream stream = new FileInputStream(HSSFTestDataSamples.getSampleFile("15556.xls"));
             HSSFWorkbook wb = new HSSFWorkbook(stream)) {
            handleWorkbook(wb);
        }
    }
}

View File

@ -0,0 +1,65 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.stress;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.jupiter.api.Test;
public class OWPFFileHandler extends POIFSFileHandler {
    /** Sample document used by the local test-cases below. */
    private static final String SAMPLE_PATH = "test-data/document/52117.doc";

    /**
     * Opens the stream as {@link HWPFOldDocument} and checks that its
     * font table and character table are available.
     */
    @Override
    public void handleFile(InputStream stream, String path) throws Exception {
        try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
            HWPFOldDocument oldDoc = new HWPFOldDocument(fs);
            assertNotNull(oldDoc.getOldFontTable());
            assertNotNull(oldDoc.getCharacterTable());
        }
    }

    // allows to run the handler on a single file without executing the full TestAllFiles
    @Override
    @Test
    @SuppressWarnings("java:S2699")
    public void test() throws Exception {
        File sample = new File(SAMPLE_PATH);

        try (InputStream in = new FileInputStream(sample)) {
            handleFile(in, sample.getPath());
        }

        handleExtracting(sample);

        try (FileInputStream in = new FileInputStream(sample);
             WordExtractor wordExtractor = new WordExtractor(in)) {
            assertNotNull(wordExtractor.getText());
        }
    }

    @Test
    public void testExtractingOld() {
        File sample = new File(SAMPLE_PATH);
        assertDoesNotThrow(() -> handleExtracting(sample));
    }
}

View File

@ -1,222 +0,0 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.stress;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.SuppressForbidden;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.tools.ant.DirectoryScanner;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
* Helper class to scan a folder for files and return a collection of
* found files together with the matching {@link FileHandler}.
*
* Can also be used to get the appropriate FileHandler for a single file.
*/
class POIFileScanner {
    private final static File ROOT_DIR;

    static {
        // when running in Gradle, current directory might be "build/integrationtest"
        File gradleRelative = new File("../../test-data");
        ROOT_DIR = gradleRelative.exists() ? gradleRelative : new File("test-data");
    }

    /**
     * Walks the given directory and pairs every included file with the
     * {@link FileHandler} determined for it.
     *
     * Note: files of unknown type are paired with {@link TestAllFiles.NullFileHandler}
     *
     * @param rootDir The directory to scan
     * @return file-name/FileHandler pairs which can be used for running tests on that file
     * @throws IOException If determining the file-type fails
     */
    public static Collection<Map.Entry<String, FileHandler>> scan(File rootDir) throws IOException {
        DirectoryScanner dirScanner = new DirectoryScanner();
        dirScanner.setBasedir(rootDir);
        dirScanner.setExcludes(TestAllFiles.SCAN_EXCLUDES);

        System.out.println("Scanning for files in " + rootDir);
        dirScanner.scan();

        String[] found = dirScanner.getIncludedFiles();
        System.out.println("Handling " + found.length + " files");

        List<Map.Entry<String, FileHandler>> result = new ArrayList<>();
        for (String name : found) {
            // the name is used as-is: replacing '\\' by '/' would break files
            // with a slash in their name on Linux
            FileHandler handler = getFileHandler(rootDir, name);
            result.add(new AbstractMap.SimpleImmutableEntry<>(name, handler));

            // progress output: a dot every 100 files, the file name every 100,000 files
            int count = result.size();
            if (count % 100 != 0) {
                continue;
            }
            System.out.print(".");
            if (count % 100_000 == 0) {
                System.out.println(name);
            }
        }
        System.out.println();

        return result;
    }

    /**
     * Determines the FileHandler for a single file, first by extension and,
     * if the extension is not registered, by inspecting the file itself.
     *
     * @param rootDir The directory where the file resides
     * @param file The name of the file without directory
     * @return The matching {@link FileHandler}, A {@link TestAllFiles.NullFileHandler}
     *          is returned if no match is found
     * @throws IOException If determining the file-type fails
     */
    protected static FileHandler getFileHandler(File rootDir, String file) throws IOException {
        FileHandler byExtension = TestAllFiles.HANDLERS.get(TestAllFiles.getExtension(file));
        if (byExtension != null) {
            return byExtension;
        }
        // the extension did not tell us anything, so take a close look at the file
        return detectUnnamedFile(rootDir, file);
    }

    /**
     * Picks a handler based on the file's magic: OLE2 and OOXML containers are
     * probed further, a few known non-office types are silently mapped to the
     * null handler, everything else is reported and mapped to the null handler.
     */
    private static FileHandler detectUnnamedFile(File rootDir, String file) throws IOException {
        File testFile = new File(rootDir, file);

        FileMagic magic = FileMagic.valueOf(testFile);
        switch (magic) {
            case OLE2:
                return detectOle2(testFile);

            case OOXML:
                return detectOoxml(testFile);

            // do not warn about a few detected file types
            case RTF:
            case PDF:
            case HTML:
            case XML:
            case JPEG:
            case GIF:
            case TIFF:
            case WMF:
            case EMF:
            case BMP:
                return TestAllFiles.NullFileHandler.instance;
        }

        System.out.println("Did not get a handler for extension " + TestAllFiles.getExtension(file) +
                " of file " + file + ": " + magic);
        return TestAllFiles.NullFileHandler.instance;
    }

    /** Probes an OLE2 file: first as HSSF workbook, then as HWPF document. */
    private static FileHandler detectOle2(File testFile) {
        try (POIFSFileSystem fs = new POIFSFileSystem(testFile, true)) {
            HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
            // we did not get an exception, so it seems this is a HSSFWorkbook
            return TestAllFiles.HANDLERS.get(".xls");
        } catch (IOException | RuntimeException workbookFailure) {
            try (FileInputStream in = new FileInputStream(testFile);
                 HWPFDocument ignored = new HWPFDocument(in)) {
                // seems to be a valid document
                return TestAllFiles.HANDLERS.get(".doc");
            } catch (IOException | RuntimeException documentFailure) {
                System.out.println("Could not open POIFSFileSystem for OLE2 file " + testFile + ": " + workbookFailure + " and " + documentFailure);
                return TestAllFiles.NullFileHandler.instance;
            }
        }
    }

    /** Probes an OOXML file: first as workbook, then as XWPF document. */
    private static FileHandler detectOoxml(File testFile) {
        try (Workbook ignored = WorkbookFactory.create(testFile, null, true)) {
            // seems to be a valid workbook
            return TestAllFiles.HANDLERS.get(".xlsx");
        } catch (IOException | RuntimeException workbookFailure) {
            try (FileInputStream in = new FileInputStream(testFile);
                 XWPFDocument ignored = new XWPFDocument(in)) {
                // seems to be a valid document
                return TestAllFiles.HANDLERS.get(".docx");
            } catch (IOException | RuntimeException documentFailure) {
                System.out.println("Could not open POIFSFileSystem for OOXML file " + testFile + ": " + workbookFailure + " and " + documentFailure);
                return TestAllFiles.NullFileHandler.instance;
            }
        }
    }

    @Disabled
    @Test
    @SuppressForbidden("Just an ignored test")
    void testInvalidFile() throws IOException, InterruptedException {
        File crawlDir = new File("/usbc/CommonCrawl");
        FileHandler detected = POIFileScanner.getFileHandler(crawlDir,
                "www.bgs.ac.uk_downloads_directdownload.cfm_id=2362&noexcl=true&t=west_20sussex_20-_20building_20stone_20quarries");

        assertEquals(XSSFFileHandler.class, detected.getClass());

        // to show the output from ZipFile() from commons-compress
        // although I did not find out yet why the ZipFile is not closed here
        for (int round = 0; round < 2; round++) {
            System.gc();
            Thread.sleep(1000);
        }
    }

    @Test
    void testDetectUnnamedFile() throws IOException {
        File spreadsheetDir = new File(ROOT_DIR, "spreadsheet");
        assertDoesNotThrow(() -> POIFileScanner.detectUnnamedFile(spreadsheetDir, "49156.xlsx"));
    }

    @Test
    void test() throws IOException {
        assertDoesNotThrow(() -> POIFileScanner.scan(ROOT_DIR));
    }
}

View File

@ -17,32 +17,48 @@
package org.apache.poi.stress; package org.apache.poi.stress;
import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.function.BiConsumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.poi.OldFileFormatException; import org.apache.commons.collections4.MultiValuedMap;
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.tools.ant.DirectoryScanner; import org.apache.tools.ant.DirectoryScanner;
import org.junit.jupiter.api.function.Executable;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.MethodSource;
import org.opentest4j.TestAbortedException; import org.opentest4j.AssertionFailedError;
/** /**
* This is an integration test which performs various actions on all stored test-files and tries * This is an integration test which performs various actions on all stored test-files and tries
@ -68,395 +84,256 @@ import org.opentest4j.TestAbortedException;
* here as well! This is to ensure that files that should not work really do not work, e.g. * here as well! This is to ensure that files that should not work really do not work, e.g.
* that we do not remove expected sanity checks. * that we do not remove expected sanity checks.
*/ */
class TestAllFiles { // also need to set JVM parameter: -Djunit.jupiter.execution.parallel.enabled=true
@Execution(ExecutionMode.CONCURRENT)
public class TestAllFiles {
private static final File ROOT_DIR = new File("test-data"); private static final File ROOT_DIR = new File("test-data");
private static final boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore");
public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
private static final Map<String,String> FILE_PASSWORD;
// map file extensions to the actual mappers public static final String[] SCAN_EXCLUDES = {
public static final Map<String, FileHandler> HANDLERS = new HashMap<>(); "**/.svn/**",
"lost+found",
"**/.git/**",
};
static { public static Stream<Arguments> allfiles(String testName) throws IOException {
// Excel MultiValuedMap<String, ExcInfo> exMap;
HANDLERS.put(".xls", new HSSFFileHandler()); Map<String,String> handlerMap;
HANDLERS.put(".xlsx", new XSSFFileHandler()); try (Workbook wb = WorkbookFactory.create(new File(ROOT_DIR, "spreadsheet/stress.xls"))) {
HANDLERS.put(".xlsm", new XSSFFileHandler()); exMap = readExMap(wb.getSheet("Exceptions"));
HANDLERS.put(".xltx", new XSSFFileHandler()); handlerMap = readHandlerMap(wb.getSheet("Handlers"));
HANDLERS.put(".xlsb", new XSSFBFileHandler());
// Word
HANDLERS.put(".doc", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HWPFFileHandler());
HANDLERS.put(".docx", new XWPFFileHandler());
HANDLERS.put(".dotx", new XWPFFileHandler());
HANDLERS.put(".docm", new XWPFFileHandler());
// OpenXML4J files
HANDLERS.put(".ooxml", new OPCFileHandler());
HANDLERS.put(".zip", new OPCFileHandler());
// Powerpoint
HANDLERS.put(".ppt", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSLFFileHandler());
HANDLERS.put(".pot", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSLFFileHandler());
HANDLERS.put(".pptx", new XSLFFileHandler());
HANDLERS.put(".pptm", new XSLFFileHandler());
HANDLERS.put(".ppsm", new XSLFFileHandler());
HANDLERS.put(".ppsx", new XSLFFileHandler());
HANDLERS.put(".thmx", new XSLFFileHandler());
HANDLERS.put(".potx", new XSLFFileHandler());
// Outlook
HANDLERS.put(".msg", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSMFFileHandler());
// Publisher
HANDLERS.put(".pub", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HPBFFileHandler());
// Visio - binary
HANDLERS.put(".vsd", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HDGFFileHandler());
// Visio - ooxml
HANDLERS.put(".vsdm", new XDGFFileHandler());
HANDLERS.put(".vsdx", new XDGFFileHandler());
HANDLERS.put(".vssm", new XDGFFileHandler());
HANDLERS.put(".vssx", new XDGFFileHandler());
HANDLERS.put(".vstm", new XDGFFileHandler());
HANDLERS.put(".vstx", new XDGFFileHandler());
// Visio - not handled yet
HANDLERS.put(".vst", NullFileHandler.instance);
HANDLERS.put(".vss", NullFileHandler.instance);
// POIFS
HANDLERS.put(".ole2", new POIFSFileHandler());
// Microsoft Admin Template?
HANDLERS.put(".adm", new HPSFFileHandler());
// Microsoft TNEF
HANDLERS.put(".dat", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HMEFFileHandler());
// TODO: are these readable by some of the formats?
HANDLERS.put(".wri", NullFileHandler.instance);
HANDLERS.put(".shw", NullFileHandler.instance);
HANDLERS.put(".zvi", NullFileHandler.instance);
HANDLERS.put(".mpp", NullFileHandler.instance);
HANDLERS.put(".qwp", NullFileHandler.instance);
HANDLERS.put(".wps", NullFileHandler.instance);
HANDLERS.put(".bin", NullFileHandler.instance);
HANDLERS.put(".xps", NullFileHandler.instance);
HANDLERS.put(".sldprt", NullFileHandler.instance);
HANDLERS.put(".mdb", NullFileHandler.instance);
HANDLERS.put(".vml", NullFileHandler.instance);
// ignore some file types, images, other formats, ...
HANDLERS.put(".txt", NullFileHandler.instance);
HANDLERS.put(".pdf", NullFileHandler.instance);
HANDLERS.put(".rtf", NullFileHandler.instance);
HANDLERS.put(".gif", NullFileHandler.instance);
HANDLERS.put(".html", NullFileHandler.instance);
HANDLERS.put(".png", NullFileHandler.instance);
HANDLERS.put(".wmf", NullFileHandler.instance);
HANDLERS.put(".emf", NullFileHandler.instance);
HANDLERS.put(".dib", NullFileHandler.instance);
HANDLERS.put(".svg", NullFileHandler.instance);
HANDLERS.put(".pict", NullFileHandler.instance);
HANDLERS.put(".jpg", NullFileHandler.instance);
HANDLERS.put(".jpeg", NullFileHandler.instance);
HANDLERS.put(".tif", NullFileHandler.instance);
HANDLERS.put(".tiff", NullFileHandler.instance);
HANDLERS.put(".wav", NullFileHandler.instance);
HANDLERS.put(".xml", NullFileHandler.instance);
HANDLERS.put(".csv", NullFileHandler.instance);
HANDLERS.put(".ods", NullFileHandler.instance);
HANDLERS.put(".ttf", NullFileHandler.instance);
HANDLERS.put(".fntdata", NullFileHandler.instance);
// VBA source files
HANDLERS.put(".vba", NullFileHandler.instance);
HANDLERS.put(".bas", NullFileHandler.instance);
HANDLERS.put(".frm", NullFileHandler.instance);
HANDLERS.put(".frx", NullFileHandler.instance); //binary
HANDLERS.put(".cls", NullFileHandler.instance);
// map some files without extension
HANDLERS.put("spreadsheet/BigSSTRecord", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR1", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR2", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR3", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR4", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR5", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR6", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecord2CR7", NullFileHandler.instance);
HANDLERS.put("spreadsheet/BigSSTRecordCR", NullFileHandler.instance);
HANDLERS.put("spreadsheet/test_properties1", NullFileHandler.instance);
// keystore files
HANDLERS.put(".pfx", NullFileHandler.instance);
HANDLERS.put(".pem", NullFileHandler.instance);
HANDLERS.put(".jks", NullFileHandler.instance);
HANDLERS.put(".pkcs12", NullFileHandler.instance);
Map<String,String> passmap = new HashMap<>();
passmap.put("slideshow/Password_Protected-hello.ppt", "hello");
passmap.put("slideshow/Password_Protected-56-hello.ppt", "hello");
passmap.put("slideshow/Password_Protected-np-hello.ppt", "hello");
passmap.put("slideshow/cryptoapi-proc2356.ppt", "crypto");
passmap.put("spreadsheet/xor-encryption-abc.xls", "abc");
passmap.put("spreadsheet/35897-type4.xls", "freedom");
passmap.put("spreadsheet/58616.xlsx", Decryptor.DEFAULT_PASSWORD);
passmap.put("spreadsheet/password.xls", "password");
passmap.put("spreadsheet/protected_passtika.xlsx", "tika");
passmap.put("document/bug53475-password-is-pass.docx", "pass");
passmap.put("document/bug53475-password-is-solrcell.docx", "solrcell");
passmap.put("document/password_password_cryptoapi.doc", "password");
passmap.put("document/password_tika_binaryrc4.doc", "tika");
passmap.put("poifs/protect.xlsx", Decryptor.DEFAULT_PASSWORD);
passmap.put("poifs/extenxls_pwd123.xlsx", "pwd123");
passmap.put("poifs/protected_agile.docx", Decryptor.DEFAULT_PASSWORD);
passmap.put("poifs/60320-protected.xlsx", "Test001!!");
passmap.put("poifs/protected_sha512.xlsx", "this is a test");
FILE_PASSWORD = Collections.unmodifiableMap(passmap);
} }
/**
 * Builds a read-only set from the given strings.
 *
 * @param a the elements of the set; duplicates collapse into one entry
 * @return an unmodifiable view of a freshly created hash set holding {@code a}
 */
private static Set<String> unmodifiableHashSet(String... a) {
    final Set<String> backing = hashSet(a);
    return Collections.unmodifiableSet(backing);
}
/**
 * Creates a new, mutable hash set containing the given strings.
 *
 * @param a the elements to add; duplicates collapse into one entry
 * @return a new {@link HashSet} holding the elements of {@code a}
 */
private static Set<String> hashSet(String... a) {
    final Set<String> elements = new HashSet<>(Arrays.asList(a).size());
    Collections.addAll(elements, a);
    return elements;
}
// Old Word Documents where we can at least extract some text.
// Entries are file paths using '/' as separator; they are matched against the
// scanned file names. When handling one of these files raises an
// OldFileFormatException, text extraction is still attempted for it.
private static final Set<String> OLD_FILES_HWPF = unmodifiableHashSet(
"document/Bug49933.doc",
"document/Bug51944.doc",
"document/Word6.doc",
"document/Word6_sections.doc",
"document/Word6_sections2.doc",
"document/Word95.doc",
"document/word95err.doc",
"document/Bug60936.doc",
"document/Bug60942.doc",
"document/Bug60942b.doc",
"document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC",
"hpsf/TestMickey.doc",
"document/52117.doc",
"hpsf/TestInvertedClassID.doc",
"hpsf/TestBug52117.doc"
);
// Files for which a failure while handling/extracting is tolerated by the test.
// Entries are file paths using '/' as separator; they are matched against the
// scanned file names. The inline comments group the entries by the reason
// they are expected to fail.
private static final Set<String> EXPECTED_FAILURES = unmodifiableHashSet(
// password protected files without known password
"spreadsheet/51832.xls",
"spreadsheet/64759.xlsx",
"document/PasswordProtected.doc",
// TODO: fails XMLExportTest, is this ok?
"spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
"spreadsheet/55864.xlsx",
"spreadsheet/57890.xlsx",
"spreadsheet/xxe_in_schema.xlsx",
// TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
"spreadsheet/44958.xls",
"spreadsheet/44958_1.xls",
"spreadsheet/testArraysAndTables.xls",
// TODO: good to ignore?
"spreadsheet/sample-beta.xlsx",
"document/cpansearch.perl.org_src_tobyink_acme-rundoc-0.001_word-lib_hello_world.docm",
// This is actually a spreadsheet!
"hpsf/TestRobert_Flaherty.doc",
// some files that are broken, e.g. Word 95, ...
"spreadsheet/43493.xls",
"spreadsheet/46904.xls",
"document/Bug50955.doc",
"document/57843.doc",
"slideshow/PPT95.ppt",
"slideshow/pp40only.ppt",
"slideshow/Divino_Revelado.pptx",
"openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
"openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
"openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
"openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
"openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
"openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
"openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
"openxml4j/invalid.xlsx",
"openxml4j/62592.thmx",
"spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764()
"spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764()
"poifs/unknown_properties.msg", // POIFS properties corrupted
"poifs/only-zero-byte-streams.ole2", // No actual contents
"spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
"document/61612a.docx",
"document/word2.doc",
"spreadsheet/xlsx-corrupted.xlsx",
"integration/stress025.docx",
// old Excel files, which we only support simple text extraction of
"spreadsheet/testEXCEL_2.xls",
"spreadsheet/testEXCEL_3.xls",
"spreadsheet/testEXCEL_4.xls",
"spreadsheet/testEXCEL_5.xls",
"spreadsheet/testEXCEL_95.xls",
"spreadsheet/59074.xls",
"spreadsheet/60284.xls",
"spreadsheet/64130.xls",
// OOXML Strict is not yet supported, see bug #57699
"spreadsheet/SampleSS.strict.xlsx",
"spreadsheet/SimpleStrict.xlsx",
"spreadsheet/sample.strict.xlsx",
"spreadsheet/57914.xlsx",
// files with XML entities
"openxml4j/ContentTypeHasEntities.ooxml",
// non-TNEF files
"ddf/Container.dat",
"ddf/47143.dat",
// sheet cloning errors
"spreadsheet/56450.xls",
// "spreadsheet/OddStyleRecord.xls",
// msg files with non-standard encodings
"hsmf/ASCII_CP1251_LCID1049.msg",
"hsmf/ASCII_UTF-8_CP1252_LCID1031.msg",
"hsmf/ASCII_UTF-8_CP1252_LCID1031_HTML.msg",
"hsmf/HTMLBodyBinary_CP1251.msg",
"hsmf/HTMLBodyBinary_UTF-8.msg"
);
// Files that are checked for before any handler is chosen and reported as
// "Ignoring ..." by the file scan. Entries are file paths using '/' as separator.
// NOTE(review): presumably such files are skipped entirely - confirm in files()
private static final Set<String> IGNORED = unmodifiableHashSet(
// OPC handler works / XSSF handler fails
"spreadsheet/57181.xlsm",
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
);
public static Stream<Arguments> files() {
DirectoryScanner scanner = new DirectoryScanner(); DirectoryScanner scanner = new DirectoryScanner();
scanner.setBasedir(ROOT_DIR); scanner.setBasedir(ROOT_DIR);
scanner.setExcludes(SCAN_EXCLUDES); scanner.setExcludes(SCAN_EXCLUDES);
scanner.scan(); scanner.scan();
System.out.println("Handling " + scanner.getIncludedFiles().length + " files"); final List<Arguments> result = new ArrayList<>(100);
List<Arguments> files = new ArrayList<>();
for (String file : scanner.getIncludedFiles()) { for (String file : scanner.getIncludedFiles()) {
file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise // ... failures/handlers lookup doesn't work on windows otherwise
if (IGNORED.contains(file)) { final String uniFile = file.replace('\\', '/');
System.out.println("Ignoring " + file);
String firstHandler = handlerMap.entrySet().stream()
.filter(me -> uniFile.endsWith(me.getKey()))
.map(Map.Entry::getValue).findFirst().orElse("NULL");
final String[] handlerStr = { firstHandler, secondHandler(firstHandler) };
for (String hs : handlerStr) {
if ("NULL".equals(hs)) continue;
ExcInfo info1 = exMap.get(file).stream()
.filter(e ->
(e.tests == null || e.tests.contains(testName) || "IGNORE".equals(e.tests)) &&
(e.handler == null || e.handler.contains(hs))
).findFirst().orElse(null);
if (info1 == null || !"IGNORE".equals(info1.tests)) {
result.add(Arguments.of(
file,
hs,
(info1 != null) ? info1.password : null,
(info1 != null) ? info1.exClazz : null,
(info1 != null) ? info1.exMessage : null
));
}
}
}
return result.stream();
}
/**
 * Argument source referenced by {@code @MethodSource("extractFiles")}.
 *
 * @return the arguments produced by {@code allfiles("extract")}
 * @throws IOException if {@code allfiles} fails
 */
public static Stream<Arguments> extractFiles() throws IOException {
    final Stream<Arguments> arguments = allfiles("extract");
    return arguments;
}
/**
 * Runs the extraction step of the named handler on one test file and checks
 * the outcome against the expected exception class/message.
 *
 * @param file      path of the test file below {@code ROOT_DIR}
 * @param handler   name of a {@code Handler} enum constant
 * @param password  password to set before running, may be {@code null}
 * @param exClass   expected exception class, or {@code null} if none is expected
 * @param exMessage expected exception message (prefix), may be {@code null}
 */
@ParameterizedTest(name = "#{index} {0} {1}")
@MethodSource("extractFiles")
void handleExtracting(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
    System.out.println("Running extractFiles on "+file);

    final Handler handlerType = Handler.valueOf(handler);
    final FileHandler fileHandler = handlerType.fileHandler.get();
    assertNotNull(fileHandler, "Did not find a handler for file " + file);

    // the file is resolved lazily, i.e. only when verify() executes the step
    verify(() -> fileHandler.handleExtracting(new File(ROOT_DIR, file)), exClass, exMessage, password);
}
/**
 * Argument source referenced by {@code @MethodSource("handleFiles")}.
 *
 * @return the arguments produced by {@code allfiles("handle")}
 * @throws IOException if {@code allfiles} fails
 */
public static Stream<Arguments> handleFiles() throws IOException {
    final Stream<Arguments> arguments = allfiles("handle");
    return arguments;
}
/**
 * Feeds one test file as a buffered stream to the named handler and checks
 * the outcome against the expected exception class/message.
 *
 * @param file      path of the test file below {@code ROOT_DIR}
 * @param handler   name of a {@code Handler} enum constant
 * @param password  password to set before running, may be {@code null}
 * @param exClass   expected exception class, or {@code null} if none is expected
 * @param exMessage expected exception message (prefix), may be {@code null}
 * @throws IOException if the file cannot be opened or closed
 */
@ParameterizedTest(name = "#{index} {0} {1}")
@MethodSource("handleFiles")
void handleFile(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
    System.out.println("Running handleFiles on "+file);

    final Handler handlerType = Handler.valueOf(handler);
    final FileHandler fileHandler = handlerType.fileHandler.get();
    assertNotNull(fileHandler, "Did not find a handler for file " + file);

    final int bufferSize = 64 * 1024;
    try (InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)), bufferSize)) {
        verify(() -> fileHandler.handleFile(stream, file), exClass, exMessage, password);
    }
}
/**
 * Argument source referenced by {@code @MethodSource("handleAdditionals")}.
 *
 * @return the arguments produced by {@code allfiles("additional")}
 * @throws IOException if {@code allfiles} fails
 */
public static Stream<Arguments> handleAdditionals() throws IOException {
    final Stream<Arguments> arguments = allfiles("additional");
    return arguments;
}
/**
 * Runs the "additional" step of the named handler on one test file and checks
 * the outcome against the expected exception class/message.
 *
 * @param file      path of the test file below {@code ROOT_DIR}
 * @param handler   name of a {@code Handler} enum constant
 * @param password  password to set before running, may be {@code null}
 * @param exClass   expected exception class, or {@code null} if none is expected
 * @param exMessage expected exception message (prefix), may be {@code null}
 */
@ParameterizedTest(name = "#{index} {0} {1}")
@MethodSource("handleAdditionals")
void handleAdditional(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) {
    System.out.println("Running additionals on "+file);

    final Handler handlerType = Handler.valueOf(handler);
    final FileHandler fileHandler = handlerType.fileHandler.get();
    assertNotNull(fileHandler, "Did not find a handler for file " + file);

    // the file is resolved lazily, i.e. only when verify() executes the step
    verify(() -> fileHandler.handleAdditional(new File(ROOT_DIR, file)), exClass, exMessage, password);
}
/**
 * Executes {@code exec} and checks the outcome against the expectation.
 * <ul>
 * <li>{@code exClass == null}: {@code exec} must not throw.</li>
 * <li>{@code exClass} is an {@link AssertionFailedError} (sub)type: {@code exec} must fail
 *     an assertion whose message equals {@code exMessage}.</li>
 * <li>any other {@code exClass}: {@code exec} must throw that type; its message must be
 *     {@code null} if {@code exMessage} is {@code null}, otherwise start with {@code exMessage}.</li>
 * </ul>
 *
 * @param exec      the test step to run
 * @param exClass   expected throwable type, or {@code null} if none is expected
 * @param exMessage expected message (exact for assertion failures, prefix otherwise), may be {@code null}
 * @param password  password set as current user password before running, may be {@code null}
 */
private static void verify(Executable exec, Class<? extends Throwable> exClass, String exMessage, String password) {
    // this also removes the password for non encrypted files
    Biff8EncryptionKey.setCurrentUserPassword(password);

    if (exClass == null) {
        assertDoesNotThrow(exec);
        return;
    }

    if (AssertionFailedError.class.isAssignableFrom(exClass)) {
        // assertThrows instead of try/fail()/catch: a fail() inside the try block would be
        // swallowed by the AssertionFailedError handler and reported as a message mismatch
        AssertionFailedError e = assertThrows(AssertionFailedError.class, exec, "expected failed assertion");
        assertEquals(exMessage, e.getMessage());
        return;
    }

    // typed as Throwable, so expected Error subclasses don't trigger a ClassCastException
    Throwable e = assertThrows(exClass, exec);
    String actMsg = e.getMessage();
    if (exMessage == null) {
        assertNull(actMsg);
    } else {
        assertNotNull(actMsg);
        assertTrue(actMsg.startsWith(exMessage), "Message: "+actMsg+" - didn't start with "+exMessage);
    }
}
/**
 * Determines the container-level handler that is run in addition to a
 * format-specific handler.
 *
 * @param handlerStr name of the format-specific handler (a {@code Handler} constant name)
 * @return {@code "OPC"} for the OOXML based handlers, {@code "HPSF"} for the
 *         OLE2 based handlers, otherwise {@code "NULL"}
 */
private static String secondHandler(String handlerStr) {
    switch (handlerStr) {
        case "XSSF":
        case "XWPF":
        case "XSLF":
        case "XDGF":
            return "OPC";
        case "HSSF":
        case "HWPF":
        case "HSLF":
        case "HDGF":
        case "HSMF":
        // the Publisher handler constant is spelled HPBF (was misspelled "HBPF" here,
        // hence Publisher files never got the HPSF pass)
        case "HPBF":
            return "HPSF";
        default:
            return "NULL";
    }
}
private static Map<String,String> readHandlerMap(Sheet sh) {
Map<String,String> handlerMap = new LinkedHashMap<>();
boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore");
boolean isFirst = true;
for (Row row : sh) {
if (isFirst) {
isFirst = false;
continue; continue;
} }
FileHandler handler = HANDLERS.get(getExtension(file)); Cell cell = row.getCell(2);
files.add(Arguments.of( file, handler )); if (IGNORE_SCRATCHPAD || cell == null || cell.getCellType() != CellType.STRING) {
cell = row.getCell(1);
// for some file-types also run OPCFileHandler }
if(handler instanceof XSSFFileHandler || handlerMap.put(row.getCell(0).getStringCellValue(), cell.getStringCellValue());
handler instanceof XWPFFileHandler || }
handler instanceof XSLFFileHandler || return handlerMap;
handler instanceof XDGFFileHandler) {
files.add(Arguments.of( file, new OPCFileHandler() ));
} }
if (handler instanceof HSSFFileHandler ||
handler instanceof HSLFFileHandler || private static MultiValuedMap<String, ExcInfo> readExMap(Sheet sh) {
handler instanceof HWPFFileHandler || MultiValuedMap<String, ExcInfo> exMap = new ArrayListValuedHashMap<>();
handler instanceof HDGFFileHandler) {
files.add(Arguments.of( file, new HPSFFileHandler() )); Iterator<Row> iter = sh.iterator();
List<BiConsumer<ExcInfo,String>> cols = initCols(iter.next());
while (iter.hasNext()) {
ExcInfo info = new ExcInfo();
for (Cell cell : iter.next()) {
if (cell.getCellType() == CellType.STRING) {
cols.get(cell.getColumnIndex()).accept(info, cell.getStringCellValue());
} }
} }
exMap.put(info.file, info);
return files.stream(); }
return exMap;
} }
// the display name annotation is ignored by ants junitlauncher listeners :(
// ... even when using a custom display name generator
@ParameterizedTest(name = "#{index} {0}" )
@MethodSource("files")
void testAllFiles(String file, FileHandler handler) throws Exception {
assertNotNull(handler, "Did not find a handler for file " + file);
// this also removes the password for non encrypted files
String pass = TestAllFiles.FILE_PASSWORD.get(file);
Biff8EncryptionKey.setCurrentUserPassword(pass);
System.out.println("Reading " + file + " with " + handler.getClass().getSimpleName());
assertNotNull( handler, "Unknown file extension for file: " + file + ": " + getExtension(file) );
File inputFile = new File(ROOT_DIR, file);
// special cases where docx-handling breaks, but OPCPackage handling works
boolean ignoredOPC = (file.endsWith(".docx") || file.endsWith(".xlsx") ||
file.endsWith(".xlsb") || file.endsWith(".pptx")) &&
handler instanceof OPCFileHandler;
boolean ignoreHPSF = (handler instanceof HPSFFileHandler);
private static List<BiConsumer<ExcInfo,String>> initCols(Row row) {
Map<String,BiConsumer<ExcInfo,String>> m = new HashMap<>();
m.put("File", (e,s) -> e.file = s);
m.put("Tests", (e,s) -> e.tests = s);
m.put("Handler", (e,s) -> e.handler = s);
m.put("Password", (e,s) -> e.password = s);
m.put("Exception Class", (e,s) -> {
try { try {
try (InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64 * 1024)) { e.exClazz = (Class<? extends Exception>) Class.forName(s);
handler.handleFile(stream, file); } catch (ClassNotFoundException ex) {
assertFalse( OLD_FILES_HWPF.contains(file) && !ignoreHPSF, "Expected to fail for file " + file + " and handler " + handler + ", but did not fail!" ); fail(ex);
}
});
m.put("Exception Message", (e,s) -> e.exMessage = s);
return StreamSupport
.stream(row.spliterator(), false)
.map(Cell::getStringCellValue)
.map(v -> m.getOrDefault(v, (e,s) -> {}))
.collect(Collectors.toList());
} }
handler.handleExtracting(inputFile); private static class ExcInfo {
String file;
String tests;
String handler;
String password;
Class<? extends Throwable> exClazz;
String exMessage;
assertFalse( EXPECTED_FAILURES.contains(file) && !ignoredOPC && !ignoreHPSF, "Expected to fail for file " + file + " and handler " + handler + ", but did not fail!" );
} catch (OldFileFormatException e) {
// for old word files we should still support extracting text
if(OLD_FILES_HWPF.contains(file)) {
handler.handleExtracting(inputFile);
} else {
// check if we expect failure for this file
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
System.out.println("Failed: " + file);
throw new Exception("While handling " + file, e);
}
}
} catch (TestAbortedException e) {
// file handler ignored this file
} catch (Exception e) {
// check if we expect failure for this file
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
System.out.println("Failed: " + file);
throw new Exception("While handling " + file, e);
}
} }
try { @SuppressWarnings("unused")
// let some file handlers do additional stuff private enum Handler {
handler.handleAdditional(inputFile); HDGF(HDGFFileHandler::new),
} catch (TestAbortedException e) { HMEF(HMEFFileHandler::new),
// file handler ignored this file HPBF(HPBFFileHandler::new),
} catch (Exception e) { HPSF(HPSFFileHandler::new),
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { HSLF(HSLFFileHandler::new),
System.out.println("Failed: " + file); HSMF(HSMFFileHandler::new),
throw new Exception("While handling " + file, e); HSSF(HSSFFileHandler::new),
} HWPF(HWPFFileHandler::new),
} OPC(OPCFileHandler::new),
} POIFS(POIFSFileHandler::new),
XDGF(XDGFFileHandler::new),
XSLF(XSLFFileHandler::new),
XSSFB(XSSFBFileHandler::new),
XSSF(XSSFFileHandler::new),
XWPF(XWPFFileHandler::new),
OWPF(OWPFFileHandler::new),
NULL(NullFileHandler::new)
;
public static String getExtension(String file) { final Supplier<FileHandler> fileHandler;
int pos = file.lastIndexOf('.'); Handler(Supplier<FileHandler> fileHandler) {
if(pos == -1 || pos == file.length()-1) { this.fileHandler = fileHandler;
return file;
} }
return file.substring(pos).toLowerCase(Locale.ROOT);
} }
public static class NullFileHandler implements FileHandler { public static class NullFileHandler implements FileHandler {
public static final FileHandler instance = new NullFileHandler();
@Override @Override
public void handleFile(InputStream stream, String path) { public void handleFile(InputStream stream, String path) {
} }

View File

@ -24,6 +24,7 @@ import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.sl.extractor.SlideShowExtractor; import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XSLFSlideShow;
@ -32,8 +33,9 @@ import org.junit.jupiter.api.Test;
class XSLFFileHandler extends SlideShowHandler { class XSLFFileHandler extends SlideShowHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
XMLSlideShow slide = new XMLSlideShow(stream); try (XMLSlideShow slide = new XMLSlideShow(stream);
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage()); XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
;
assertNotNull(slideInner.getPresentation()); assertNotNull(slideInner.getPresentation());
assertNotNull(slideInner.getSlideMasterReferences()); assertNotNull(slideInner.getSlideMasterReferences());
assertNotNull(slideInner.getSlideReferences()); assertNotNull(slideInner.getSlideReferences());
@ -41,9 +43,10 @@ class XSLFFileHandler extends SlideShowHandler {
new POIXMLDocumentHandler().handlePOIXMLDocument(slide); new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
handleSlideShow(slide); handleSlideShow(slide);
} catch (POIXMLException e) {
slideInner.close(); Exception cause = (Exception)e.getCause();
slide.close(); throw cause == null ? e : cause;
}
} }
@Override @Override

View File

@ -21,6 +21,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -30,9 +31,13 @@ class XWPFFileHandler extends AbstractFileHandler {
// ignore password protected files // ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return; if (POIXMLDocumentHandler.isEncrypted(stream)) return;
XWPFDocument doc = new XWPFDocument(stream); try (XWPFDocument doc = new XWPFDocument(stream)) {
new POIXMLDocumentHandler().handlePOIXMLDocument(doc); new POIXMLDocumentHandler().handlePOIXMLDocument(doc);
} catch (POIXMLException e) {
Exception cause = (Exception)e.getCause();
throw cause == null ? e : cause;
}
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles

View File

@ -24,13 +24,15 @@ public class Configurator {
private static POILogger logger = POILogFactory.getLogger(Configurator.class); private static POILogger logger = POILogFactory.getLogger(Configurator.class);
public static int getIntValue(String systemProperty, int defaultValue) { public static int getIntValue(String systemProperty, int defaultValue) {
int result = defaultValue;
String property = System.getProperty(systemProperty); String property = System.getProperty(systemProperty);
if (property == null || "".equals(property) || "null".equals(property)) {
return defaultValue;
}
try { try {
result = Integer.parseInt(property); return Integer.parseInt(property);
} catch (Exception e) { } catch (Exception e) {
logger.log(POILogger.ERROR, "System property -D", systemProperty, " does not contains a valid integer: ", property); logger.log(POILogger.ERROR, "System property -D", systemProperty, " does not contains a valid integer: ", property);
} return defaultValue;
return result; }
} }
} }

View File

@ -21,6 +21,7 @@ module org.apache.poi.stress {
requires net.bytebuddy; requires net.bytebuddy;
requires java.desktop; requires java.desktop;
requires org.apache.commons.collections4;
requires org.apache.poi.examples; requires org.apache.poi.examples;
exports org.apache.poi.stress; exports org.apache.poi.stress;

Binary file not shown.