mirror of https://github.com/apache/poi.git
#65046 - Simplify integration tests
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1885538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ca71e9dacf
commit
309e657b0d
|
@ -1277,6 +1277,7 @@ under the License.
|
|||
|
||||
<!-- jvmarg value="-Duser.timezone=UTC"/ -->
|
||||
<jvmarg value="${file.leak.detector}" />
|
||||
<jvmarg value="-Djunit.jupiter.execution.parallel.enabled=true" />
|
||||
<jvmarg value="-Xjit:verbose={compileStart|compileEnd},vlog=build/jit.log${no.jit.sherlock}" if:true="${isIBMVM}"/>
|
||||
|
||||
<modulepath refid="@{modulepath-ref}" unless:true="${isJava8}"/>
|
||||
|
@ -1285,6 +1286,10 @@ under the License.
|
|||
<jvmarg line="--add-modules org.junit.jupiter.params" unless:true="${isJava8}"/>
|
||||
<jvmarg line="--add-modules org.apache.poi.@{module1}" if:set="use_module1"/>
|
||||
<jvmarg line="--add-modules org.apache.poi.@{module2}" if:set="use_module2"/>
|
||||
|
||||
<!-- mute some notorious talkative classes -->
|
||||
<jvmarg line="-Dorg.slf4j.simpleLogger.log.org.apache.poi.hdgf.chunks.Chunk=off"/>
|
||||
<jvmarg line="-Dorg.slf4j.simpleLogger.log.org.apache.poi=error"/>
|
||||
</fork>
|
||||
|
||||
<!-- can't use resultfile="status-as-tests-run.txt" here ... it's truncated with every test -->
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.FileOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||
|
@ -63,6 +64,8 @@ import org.apache.poi.util.TempFile;
|
|||
public final class CopyCompare {
|
||||
private CopyCompare() {}
|
||||
|
||||
private static final ThreadLocal<PrintStream> out = ThreadLocal.withInitial(() -> System.out);
|
||||
|
||||
/**
|
||||
* Runs the example program. The application expects one or two arguments:
|
||||
*
|
||||
|
@ -117,10 +120,14 @@ public final class CopyCompare {
|
|||
POIFSFileSystem cpfs = new POIFSFileSystem(new File(copyFileName))) {
|
||||
final DirectoryEntry oRoot = opfs.getRoot();
|
||||
final DirectoryEntry cRoot = cpfs.getRoot();
|
||||
System.out.println(EntryUtils.areDirectoriesIdentical(oRoot, cRoot) ? "Equal" : "Not equal");
|
||||
out.get().println(EntryUtils.areDirectoriesIdentical(oRoot, cRoot) ? "Equal" : "Not equal");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sets the stream that this example prints its comparison result to,
 * for the calling thread only.
 * <p>
 * The stream is kept in a {@link ThreadLocal} which falls back to
 * {@code System.out} for threads that never called this method.
 *
 * @param ps the stream to print to from the current thread
 */
public static void setOut(PrintStream ps) {
|
||||
out.set(ps);
|
||||
}
|
||||
|
||||
/**
 * Supplier-style callback handing out an {@link InputStream}.
 * <p>
 * In contrast to {@code java.util.function.Supplier}, {@link #get()} is
 * allowed to throw the checked exceptions declared below.
 * NOTE(review): the callers are outside of this excerpt - confirm the intended use there.
 */
private interface InputStreamSupplier {
|
||||
    /**
     * @return the stream provided by this supplier
     * @throws IOException declared for implementations that fail on I/O
     * @throws WritingNotSupportedException declared for implementations that cannot write the underlying data
     */
InputStream get() throws IOException, WritingNotSupportedException;
|
||||
}
|
||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.poi.extractor.POIOLE2TextExtractor;
|
|||
import org.apache.poi.extractor.POITextExtractor;
|
||||
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
|
||||
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
|
||||
import org.apache.poi.ooxml.POIXMLException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.ss.extractor.ExcelExtractor;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -118,10 +119,10 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||
}
|
||||
|
||||
// test again with including formulas and cell-comments as this caused some bugs
|
||||
if(extractor instanceof ExcelExtractor &&
|
||||
if (extractor instanceof ExcelExtractor &&
|
||||
// comment-extraction and formula extraction are not well supported in event based extraction
|
||||
!(extractor instanceof EventBasedExcelExtractor)) {
|
||||
((ExcelExtractor)extractor).setFormulasNotResults(true);
|
||||
((ExcelExtractor) extractor).setFormulasNotResults(true);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertNotNull(text);
|
||||
|
@ -132,6 +133,17 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||
text = extractor.getText();
|
||||
assertNotNull(text);
|
||||
}
|
||||
} catch (IOException | POIXMLException e) {
|
||||
Exception prevE = e;
|
||||
Throwable cause;
|
||||
while ((cause = prevE.getCause()) instanceof Exception) {
|
||||
if (cause instanceof IOException || cause instanceof POIXMLException) {
|
||||
prevE = (Exception)cause;
|
||||
} else {
|
||||
throw (Exception)cause;
|
||||
}
|
||||
}
|
||||
throw e;
|
||||
} catch (IllegalArgumentException e) {
|
||||
if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
|
||||
throw e;
|
||||
|
|
|
@ -50,7 +50,7 @@ public class BaseIntegrationTest {
|
|||
}
|
||||
|
||||
void test() throws Exception {
|
||||
assertNotNull( handler, "Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file) );
|
||||
assertNotNull( handler, "Unknown file extension for file: " + file );
|
||||
testOneFile(new File(rootDir, file));
|
||||
}
|
||||
|
||||
|
@ -123,29 +123,29 @@ public class BaseIntegrationTest {
|
|||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||
|
||||
if(message != null && (message.equals("The document is really a XLS file"))) {
|
||||
handler = TestAllFiles.HANDLERS.get(".xls");
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a PPT file"))) {
|
||||
handler = TestAllFiles.HANDLERS.get(".ppt");
|
||||
handler = new HSLFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a DOC file"))) {
|
||||
handler = TestAllFiles.HANDLERS.get(".doc");
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a VSD file"))) {
|
||||
handler = TestAllFiles.HANDLERS.get(".vsd");
|
||||
handler = new HDGFFileHandler();
|
||||
|
||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||
} else if (handler instanceof HWPFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".docx");
|
||||
handler = new XWPFFileHandler();
|
||||
} else if (handler instanceof HSSFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".xlsx");
|
||||
handler = new XSSFFileHandler();
|
||||
} else if (handler instanceof HSLFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".pptx");
|
||||
handler = new XSLFFileHandler();
|
||||
|
||||
// and the other way around, use HWPF instead of XWPF and so forth
|
||||
} else if(handler instanceof XWPFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".doc");
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(handler instanceof XSSFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".xls");
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(handler instanceof XSLFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".ppt");
|
||||
handler = new HSLFFileHandler();
|
||||
} else {
|
||||
// nothing matched => throw the exception to the outside
|
||||
throw e;
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.poi.stress;
|
|||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
import static org.junit.jupiter.api.Assumptions.assumeFalse;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -46,7 +47,7 @@ import org.junit.jupiter.api.Test;
|
|||
class HPSFFileHandler extends POIFSFileHandler {
|
||||
private static final String NL = System.getProperty("line.separator");
|
||||
|
||||
private static File copyOutput;
|
||||
private static final ThreadLocal<File> copyOutput = ThreadLocal.withInitial(HPSFFileHandler::getTempFile);
|
||||
|
||||
static final Set<String> EXCLUDES_HANDLE_ADD = unmodifiableHashSet(
|
||||
"spreadsheet/45290.xls",
|
||||
|
@ -58,12 +59,6 @@ class HPSFFileHandler extends POIFSFileHandler {
|
|||
"document/word2.doc"
|
||||
);
|
||||
|
||||
static final Set<String> EXCLUDES_HANDLE_FILE = unmodifiableHashSet(
|
||||
"hpsf/Test_Humor-Generation.ppt",
|
||||
"slideshow/missing-moveto.ppt" // POIFS properties corrupted
|
||||
);
|
||||
|
||||
|
||||
private static Set<String> unmodifiableHashSet(String... a) {
|
||||
return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(a)));
|
||||
}
|
||||
|
@ -71,7 +66,6 @@ class HPSFFileHandler extends POIFSFileHandler {
|
|||
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
assumeFalse(EXCLUDES_HANDLE_FILE.contains(path));
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
|
||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
|
||||
|
@ -95,24 +89,26 @@ class HPSFFileHandler extends POIFSFileHandler {
|
|||
}
|
||||
}
|
||||
|
||||
private static File getTempFile() {
|
||||
File f = null;
|
||||
try {
|
||||
f = TempFile.createTempFile("hpsfCopy", "out");
|
||||
} catch (IOException e) {
|
||||
fail(e);
|
||||
}
|
||||
f.deleteOnExit();
|
||||
return f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleAdditional(File file) throws Exception {
|
||||
assumeFalse(EXCLUDES_HANDLE_ADD.contains(file.getParentFile().getName()+"/"+file.getName()));
|
||||
if (copyOutput == null) {
|
||||
copyOutput = TempFile.createTempFile("hpsfCopy", "out");
|
||||
copyOutput.deleteOnExit();
|
||||
}
|
||||
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
PrintStream psNew = new PrintStream(bos, true, "ISO-8859-1");
|
||||
PrintStream ps = System.out;
|
||||
try {
|
||||
System.setOut(psNew);
|
||||
CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.getAbsolutePath()});
|
||||
CopyCompare.setOut(psNew);
|
||||
CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.get().getAbsolutePath()});
|
||||
assertEquals("Equal" + NL, bos.toString(StandardCharsets.UTF_8.name()));
|
||||
} finally {
|
||||
System.setOut(ps);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.stress;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.hssf.record.Record;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Needs to be implemented in this package to have access to
|
||||
* HSSFWorkbook.getWorkbook()
|
||||
*/
|
||||
class HSSFRecordsStresser {
|
||||
public static void handleWorkbook(HSSFWorkbook wb) {
|
||||
List<org.apache.poi.hssf.record.Record> records = wb.getWorkbook().getRecords();
|
||||
for(org.apache.poi.hssf.record.Record record : records) {
|
||||
// some Records do not implement clone ?!
|
||||
// equals instead of instanceof is on purpose here to only skip exactly this class and not any derived ones
|
||||
// if(record.getClass().equals(InterfaceHdrRecord.class) ||
|
||||
// record.getClass().equals(MMSRecord.class) ||
|
||||
// record.getClass().equals(InterfaceEndRecord.class) ||
|
||||
// record.getClass().equals(WriteAccessRecord.class) ||
|
||||
// record.getClass().equals(CodepageRecord.class) ||
|
||||
// record.getClass().equals(DSFRecord.class)) {
|
||||
// continue;
|
||||
// }
|
||||
try {
|
||||
Record newRecord = record.copy();
|
||||
|
||||
assertEquals( record.getClass(), newRecord.getClass(), "Expecting the same class back from clone(), but had Record of type " + record.getClass() + " and got back a " + newRecord.getClass() + " from clone()" );
|
||||
|
||||
byte[] origBytes = record.serialize();
|
||||
byte[] newBytes = newRecord.serialize();
|
||||
|
||||
assertArrayEquals( origBytes, newBytes, "Record of type " + record.getClass() + " should return the same byte array via the clone() method, but did return a different array" );
|
||||
} catch (RuntimeException e) {
|
||||
// some Records do not implement clone, ignore those for now
|
||||
assertTrue(e.getMessage().contains("needs to define a clone method"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
try (InputStream stream = new FileInputStream(HSSFTestDataSamples.getSampleFile("15556.xls"))) {
|
||||
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
||||
handleWorkbook(wb);
|
||||
wb.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.stress;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFOldDocument;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class OWPFFileHandler extends POIFSFileHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
try (POIFSFileSystem poifs = new POIFSFileSystem(stream)) {
|
||||
HWPFOldDocument doc = new HWPFOldDocument(poifs);
|
||||
assertNotNull(doc.getOldFontTable());
|
||||
assertNotNull(doc.getCharacterTable());
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Test
|
||||
@SuppressWarnings("java:S2699")
|
||||
public void test() throws Exception {
|
||||
File file = new File("test-data/document/52117.doc");
|
||||
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
|
||||
try (FileInputStream stream = new FileInputStream(file);
|
||||
WordExtractor extractor = new WordExtractor(stream)) {
|
||||
assertNotNull(extractor.getText());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractingOld() {
|
||||
File file = new File("test-data/document/52117.doc");
|
||||
assertDoesNotThrow(() -> handleExtracting(file));
|
||||
}
|
||||
}
|
|
@ -1,222 +0,0 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
package org.apache.poi.stress;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.apache.poi.util.SuppressForbidden;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.tools.ant.DirectoryScanner;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Helper class to scan a folder for files and return a collection of
|
||||
* found files together with the matching {@link FileHandler}.
|
||||
*
|
||||
* Can also be used to get the appropriate FileHandler for a single file.
|
||||
*/
|
||||
class POIFileScanner {
|
||||
private final static File ROOT_DIR;
|
||||
static {
|
||||
// when running in Gradle, current directory might be "build/integrationtest"
|
||||
if(new File("../../test-data").exists()) {
|
||||
ROOT_DIR = new File("../../test-data");
|
||||
} else {
|
||||
ROOT_DIR = new File("test-data");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan a folder for files and return a collection of
|
||||
* found files together with the matching {@link FileHandler}.
|
||||
*
|
||||
* Note: unknown files will be assigned to {@link TestAllFiles.NullFileHandler}
|
||||
*
|
||||
* @param rootDir The directory to scan
|
||||
* @return A collection with file-FileHandler pairs which can be used for running tests on that file
|
||||
* @throws IOException If determining the file-type fails
|
||||
*/
|
||||
public static Collection<Map.Entry<String, FileHandler>> scan(File rootDir) throws IOException {
|
||||
DirectoryScanner scanner = new DirectoryScanner();
|
||||
scanner.setBasedir(rootDir);
|
||||
|
||||
scanner.setExcludes(TestAllFiles.SCAN_EXCLUDES);
|
||||
|
||||
System.out.println("Scanning for files in " + rootDir);
|
||||
|
||||
scanner.scan();
|
||||
|
||||
String[] includedFiles = scanner.getIncludedFiles();
|
||||
System.out.println("Handling " + includedFiles.length + " files");
|
||||
|
||||
List<Map.Entry<String, FileHandler>> files = new ArrayList<>();
|
||||
for(String file : includedFiles) {
|
||||
// breaks files with slash in their name on Linux:
|
||||
// file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
|
||||
|
||||
FileHandler fileHandler = getFileHandler(rootDir, file);
|
||||
|
||||
files.add(new AbstractMap.SimpleImmutableEntry<>(file, fileHandler));
|
||||
|
||||
if(files.size() % 100 == 0) {
|
||||
System.out.print(".");
|
||||
if(files.size() % 100_000 == 0) {
|
||||
System.out.println(file);
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the FileHandler for a single file
|
||||
*
|
||||
* @param rootDir The directory where the file resides
|
||||
* @param file The name of the file without directory
|
||||
* @return The matching {@link FileHandler}, A {@link TestAllFiles.NullFileHandler}
|
||||
* is returned if no match is found
|
||||
* @throws IOException If determining the file-type fails
|
||||
*/
|
||||
protected static FileHandler getFileHandler(File rootDir, String file) throws IOException {
|
||||
FileHandler fileHandler = TestAllFiles.HANDLERS.get(TestAllFiles.getExtension(file));
|
||||
if(fileHandler == null) {
|
||||
// we could not detect a type of file based on the extension, so we
|
||||
// need to take a close look at the file
|
||||
fileHandler = detectUnnamedFile(rootDir, file);
|
||||
}
|
||||
return fileHandler;
|
||||
}
|
||||
|
||||
private static FileHandler detectUnnamedFile(File rootDir, String file) throws IOException {
|
||||
File testFile = new File(rootDir, file);
|
||||
|
||||
// find out if it looks like OLE2 (HSSF, HSLF, HWPF, ...) or OOXML (XSSF, XSLF, XWPF, ...)
|
||||
// and then determine the file type accordingly
|
||||
FileMagic magic = FileMagic.valueOf(testFile);
|
||||
switch (magic) {
|
||||
case OLE2: {
|
||||
try {
|
||||
try (POIFSFileSystem fs = new POIFSFileSystem(testFile, true)) {
|
||||
HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
|
||||
}
|
||||
|
||||
// we did not get an exception, so it seems this is a HSSFWorkbook
|
||||
return TestAllFiles.HANDLERS.get(".xls");
|
||||
} catch (IOException | RuntimeException e) {
|
||||
try {
|
||||
try (FileInputStream istream = new FileInputStream(testFile)) {
|
||||
try (HWPFDocument ignored = new HWPFDocument(istream)) {
|
||||
// seems to be a valid document
|
||||
return TestAllFiles.HANDLERS.get(".doc");
|
||||
}
|
||||
}
|
||||
} catch (IOException | RuntimeException e2) {
|
||||
System.out.println("Could not open POIFSFileSystem for OLE2 file " + testFile + ": " + e + " and " + e2);
|
||||
return TestAllFiles.NullFileHandler.instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
case OOXML: {
|
||||
try {
|
||||
try (Workbook ignored = WorkbookFactory.create(testFile, null, true)) {
|
||||
// seems to be a valid workbook
|
||||
return TestAllFiles.HANDLERS.get(".xlsx");
|
||||
}
|
||||
} catch (IOException | RuntimeException e) {
|
||||
try {
|
||||
try (FileInputStream is = new FileInputStream(testFile)) {
|
||||
try (XWPFDocument ignored = new XWPFDocument(is)) {
|
||||
// seems to be a valid document
|
||||
return TestAllFiles.HANDLERS.get(".docx");
|
||||
}
|
||||
}
|
||||
} catch (IOException | RuntimeException e2) {
|
||||
System.out.println("Could not open POIFSFileSystem for OOXML file " + testFile + ": " + e + " and " + e2);
|
||||
return TestAllFiles.NullFileHandler.instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// do not warn about a few detected file types
|
||||
case RTF:
|
||||
case PDF:
|
||||
case HTML:
|
||||
case XML:
|
||||
case JPEG:
|
||||
case GIF:
|
||||
case TIFF:
|
||||
case WMF:
|
||||
case EMF:
|
||||
case BMP:
|
||||
return TestAllFiles.NullFileHandler.instance;
|
||||
}
|
||||
|
||||
System.out.println("Did not get a handler for extension " + TestAllFiles.getExtension(file) +
|
||||
" of file " + file + ": " + magic);
|
||||
return TestAllFiles.NullFileHandler.instance;
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
@SuppressForbidden("Just an ignored test")
|
||||
void testInvalidFile() throws IOException, InterruptedException {
|
||||
FileHandler fileHandler = POIFileScanner.getFileHandler(new File("/usbc/CommonCrawl"),
|
||||
"www.bgs.ac.uk_downloads_directdownload.cfm_id=2362&noexcl=true&t=west_20sussex_20-_20building_20stone_20quarries");
|
||||
|
||||
assertEquals(XSSFFileHandler.class, fileHandler.getClass());
|
||||
|
||||
// to show the output from ZipFile() from commons-compress
|
||||
// although I did not find out yet why the ZipFile is not closed here
|
||||
System.gc();
|
||||
Thread.sleep(1000);
|
||||
System.gc();
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDetectUnnamedFile() throws IOException {
|
||||
File root = new File(ROOT_DIR, "spreadsheet");
|
||||
assertDoesNotThrow(() -> POIFileScanner.detectUnnamedFile(root, "49156.xlsx"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void test() throws IOException {
|
||||
assertDoesNotThrow(() -> POIFileScanner.scan(ROOT_DIR));
|
||||
}
|
||||
}
|
|
@ -17,32 +17,48 @@
|
|||
package org.apache.poi.stress;
|
||||
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.junit.jupiter.api.Assertions.fail;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.poi.OldFileFormatException;
|
||||
import org.apache.commons.collections4.MultiValuedMap;
|
||||
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
|
||||
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
|
||||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.CellType;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.apache.tools.ant.DirectoryScanner;
|
||||
import org.junit.jupiter.api.function.Executable;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.junit.jupiter.api.parallel.ExecutionMode;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import org.opentest4j.TestAbortedException;
|
||||
import org.opentest4j.AssertionFailedError;
|
||||
|
||||
/**
|
||||
* This is an integration test which performs various actions on all stored test-files and tries
|
||||
|
@ -68,395 +84,256 @@ import org.opentest4j.TestAbortedException;
|
|||
* here as well! This is to ensure that files that should not work really do not work, e.g.
|
||||
* that we do not remove expected sanity checks.
|
||||
*/
|
||||
class TestAllFiles {
|
||||
// also need to set JVM parameter: -Djunit.jupiter.execution.parallel.enabled=true
|
||||
@Execution(ExecutionMode.CONCURRENT)
|
||||
public class TestAllFiles {
|
||||
private static final File ROOT_DIR = new File("test-data");
|
||||
private static final boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore");
|
||||
|
||||
public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
|
||||
|
||||
private static final Map<String,String> FILE_PASSWORD;
|
||||
|
||||
|
||||
// map file extensions to the actual mappers
|
||||
public static final Map<String, FileHandler> HANDLERS = new HashMap<>();
|
||||
public static final String[] SCAN_EXCLUDES = {
|
||||
"**/.svn/**",
|
||||
"lost+found",
|
||||
"**/.git/**",
|
||||
};
|
||||
|
||||
static {
|
||||
// Excel
|
||||
HANDLERS.put(".xls", new HSSFFileHandler());
|
||||
HANDLERS.put(".xlsx", new XSSFFileHandler());
|
||||
HANDLERS.put(".xlsm", new XSSFFileHandler());
|
||||
HANDLERS.put(".xltx", new XSSFFileHandler());
|
||||
HANDLERS.put(".xlsb", new XSSFBFileHandler());
|
||||
|
||||
// Word
|
||||
HANDLERS.put(".doc", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HWPFFileHandler());
|
||||
HANDLERS.put(".docx", new XWPFFileHandler());
|
||||
HANDLERS.put(".dotx", new XWPFFileHandler());
|
||||
HANDLERS.put(".docm", new XWPFFileHandler());
|
||||
|
||||
// OpenXML4J files
|
||||
HANDLERS.put(".ooxml", new OPCFileHandler());
|
||||
HANDLERS.put(".zip", new OPCFileHandler());
|
||||
|
||||
// Powerpoint
|
||||
HANDLERS.put(".ppt", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSLFFileHandler());
|
||||
HANDLERS.put(".pot", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSLFFileHandler());
|
||||
HANDLERS.put(".pptx", new XSLFFileHandler());
|
||||
HANDLERS.put(".pptm", new XSLFFileHandler());
|
||||
HANDLERS.put(".ppsm", new XSLFFileHandler());
|
||||
HANDLERS.put(".ppsx", new XSLFFileHandler());
|
||||
HANDLERS.put(".thmx", new XSLFFileHandler());
|
||||
HANDLERS.put(".potx", new XSLFFileHandler());
|
||||
|
||||
// Outlook
|
||||
HANDLERS.put(".msg", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSMFFileHandler());
|
||||
|
||||
// Publisher
|
||||
HANDLERS.put(".pub", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HPBFFileHandler());
|
||||
|
||||
// Visio - binary
|
||||
HANDLERS.put(".vsd", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HDGFFileHandler());
|
||||
|
||||
// Visio - ooxml
|
||||
HANDLERS.put(".vsdm", new XDGFFileHandler());
|
||||
HANDLERS.put(".vsdx", new XDGFFileHandler());
|
||||
HANDLERS.put(".vssm", new XDGFFileHandler());
|
||||
HANDLERS.put(".vssx", new XDGFFileHandler());
|
||||
HANDLERS.put(".vstm", new XDGFFileHandler());
|
||||
HANDLERS.put(".vstx", new XDGFFileHandler());
|
||||
|
||||
// Visio - not handled yet
|
||||
HANDLERS.put(".vst", NullFileHandler.instance);
|
||||
HANDLERS.put(".vss", NullFileHandler.instance);
|
||||
|
||||
// POIFS
|
||||
HANDLERS.put(".ole2", new POIFSFileHandler());
|
||||
|
||||
// Microsoft Admin Template?
|
||||
HANDLERS.put(".adm", new HPSFFileHandler());
|
||||
|
||||
// Microsoft TNEF
|
||||
HANDLERS.put(".dat", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HMEFFileHandler());
|
||||
|
||||
// TODO: are these readable by some of the formats?
|
||||
HANDLERS.put(".wri", NullFileHandler.instance);
|
||||
HANDLERS.put(".shw", NullFileHandler.instance);
|
||||
HANDLERS.put(".zvi", NullFileHandler.instance);
|
||||
HANDLERS.put(".mpp", NullFileHandler.instance);
|
||||
HANDLERS.put(".qwp", NullFileHandler.instance);
|
||||
HANDLERS.put(".wps", NullFileHandler.instance);
|
||||
HANDLERS.put(".bin", NullFileHandler.instance);
|
||||
HANDLERS.put(".xps", NullFileHandler.instance);
|
||||
HANDLERS.put(".sldprt", NullFileHandler.instance);
|
||||
HANDLERS.put(".mdb", NullFileHandler.instance);
|
||||
HANDLERS.put(".vml", NullFileHandler.instance);
|
||||
|
||||
// ignore some file types, images, other formats, ...
|
||||
HANDLERS.put(".txt", NullFileHandler.instance);
|
||||
HANDLERS.put(".pdf", NullFileHandler.instance);
|
||||
HANDLERS.put(".rtf", NullFileHandler.instance);
|
||||
HANDLERS.put(".gif", NullFileHandler.instance);
|
||||
HANDLERS.put(".html", NullFileHandler.instance);
|
||||
HANDLERS.put(".png", NullFileHandler.instance);
|
||||
HANDLERS.put(".wmf", NullFileHandler.instance);
|
||||
HANDLERS.put(".emf", NullFileHandler.instance);
|
||||
HANDLERS.put(".dib", NullFileHandler.instance);
|
||||
HANDLERS.put(".svg", NullFileHandler.instance);
|
||||
HANDLERS.put(".pict", NullFileHandler.instance);
|
||||
HANDLERS.put(".jpg", NullFileHandler.instance);
|
||||
HANDLERS.put(".jpeg", NullFileHandler.instance);
|
||||
HANDLERS.put(".tif", NullFileHandler.instance);
|
||||
HANDLERS.put(".tiff", NullFileHandler.instance);
|
||||
HANDLERS.put(".wav", NullFileHandler.instance);
|
||||
HANDLERS.put(".xml", NullFileHandler.instance);
|
||||
HANDLERS.put(".csv", NullFileHandler.instance);
|
||||
HANDLERS.put(".ods", NullFileHandler.instance);
|
||||
HANDLERS.put(".ttf", NullFileHandler.instance);
|
||||
HANDLERS.put(".fntdata", NullFileHandler.instance);
|
||||
// VBA source files
|
||||
HANDLERS.put(".vba", NullFileHandler.instance);
|
||||
HANDLERS.put(".bas", NullFileHandler.instance);
|
||||
HANDLERS.put(".frm", NullFileHandler.instance);
|
||||
HANDLERS.put(".frx", NullFileHandler.instance); //binary
|
||||
HANDLERS.put(".cls", NullFileHandler.instance);
|
||||
|
||||
// map some files without extension
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR1", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR2", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR3", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR4", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR5", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR6", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecord2CR7", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/BigSSTRecordCR", NullFileHandler.instance);
|
||||
HANDLERS.put("spreadsheet/test_properties1", NullFileHandler.instance);
|
||||
|
||||
// keystore files
|
||||
HANDLERS.put(".pfx", NullFileHandler.instance);
|
||||
HANDLERS.put(".pem", NullFileHandler.instance);
|
||||
HANDLERS.put(".jks", NullFileHandler.instance);
|
||||
HANDLERS.put(".pkcs12", NullFileHandler.instance);
|
||||
|
||||
Map<String,String> passmap = new HashMap<>();
|
||||
passmap.put("slideshow/Password_Protected-hello.ppt", "hello");
|
||||
passmap.put("slideshow/Password_Protected-56-hello.ppt", "hello");
|
||||
passmap.put("slideshow/Password_Protected-np-hello.ppt", "hello");
|
||||
passmap.put("slideshow/cryptoapi-proc2356.ppt", "crypto");
|
||||
passmap.put("spreadsheet/xor-encryption-abc.xls", "abc");
|
||||
passmap.put("spreadsheet/35897-type4.xls", "freedom");
|
||||
passmap.put("spreadsheet/58616.xlsx", Decryptor.DEFAULT_PASSWORD);
|
||||
passmap.put("spreadsheet/password.xls", "password");
|
||||
passmap.put("spreadsheet/protected_passtika.xlsx", "tika");
|
||||
passmap.put("document/bug53475-password-is-pass.docx", "pass");
|
||||
passmap.put("document/bug53475-password-is-solrcell.docx", "solrcell");
|
||||
passmap.put("document/password_password_cryptoapi.doc", "password");
|
||||
passmap.put("document/password_tika_binaryrc4.doc", "tika");
|
||||
passmap.put("poifs/protect.xlsx", Decryptor.DEFAULT_PASSWORD);
|
||||
passmap.put("poifs/extenxls_pwd123.xlsx", "pwd123");
|
||||
passmap.put("poifs/protected_agile.docx", Decryptor.DEFAULT_PASSWORD);
|
||||
passmap.put("poifs/60320-protected.xlsx", "Test001!!");
|
||||
passmap.put("poifs/protected_sha512.xlsx", "this is a test");
|
||||
|
||||
FILE_PASSWORD = Collections.unmodifiableMap(passmap);
|
||||
public static Stream<Arguments> allfiles(String testName) throws IOException {
|
||||
MultiValuedMap<String, ExcInfo> exMap;
|
||||
Map<String,String> handlerMap;
|
||||
try (Workbook wb = WorkbookFactory.create(new File(ROOT_DIR, "spreadsheet/stress.xls"))) {
|
||||
exMap = readExMap(wb.getSheet("Exceptions"));
|
||||
handlerMap = readHandlerMap(wb.getSheet("Handlers"));
|
||||
}
|
||||
|
||||
private static Set<String> unmodifiableHashSet(String... a) {
|
||||
return Collections.unmodifiableSet(hashSet(a));
|
||||
}
|
||||
private static Set<String> hashSet(String... a) {
|
||||
return new HashSet<>(Arrays.asList(a));
|
||||
}
|
||||
|
||||
// Old Word Documents where we can at least extract some text
|
||||
private static final Set<String> OLD_FILES_HWPF = unmodifiableHashSet(
|
||||
"document/Bug49933.doc",
|
||||
"document/Bug51944.doc",
|
||||
"document/Word6.doc",
|
||||
"document/Word6_sections.doc",
|
||||
"document/Word6_sections2.doc",
|
||||
"document/Word95.doc",
|
||||
"document/word95err.doc",
|
||||
"document/Bug60936.doc",
|
||||
"document/Bug60942.doc",
|
||||
"document/Bug60942b.doc",
|
||||
"document/cn.orthodox.www_divenbog_APRIL_30-APRIL.DOC",
|
||||
"hpsf/TestMickey.doc",
|
||||
"document/52117.doc",
|
||||
"hpsf/TestInvertedClassID.doc",
|
||||
"hpsf/TestBug52117.doc"
|
||||
);
|
||||
|
||||
private static final Set<String> EXPECTED_FAILURES = unmodifiableHashSet(
|
||||
// password protected files without known password
|
||||
"spreadsheet/51832.xls",
|
||||
"spreadsheet/64759.xlsx",
|
||||
"document/PasswordProtected.doc",
|
||||
|
||||
// TODO: fails XMLExportTest, is this ok?
|
||||
"spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx",
|
||||
"spreadsheet/55864.xlsx",
|
||||
"spreadsheet/57890.xlsx",
|
||||
"spreadsheet/xxe_in_schema.xlsx",
|
||||
|
||||
// TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
|
||||
"spreadsheet/44958.xls",
|
||||
"spreadsheet/44958_1.xls",
|
||||
"spreadsheet/testArraysAndTables.xls",
|
||||
|
||||
// TODO: good to ignore?
|
||||
"spreadsheet/sample-beta.xlsx",
|
||||
"document/cpansearch.perl.org_src_tobyink_acme-rundoc-0.001_word-lib_hello_world.docm",
|
||||
|
||||
// This is actually a spreadsheet!
|
||||
"hpsf/TestRobert_Flaherty.doc",
|
||||
|
||||
// some files that are broken, eg Word 95, ...
|
||||
"spreadsheet/43493.xls",
|
||||
"spreadsheet/46904.xls",
|
||||
"document/Bug50955.doc",
|
||||
"document/57843.doc",
|
||||
"slideshow/PPT95.ppt",
|
||||
"slideshow/pp40only.ppt",
|
||||
"slideshow/Divino_Revelado.pptx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx",
|
||||
"openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx",
|
||||
"openxml4j/OPCCompliance_DerivedPartNameFAIL.docx",
|
||||
"openxml4j/invalid.xlsx",
|
||||
"openxml4j/62592.thmx",
|
||||
"spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764()
|
||||
"spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764()
|
||||
"poifs/unknown_properties.msg", // POIFS properties corrupted
|
||||
"poifs/only-zero-byte-streams.ole2", // No actual contents
|
||||
"spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
|
||||
"spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
|
||||
"spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
|
||||
"document/61612a.docx",
|
||||
"document/word2.doc",
|
||||
"spreadsheet/xlsx-corrupted.xlsx",
|
||||
"integration/stress025.docx",
|
||||
|
||||
// old Excel files, which we only support simple text extraction of
|
||||
"spreadsheet/testEXCEL_2.xls",
|
||||
"spreadsheet/testEXCEL_3.xls",
|
||||
"spreadsheet/testEXCEL_4.xls",
|
||||
"spreadsheet/testEXCEL_5.xls",
|
||||
"spreadsheet/testEXCEL_95.xls",
|
||||
"spreadsheet/59074.xls",
|
||||
"spreadsheet/60284.xls",
|
||||
"spreadsheet/64130.xls",
|
||||
|
||||
// OOXML Strict is not yet supported, see bug #57699
|
||||
"spreadsheet/SampleSS.strict.xlsx",
|
||||
"spreadsheet/SimpleStrict.xlsx",
|
||||
"spreadsheet/sample.strict.xlsx",
|
||||
"spreadsheet/57914.xlsx",
|
||||
|
||||
// files with XML entities
|
||||
"openxml4j/ContentTypeHasEntities.ooxml",
|
||||
|
||||
// non-TNEF files
|
||||
"ddf/Container.dat",
|
||||
"ddf/47143.dat",
|
||||
|
||||
// sheet cloning errors
|
||||
"spreadsheet/56450.xls",
|
||||
// "spreadsheet/OddStyleRecord.xls",
|
||||
|
||||
// msg files with non-standard encodings
|
||||
"hsmf/ASCII_CP1251_LCID1049.msg",
|
||||
"hsmf/ASCII_UTF-8_CP1252_LCID1031.msg",
|
||||
"hsmf/ASCII_UTF-8_CP1252_LCID1031_HTML.msg",
|
||||
"hsmf/HTMLBodyBinary_CP1251.msg",
|
||||
"hsmf/HTMLBodyBinary_UTF-8.msg"
|
||||
);
|
||||
|
||||
private static final Set<String> IGNORED = unmodifiableHashSet(
|
||||
// OPC handler works / XSSF handler fails
|
||||
"spreadsheet/57181.xlsm",
|
||||
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
|
||||
);
|
||||
|
||||
public static Stream<Arguments> files() {
|
||||
DirectoryScanner scanner = new DirectoryScanner();
|
||||
scanner.setBasedir(ROOT_DIR);
|
||||
scanner.setExcludes(SCAN_EXCLUDES);
|
||||
|
||||
scanner.scan();
|
||||
|
||||
System.out.println("Handling " + scanner.getIncludedFiles().length + " files");
|
||||
final List<Arguments> result = new ArrayList<>(100);
|
||||
for (String file : scanner.getIncludedFiles()) {
|
||||
// ... failures/handlers lookup doesn't work on windows otherwise
|
||||
final String uniFile = file.replace('\\', '/');
|
||||
|
||||
List<Arguments> files = new ArrayList<>();
|
||||
for(String file : scanner.getIncludedFiles()) {
|
||||
file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
|
||||
if (IGNORED.contains(file)) {
|
||||
System.out.println("Ignoring " + file);
|
||||
String firstHandler = handlerMap.entrySet().stream()
|
||||
.filter(me -> uniFile.endsWith(me.getKey()))
|
||||
.map(Map.Entry::getValue).findFirst().orElse("NULL");
|
||||
|
||||
final String[] handlerStr = { firstHandler, secondHandler(firstHandler) };
|
||||
for (String hs : handlerStr) {
|
||||
if ("NULL".equals(hs)) continue;
|
||||
ExcInfo info1 = exMap.get(file).stream()
|
||||
.filter(e ->
|
||||
(e.tests == null || e.tests.contains(testName) || "IGNORE".equals(e.tests)) &&
|
||||
(e.handler == null || e.handler.contains(hs))
|
||||
).findFirst().orElse(null);
|
||||
|
||||
if (info1 == null || !"IGNORE".equals(info1.tests)) {
|
||||
result.add(Arguments.of(
|
||||
file,
|
||||
hs,
|
||||
(info1 != null) ? info1.password : null,
|
||||
(info1 != null) ? info1.exClazz : null,
|
||||
(info1 != null) ? info1.exMessage : null
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result.stream();
|
||||
}
|
||||
|
||||
public static Stream<Arguments> extractFiles() throws IOException {
|
||||
return allfiles("extract");
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "#{index} {0} {1}")
|
||||
@MethodSource("extractFiles")
|
||||
void handleExtracting(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
|
||||
System.out.println("Running extractFiles on "+file);
|
||||
FileHandler fileHandler = Handler.valueOf(handler).fileHandler.get();
|
||||
assertNotNull(fileHandler, "Did not find a handler for file " + file);
|
||||
Executable exec = () -> fileHandler.handleExtracting(new File(ROOT_DIR, file));
|
||||
verify(exec, exClass, exMessage, password);
|
||||
}
|
||||
|
||||
|
||||
public static Stream<Arguments> handleFiles() throws IOException {
|
||||
return allfiles("handle");
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "#{index} {0} {1}")
|
||||
@MethodSource("handleFiles")
|
||||
void handleFile(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
|
||||
System.out.println("Running handleFiles on "+file);
|
||||
FileHandler fileHandler = Handler.valueOf(handler).fileHandler.get();
|
||||
assertNotNull(fileHandler, "Did not find a handler for file " + file);
|
||||
try (InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)), 64 * 1024)) {
|
||||
Executable exec = () -> fileHandler.handleFile(stream, file);
|
||||
verify(exec, exClass, exMessage, password);
|
||||
}
|
||||
}
|
||||
|
||||
public static Stream<Arguments> handleAdditionals() throws IOException {
|
||||
return allfiles("additional");
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "#{index} {0} {1}")
|
||||
@MethodSource("handleAdditionals")
|
||||
void handleAdditional(String file, String handler, String password, Class<? extends Throwable> exClass, String exMessage) {
|
||||
System.out.println("Running additionals on "+file);
|
||||
FileHandler fileHandler = Handler.valueOf(handler).fileHandler.get();
|
||||
assertNotNull(fileHandler, "Did not find a handler for file " + file);
|
||||
Executable exec = () -> fileHandler.handleAdditional(new File(ROOT_DIR, file));
|
||||
verify(exec, exClass, exMessage, password);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static void verify(Executable exec, Class<? extends Throwable> exClass, String exMessage, String password) {
|
||||
// this also removes the password for non encrypted files
|
||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||
if (exClass != null && AssertionFailedError.class.isAssignableFrom(exClass)) {
|
||||
try {
|
||||
exec.execute();
|
||||
fail("expected failed assertion");
|
||||
} catch (AssertionFailedError e) {
|
||||
assertEquals(exMessage, e.getMessage());
|
||||
} catch (Throwable e) {
|
||||
fail("unexpected exception", e);
|
||||
}
|
||||
} else if (exClass != null) {
|
||||
Exception e = assertThrows((Class<? extends Exception>)exClass, exec);
|
||||
String actMsg = e.getMessage();
|
||||
if (exMessage == null) {
|
||||
assertNull(actMsg);
|
||||
} else {
|
||||
assertNotNull(actMsg);
|
||||
assertTrue(actMsg.startsWith(exMessage), "Message: "+actMsg+" - didn't start with "+exMessage);
|
||||
}
|
||||
} else {
|
||||
assertDoesNotThrow(exec);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static String secondHandler(String handlerStr) {
|
||||
switch (handlerStr) {
|
||||
case "XSSF":
|
||||
case "XWPF":
|
||||
case "XSLF":
|
||||
case "XDGF":
|
||||
return "OPC";
|
||||
case "HSSF":
|
||||
case "HWPF":
|
||||
case "HSLF":
|
||||
case "HDGF":
|
||||
case "HSMF":
|
||||
case "HBPF":
|
||||
return "HPSF";
|
||||
default:
|
||||
return "NULL";
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String,String> readHandlerMap(Sheet sh) {
|
||||
Map<String,String> handlerMap = new LinkedHashMap<>();
|
||||
boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore");
|
||||
boolean isFirst = true;
|
||||
for (Row row : sh) {
|
||||
if (isFirst) {
|
||||
isFirst = false;
|
||||
continue;
|
||||
}
|
||||
FileHandler handler = HANDLERS.get(getExtension(file));
|
||||
files.add(Arguments.of( file, handler ));
|
||||
|
||||
// for some file-types also run OPCFileHandler
|
||||
if(handler instanceof XSSFFileHandler ||
|
||||
handler instanceof XWPFFileHandler ||
|
||||
handler instanceof XSLFFileHandler ||
|
||||
handler instanceof XDGFFileHandler) {
|
||||
files.add(Arguments.of( file, new OPCFileHandler() ));
|
||||
Cell cell = row.getCell(2);
|
||||
if (IGNORE_SCRATCHPAD || cell == null || cell.getCellType() != CellType.STRING) {
|
||||
cell = row.getCell(1);
|
||||
}
|
||||
handlerMap.put(row.getCell(0).getStringCellValue(), cell.getStringCellValue());
|
||||
}
|
||||
return handlerMap;
|
||||
}
|
||||
|
||||
if (handler instanceof HSSFFileHandler ||
|
||||
handler instanceof HSLFFileHandler ||
|
||||
handler instanceof HWPFFileHandler ||
|
||||
handler instanceof HDGFFileHandler) {
|
||||
files.add(Arguments.of( file, new HPSFFileHandler() ));
|
||||
|
||||
private static MultiValuedMap<String, ExcInfo> readExMap(Sheet sh) {
|
||||
MultiValuedMap<String, ExcInfo> exMap = new ArrayListValuedHashMap<>();
|
||||
|
||||
Iterator<Row> iter = sh.iterator();
|
||||
List<BiConsumer<ExcInfo,String>> cols = initCols(iter.next());
|
||||
|
||||
while (iter.hasNext()) {
|
||||
ExcInfo info = new ExcInfo();
|
||||
for (Cell cell : iter.next()) {
|
||||
if (cell.getCellType() == CellType.STRING) {
|
||||
cols.get(cell.getColumnIndex()).accept(info, cell.getStringCellValue());
|
||||
}
|
||||
}
|
||||
|
||||
return files.stream();
|
||||
exMap.put(info.file, info);
|
||||
}
|
||||
return exMap;
|
||||
}
|
||||
|
||||
// the display name annotation is ignored by Ant's junitlauncher listeners :(
|
||||
// ... even when using a custom display name generator
|
||||
@ParameterizedTest(name = "#{index} {0}" )
|
||||
@MethodSource("files")
|
||||
void testAllFiles(String file, FileHandler handler) throws Exception {
|
||||
assertNotNull(handler, "Did not find a handler for file " + file);
|
||||
|
||||
// this also removes the password for non encrypted files
|
||||
String pass = TestAllFiles.FILE_PASSWORD.get(file);
|
||||
Biff8EncryptionKey.setCurrentUserPassword(pass);
|
||||
|
||||
|
||||
System.out.println("Reading " + file + " with " + handler.getClass().getSimpleName());
|
||||
assertNotNull( handler, "Unknown file extension for file: " + file + ": " + getExtension(file) );
|
||||
File inputFile = new File(ROOT_DIR, file);
|
||||
|
||||
// special cases where docx-handling breaks, but OPCPackage handling works
|
||||
boolean ignoredOPC = (file.endsWith(".docx") || file.endsWith(".xlsx") ||
|
||||
file.endsWith(".xlsb") || file.endsWith(".pptx")) &&
|
||||
handler instanceof OPCFileHandler;
|
||||
boolean ignoreHPSF = (handler instanceof HPSFFileHandler);
|
||||
|
||||
private static List<BiConsumer<ExcInfo,String>> initCols(Row row) {
|
||||
Map<String,BiConsumer<ExcInfo,String>> m = new HashMap<>();
|
||||
m.put("File", (e,s) -> e.file = s);
|
||||
m.put("Tests", (e,s) -> e.tests = s);
|
||||
m.put("Handler", (e,s) -> e.handler = s);
|
||||
m.put("Password", (e,s) -> e.password = s);
|
||||
m.put("Exception Class", (e,s) -> {
|
||||
try {
|
||||
try (InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64 * 1024)) {
|
||||
handler.handleFile(stream, file);
|
||||
assertFalse( OLD_FILES_HWPF.contains(file) && !ignoreHPSF, "Expected to fail for file " + file + " and handler " + handler + ", but did not fail!" );
|
||||
e.exClazz = (Class<? extends Exception>) Class.forName(s);
|
||||
} catch (ClassNotFoundException ex) {
|
||||
fail(ex);
|
||||
}
|
||||
});
|
||||
m.put("Exception Message", (e,s) -> e.exMessage = s);
|
||||
|
||||
return StreamSupport
|
||||
.stream(row.spliterator(), false)
|
||||
.map(Cell::getStringCellValue)
|
||||
.map(v -> m.getOrDefault(v, (e,s) -> {}))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
handler.handleExtracting(inputFile);
|
||||
private static class ExcInfo {
|
||||
String file;
|
||||
String tests;
|
||||
String handler;
|
||||
String password;
|
||||
Class<? extends Throwable> exClazz;
|
||||
String exMessage;
|
||||
|
||||
|
||||
assertFalse( EXPECTED_FAILURES.contains(file) && !ignoredOPC && !ignoreHPSF, "Expected to fail for file " + file + " and handler " + handler + ", but did not fail!" );
|
||||
} catch (OldFileFormatException e) {
|
||||
// for old word files we should still support extracting text
|
||||
if(OLD_FILES_HWPF.contains(file)) {
|
||||
handler.handleExtracting(inputFile);
|
||||
} else {
|
||||
// check if we expect failure for this file
|
||||
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
|
||||
System.out.println("Failed: " + file);
|
||||
throw new Exception("While handling " + file, e);
|
||||
}
|
||||
}
|
||||
} catch (TestAbortedException e) {
|
||||
// file handler ignored this file
|
||||
} catch (Exception e) {
|
||||
// check if we expect failure for this file
|
||||
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
|
||||
System.out.println("Failed: " + file);
|
||||
throw new Exception("While handling " + file, e);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// let some file handlers do additional stuff
|
||||
handler.handleAdditional(inputFile);
|
||||
} catch (TestAbortedException e) {
|
||||
// file handler ignored this file
|
||||
} catch (Exception e) {
|
||||
if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
|
||||
System.out.println("Failed: " + file);
|
||||
throw new Exception("While handling " + file, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@SuppressWarnings("unused")
|
||||
private enum Handler {
|
||||
HDGF(HDGFFileHandler::new),
|
||||
HMEF(HMEFFileHandler::new),
|
||||
HPBF(HPBFFileHandler::new),
|
||||
HPSF(HPSFFileHandler::new),
|
||||
HSLF(HSLFFileHandler::new),
|
||||
HSMF(HSMFFileHandler::new),
|
||||
HSSF(HSSFFileHandler::new),
|
||||
HWPF(HWPFFileHandler::new),
|
||||
OPC(OPCFileHandler::new),
|
||||
POIFS(POIFSFileHandler::new),
|
||||
XDGF(XDGFFileHandler::new),
|
||||
XSLF(XSLFFileHandler::new),
|
||||
XSSFB(XSSFBFileHandler::new),
|
||||
XSSF(XSSFFileHandler::new),
|
||||
XWPF(XWPFFileHandler::new),
|
||||
OWPF(OWPFFileHandler::new),
|
||||
NULL(NullFileHandler::new)
|
||||
;
|
||||
|
||||
public static String getExtension(String file) {
|
||||
int pos = file.lastIndexOf('.');
|
||||
if(pos == -1 || pos == file.length()-1) {
|
||||
return file;
|
||||
final Supplier<FileHandler> fileHandler;
|
||||
Handler(Supplier<FileHandler> fileHandler) {
|
||||
this.fileHandler = fileHandler;
|
||||
}
|
||||
|
||||
return file.substring(pos).toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
public static class NullFileHandler implements FileHandler {
|
||||
public static final FileHandler instance = new NullFileHandler();
|
||||
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) {
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.io.FileInputStream;
|
|||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.ooxml.POIXMLException;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||
|
@ -32,8 +33,9 @@ import org.junit.jupiter.api.Test;
|
|||
class XSLFFileHandler extends SlideShowHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
XMLSlideShow slide = new XMLSlideShow(stream);
|
||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage());
|
||||
try (XMLSlideShow slide = new XMLSlideShow(stream);
|
||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
|
||||
;
|
||||
assertNotNull(slideInner.getPresentation());
|
||||
assertNotNull(slideInner.getSlideMasterReferences());
|
||||
assertNotNull(slideInner.getSlideReferences());
|
||||
|
@ -41,9 +43,10 @@ class XSLFFileHandler extends SlideShowHandler {
|
|||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
|
||||
|
||||
handleSlideShow(slide);
|
||||
|
||||
slideInner.close();
|
||||
slide.close();
|
||||
} catch (POIXMLException e) {
|
||||
Exception cause = (Exception)e.getCause();
|
||||
throw cause == null ? e : cause;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.ooxml.POIXMLException;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
|
@ -30,9 +31,13 @@ class XWPFFileHandler extends AbstractFileHandler {
|
|||
// ignore password protected files
|
||||
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
|
||||
|
||||
XWPFDocument doc = new XWPFDocument(stream);
|
||||
try (XWPFDocument doc = new XWPFDocument(stream)) {
|
||||
|
||||
new POIXMLDocumentHandler().handlePOIXMLDocument(doc);
|
||||
} catch (POIXMLException e) {
|
||||
Exception cause = (Exception)e.getCause();
|
||||
throw cause == null ? e : cause;
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
|
|
|
@ -24,13 +24,15 @@ public class Configurator {
|
|||
private static POILogger logger = POILogFactory.getLogger(Configurator.class);
|
||||
|
||||
public static int getIntValue(String systemProperty, int defaultValue) {
|
||||
int result = defaultValue;
|
||||
String property = System.getProperty(systemProperty);
|
||||
if (property == null || "".equals(property) || "null".equals(property)) {
|
||||
return defaultValue;
|
||||
}
|
||||
try {
|
||||
result = Integer.parseInt(property);
|
||||
return Integer.parseInt(property);
|
||||
} catch (Exception e) {
|
||||
logger.log(POILogger.ERROR, "System property -D", systemProperty, " does not contains a valid integer: ", property);
|
||||
return defaultValue;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ module org.apache.poi.stress {
|
|||
requires net.bytebuddy;
|
||||
requires java.desktop;
|
||||
|
||||
requires org.apache.commons.collections4;
|
||||
requires org.apache.poi.examples;
|
||||
|
||||
exports org.apache.poi.stress;
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue