mirror of https://github.com/apache/poi.git
Add some more code from the separate integration test project to be able to publish the remaining functionality as a separate project at some point
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1811144 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9f1e234ee4
commit
56254a17c4
|
@ -0,0 +1,138 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi;
|
||||
|
||||
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
|
||||
import org.apache.poi.hssf.OldExcelFormatException;
|
||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.stress.*;
|
||||
import org.junit.Assume;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.zip.ZipException;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
public class BaseIntegrationTest {
|
||||
private final File rootDir;
|
||||
private String file;
|
||||
private FileHandler handler;
|
||||
|
||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
|
||||
this.rootDir = rootDir;
|
||||
this.file = file;
|
||||
this.handler = handler;
|
||||
}
|
||||
|
||||
public void test() throws Exception {
|
||||
assertNotNull("Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file), handler);
|
||||
|
||||
File inputFile = new File(rootDir, file);
|
||||
try {
|
||||
handleFile(inputFile);
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// check if the file-extension is wrong
|
||||
if(!e.getMessage().contains("data appears to be in the Office 2007")) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||
handleWrongExtension(inputFile, e);
|
||||
} catch (OldWordFileFormatException | OldExcelFormatException | OldPowerPointFormatException e) {
|
||||
// at least perform extracting tests on these old files
|
||||
} catch (OldFileFormatException e) {
|
||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
|
||||
//noinspection ConstantConditions
|
||||
Assume.assumeFalse("File " + file + " excluded because it is unsupported old Excel format", true);
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
//noinspection ConstantConditions
|
||||
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
|
||||
} catch (ZipException e) {
|
||||
// some files are corrupted
|
||||
if (e.getMessage().equals("unexpected EOF")) {
|
||||
//noinspection ConstantConditions
|
||||
Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true);
|
||||
}
|
||||
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
// sometimes binary format has XML-format-extension...
|
||||
if(e.getMessage().contains("rong file format or file extension for OO XML file")) {
|
||||
handleWrongExtension(inputFile, e);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ignore errors for documents with incorrect extension
|
||||
String message = e.getMessage();
|
||||
if(message != null && (message.equals("The document is really a RTF file") ||
|
||||
message.equals("The document is really a PDF file") ||
|
||||
message.equals("The document is really a HTML file"))) {
|
||||
//noinspection ConstantConditions
|
||||
Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF file", true);
|
||||
}
|
||||
|
||||
if(e.getMessage().equals("The document is really a OOXML file")) {
|
||||
handleWrongExtension(inputFile, e);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
handler.handleExtracting(inputFile);
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
//noinspection ConstantConditions
|
||||
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
|
||||
}
|
||||
}
|
||||
|
||||
void handleWrongExtension(File inputFile, Exception e) throws Exception {
|
||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||
if (handler instanceof HWPFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".docx");
|
||||
handleFile(inputFile);
|
||||
} else if (handler instanceof HSSFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".xlsx");
|
||||
handleFile(inputFile);
|
||||
} else if (handler instanceof HSLFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".pptx");
|
||||
handleFile(inputFile);
|
||||
// and the other way around, use HWPF instead of XWPF and so forth
|
||||
} else if(handler instanceof XWPFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".doc");
|
||||
handleFile(inputFile);
|
||||
} else if(handler instanceof XSSFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".xls");
|
||||
handleFile(inputFile);
|
||||
} else if(handler instanceof XSLFFileHandler) {
|
||||
handler = TestAllFiles.HANDLERS.get(".ppt");
|
||||
handleFile(inputFile);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private void handleFile(File inputFile) throws Exception {
|
||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
|
||||
handler.handleFile(newStream, inputFile.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -91,13 +91,13 @@ import org.junit.runners.Parameterized.Parameters;
|
|||
public class TestAllFiles {
|
||||
private static final File ROOT_DIR = new File("test-data");
|
||||
|
||||
static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
|
||||
public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
|
||||
|
||||
private static final Map<String,String> FILE_PASSWORD;
|
||||
|
||||
|
||||
// map file extensions to the actual mappers
|
||||
static final Map<String, FileHandler> HANDLERS = new HashMap<>();
|
||||
public static final Map<String, FileHandler> HANDLERS = new HashMap<>();
|
||||
static {
|
||||
// Excel
|
||||
HANDLERS.put(".xls", new HSSFFileHandler());
|
||||
|
@ -443,7 +443,7 @@ public class TestAllFiles {
|
|||
handler.handleAdditional(inputFile);
|
||||
}
|
||||
|
||||
static String getExtension(String file) {
|
||||
public static String getExtension(String file) {
|
||||
int pos = file.lastIndexOf('.');
|
||||
if(pos == -1 || pos == file.length()-1) {
|
||||
return file;
|
||||
|
@ -452,7 +452,7 @@ public class TestAllFiles {
|
|||
return file.substring(pos).toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private static class NullFileHandler implements FileHandler {
|
||||
public static class NullFileHandler implements FileHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
}
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.stress;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class FileHandlerFactory {
|
||||
// map from patterns for mimetypes to the FileHandlers that should be able to
|
||||
// work with that file
|
||||
// use a Set<Pair> to have a defined order of applying the matches
|
||||
private static final Map<Pattern, FileHandler> MIME_TYPES = new HashMap<>();
|
||||
static {
|
||||
////////////////// Word
|
||||
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.document.macroenabled.12"), new XWPFFileHandler());
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.template.macroenabled.12"), new XWPFFileHandler());
|
||||
|
||||
// application/msword
|
||||
MIME_TYPES.put(Pattern.compile(".*msword.*"), new HWPFFileHandler());
|
||||
// application/vnd.ms-word
|
||||
MIME_TYPES.put(Pattern.compile(".*ms-word.*"), new HWPFFileHandler());
|
||||
|
||||
// application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
||||
MIME_TYPES.put(Pattern.compile(".*wordprocessingml.*"), new XWPFFileHandler());
|
||||
|
||||
////////////////// Excel
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.addin.macroEnabled.12"), new XSSFFileHandler());
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.sheet.binary.macroEnabled.12"), new XSSFFileHandler());
|
||||
|
||||
// application/msexcel
|
||||
MIME_TYPES.put(Pattern.compile(".*msexcel.*"), new HSSFFileHandler());
|
||||
// application/vnd.ms-excel
|
||||
MIME_TYPES.put(Pattern.compile(".*ms-excel.*"), new HSSFFileHandler());
|
||||
|
||||
// application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
||||
MIME_TYPES.put(Pattern.compile(".*spreadsheetml.*"), new XSSFFileHandler());
|
||||
|
||||
////////////////// Powerpoint
|
||||
|
||||
// application/vnd.ms-powerpoint
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint"), new HSLFFileHandler());
|
||||
// application/vnd.ms-officetheme
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-officetheme"), new HSLFFileHandler());
|
||||
|
||||
// application/vnd.openxmlformats-officedocument.presentationml.presentation
|
||||
MIME_TYPES.put(Pattern.compile(".*presentationml.*"), new XSLFFileHandler());
|
||||
// application/vnd.ms-powerpoint.presentation.macroenabled.12
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.presentation.macroenabled.12"), new XSLFFileHandler());
|
||||
// application/vnd.ms-powerpoint.slideshow.macroenabled.12
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.slideshow.macroenabled.12"), new XSLFFileHandler());
|
||||
|
||||
////////////////// Mail/TNEF
|
||||
|
||||
// application/vnd.ms-tnef
|
||||
MIME_TYPES.put(Pattern.compile(".*ms-tnef.*"), new HMEFFileHandler());
|
||||
|
||||
// application/vnd.ms-outlook
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-outlook"), new HSMFFileHandler());
|
||||
|
||||
////////////////// Visio
|
||||
|
||||
// application/vnd.visio
|
||||
MIME_TYPES.put(Pattern.compile("application/vnd.visio.*"), new HDGFFileHandler());
|
||||
|
||||
// application/vnd.ms-visio.drawing
|
||||
MIME_TYPES.put(Pattern.compile(".*vnd.ms-visio\\."), new XDGFFileHandler());
|
||||
|
||||
//application/vnd.ms-visio.viewer
|
||||
MIME_TYPES.put(Pattern.compile(".*visio.*"), new HDGFFileHandler());
|
||||
|
||||
|
||||
////////////////// Publisher
|
||||
|
||||
// application/x-mspublisher
|
||||
MIME_TYPES.put(Pattern.compile("application/x-mspublisher"), new HPBFFileHandler());
|
||||
|
||||
|
||||
////////////////// Others
|
||||
|
||||
// special type used by Tika
|
||||
MIME_TYPES.put(Pattern.compile("application/x-tika-ooxml.*"), new OPCFileHandler());
|
||||
// special type used by Tika
|
||||
MIME_TYPES.put(Pattern.compile("application/x-tika-msoffice.*"), new POIFSFileHandler());
|
||||
|
||||
// application/x-tika-old-excel
|
||||
MIME_TYPES.put(Pattern.compile("application/x-tika-old-excel"), new POIFSFileHandler());
|
||||
|
||||
// application/vnd.openxmlformats-officedocument.drawingml.chart+xml
|
||||
// ?!MIME_TYPES.put(Pattern.compile(".*drawingml.*"), ".dwg");
|
||||
|
||||
// application/vnd.openxmlformats-officedocument.vmlDrawing
|
||||
// ?!MIME_TYPES.put(Pattern.compile(".*vmlDrawing.*"), ".dwg");
|
||||
}
|
||||
|
||||
public static FileHandler getHandler(String mimeType) {
|
||||
for(Map.Entry<Pattern,FileHandler> entry : MIME_TYPES.entrySet()) {
|
||||
if(entry.getKey().matcher(mimeType).matches()) {
|
||||
return entry.getValue();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue