convert tabs to spaces

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1890125 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2021-05-22 22:08:50 +00:00
parent 212a7b9655
commit 0eb475ee3a
14 changed files with 500 additions and 500 deletions

View File

@ -46,7 +46,7 @@ public abstract class AbstractFileHandler implements FileHandler {
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
static {
// password protected files without password
// ... currently none ...
// ... currently none ...
// unsupported file-types, no supported OLE2 parts
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
@ -75,9 +75,9 @@ public abstract class AbstractFileHandler implements FileHandler {
/* Did fail for some documents with special XML contents...
try {
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
"/tmp/pretty-" + file.getName() });
"/tmp/pretty-" + file.getName() });
} catch (ZipException e) {
// ignore, not a Zip/OOXML file
// ignore, not a Zip/OOXML file
}*/
}

View File

@ -38,128 +38,128 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
*
*/
public class BaseIntegrationTest {
private final File rootDir;
private final String file;
private FileHandler handler;
private final File rootDir;
private final String file;
private FileHandler handler;
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
this.rootDir = rootDir;
this.file = file;
this.handler = handler;
}
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
this.rootDir = rootDir;
this.file = file;
this.handler = handler;
}
/**
* Keep this public so it can be used by the regression-tests
*/
public void test() throws Exception {
/**
* Keep this public so it can be used by the regression-tests
*/
public void test() throws Exception {
assertNotNull( handler, "Unknown file extension for file: " + file );
testOneFile(new File(rootDir, file));
}
testOneFile(new File(rootDir, file));
}
protected void testOneFile(File inputFile) throws Exception {
try {
handleFile(inputFile);
} catch (OfficeXmlFileException e) {
// switch XWPF and HWPF and so forth depending on the error message
handleWrongOLE2XMLExtension(inputFile, e);
} catch (OldFileFormatException e) {
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
} catch (ZipException e) {
// some files are corrupted
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
}
protected void testOneFile(File inputFile) throws Exception {
try {
handleFile(inputFile);
} catch (OfficeXmlFileException e) {
// switch XWPF and HWPF and so forth depending on the error message
handleWrongOLE2XMLExtension(inputFile, e);
} catch (OldFileFormatException e) {
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
} catch (ZipException e) {
// some files are corrupted
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
}
throw e;
} catch (IOException e) {
// ignore some other ways of corrupted files
String message = e.getMessage();
if(message != null && message.contains("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
}
throw e;
} catch (IOException e) {
// ignore some other ways of corrupted files
String message = e.getMessage();
if(message != null && message.contains("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
}
// sometimes binary format has XML-format-extension...
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
handleWrongOLE2XMLExtension(inputFile, e);
return;
}
// sometimes binary format has XML-format-extension...
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
handleWrongOLE2XMLExtension(inputFile, e);
return;
}
throw e;
} catch (IllegalArgumentException e) {
// ignore errors for documents with incorrect extension
String message = e.getMessage();
if(message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file"))) {
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
}
throw e;
} catch (IllegalArgumentException e) {
// ignore errors for documents with incorrect extension
String message = e.getMessage();
if(message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file"))) {
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
}
if(message != null && message.equals("The document is really a OOXML file")) {
handleWrongOLE2XMLExtension(inputFile, e);
return;
}
if(message != null && message.equals("The document is really a OOXML file")) {
handleWrongOLE2XMLExtension(inputFile, e);
return;
}
throw e;
}
throw e;
}
try {
handler.handleExtracting(inputFile);
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
}
}
try {
handler.handleExtracting(inputFile);
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
}
}
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
// we sometimes have wrong extensions, so for some exceptions we try to handle it
// with the correct FileHandler instead
String message = e.getMessage();
// we sometimes have wrong extensions, so for some exceptions we try to handle it
// with the correct FileHandler instead
String message = e.getMessage();
// ignore some file-types that we do not want to handle here
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
// ignore some file-types that we do not want to handle here
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
if(message != null && (message.equals("The document is really a XLS file"))) {
handler = new HSSFFileHandler();
} else if(message != null && (message.equals("The document is really a PPT file"))) {
handler = new HSLFFileHandler();
} else if(message != null && (message.equals("The document is really a DOC file"))) {
handler = new HWPFFileHandler();
} else if(message != null && (message.equals("The document is really a VSD file"))) {
handler = new HDGFFileHandler();
if(message != null && (message.equals("The document is really a XLS file"))) {
handler = new HSSFFileHandler();
} else if(message != null && (message.equals("The document is really a PPT file"))) {
handler = new HSLFFileHandler();
} else if(message != null && (message.equals("The document is really a DOC file"))) {
handler = new HWPFFileHandler();
} else if(message != null && (message.equals("The document is really a VSD file"))) {
handler = new HDGFFileHandler();
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
} else if (handler instanceof HWPFFileHandler) {
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
} else if (handler instanceof HWPFFileHandler) {
handler = new XWPFFileHandler();
} else if (handler instanceof HSSFFileHandler) {
handler = new XSSFFileHandler();
} else if (handler instanceof HSLFFileHandler) {
handler = new XSLFFileHandler();
handler = new XSLFFileHandler();
// and the other way around, use HWPF instead of XWPF and so forth
} else if(handler instanceof XWPFFileHandler) {
handler = new HWPFFileHandler();
} else if(handler instanceof XSSFFileHandler) {
handler = new HSSFFileHandler();
} else if(handler instanceof XSLFFileHandler) {
handler = new HSLFFileHandler();
// and the other way around, use HWPF instead of XWPF and so forth
} else if(handler instanceof XWPFFileHandler) {
handler = new HWPFFileHandler();
} else if(handler instanceof XSSFFileHandler) {
handler = new HSSFFileHandler();
} else if(handler instanceof XSLFFileHandler) {
handler = new HSLFFileHandler();
} else {
// nothing matched => throw the exception to the outside
throw e;
}
// nothing matched => throw the exception to the outside
throw e;
}
// we found a different handler to try processing again
handleFile(inputFile);
}
// we found a different handler to try processing again
handleFile(inputFile);
}
private void handleFile(File inputFile) throws Exception {
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
handler.handleFile(newStream, inputFile.getAbsolutePath());
}
}
private void handleFile(File inputFile) throws Exception {
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
handler.handleFile(newStream, inputFile.getAbsolutePath());
}
}
}

View File

@ -24,28 +24,28 @@ import java.io.InputStream;
* used in the stress testing.
*/
public interface FileHandler {
/**
* The FileHandler receives a stream ready for reading the
* file and should handle the content that is provided and
* try to read and interpret the data.
*
* Closing is handled by the framework outside this call.
*
* @param stream The input stream to read the file from.
* @param path the relative path to the file
* @throws Exception If an error happens in the file-specific handler
*/
void handleFile(InputStream stream, String path) throws Exception;
/**
* The FileHandler receives a stream ready for reading the
* file and should handle the content that is provided and
* try to read and interpret the data.
*
* Closing is handled by the framework outside this call.
*
* @param stream The input stream to read the file from.
* @param path the relative path to the file
* @throws Exception If an error happens in the file-specific handler
*/
void handleFile(InputStream stream, String path) throws Exception;
/**
* Ensures that extracting text from the given file
* is returning some text.
*/
void handleExtracting(File file) throws Exception;
/**
* Ensures that extracting text from the given file
* is returning some text.
*/
void handleExtracting(File file) throws Exception;
/**
* Allows to perform some additional work, e.g. run
* some of the example applications
*/
void handleAdditional(File file) throws Exception;
/**
* Allows to perform some additional work, e.g. run
* some of the example applications
*/
void handleAdditional(File file) throws Exception;
}

View File

@ -32,47 +32,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.jupiter.api.Test;
class HDGFFileHandler extends POIFSFileHandler {
@Override
public void handleFile(InputStream stream, String path) throws IOException {
POIFSFileSystem poifs = new POIFSFileSystem(stream);
HDGFDiagram diagram = new HDGFDiagram(poifs);
Stream[] topLevelStreams = diagram.getTopLevelStreams();
assertNotNull(topLevelStreams);
for(Stream str : topLevelStreams) {
assertTrue(str.getPointer().getLength() >= 0);
}
@Override
public void handleFile(InputStream stream, String path) throws IOException {
POIFSFileSystem poifs = new POIFSFileSystem(stream);
HDGFDiagram diagram = new HDGFDiagram(poifs);
Stream[] topLevelStreams = diagram.getTopLevelStreams();
assertNotNull(topLevelStreams);
for(Stream str : topLevelStreams) {
assertTrue(str.getPointer().getLength() >= 0);
}
TrailerStream trailerStream = diagram.getTrailerStream();
assertNotNull(trailerStream);
assertTrue(trailerStream.getPointer().getLength() >= 0);
diagram.close();
poifs.close();
TrailerStream trailerStream = diagram.getTrailerStream();
assertNotNull(trailerStream);
assertTrue(trailerStream.getPointer().getLength() >= 0);
diagram.close();
poifs.close();
// writing is not yet implemented... handlePOIDocument(diagram);
}
// writing is not yet implemented... handlePOIDocument(diagram);
}
// a test-case to test this locally without executing the full TestAllFiles
@Override
// a test-case to test this locally without executing the full TestAllFiles
@Override
@Test
void test() throws Exception {
void test() throws Exception {
File file = new File("test-data/diagram/44501.vsd");
InputStream stream = new FileInputStream(file);
try {
handleFile(stream, file.getPath());
} finally {
stream.close();
}
try {
handleFile(stream, file.getPath());
} finally {
stream.close();
}
handleExtracting(file);
handleExtracting(file);
stream = new FileInputStream(file);
try {
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
assertNotNull(extractor.getText());
}
} finally {
stream.close();
}
}
stream = new FileInputStream(file);
try {
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
assertNotNull(extractor.getText());
}
} finally {
stream.close();
}
}
}

View File

@ -34,58 +34,58 @@ import org.junit.jupiter.api.Test;
class HMEFFileHandler extends AbstractFileHandler {
@Override
public void handleExtracting(File file) throws Exception {
FileMagic fm = FileMagic.valueOf(file);
if (fm == FileMagic.OLE2) {
super.handleExtracting(file);
}
}
@Override
public void handleExtracting(File file) throws Exception {
FileMagic fm = FileMagic.valueOf(file);
if (fm == FileMagic.OLE2) {
super.handleExtracting(file);
}
}
@Override
@Override
public void handleFile(InputStream stream, String path) throws Exception {
HMEFMessage msg = new HMEFMessage(stream);
HMEFMessage msg = new HMEFMessage(stream);
// there are test-files that have no body...
String[] HTML_BODY = {
"Testing TNEF Message", "TNEF test message with attachments", "Test"
};
String bodyStr;
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
assertNotNull(bodyHtml);
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
} else {
bodyStr = msg.getBody();
}
assertNotNull( bodyStr, "Body is not set" );
assertNotNull( msg.getSubject(), "Subject is not set" );
}
// there are test-files that have no body...
String[] HTML_BODY = {
"Testing TNEF Message", "TNEF test message with attachments", "Test"
};
String bodyStr;
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
assertNotNull(bodyHtml);
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
} else {
bodyStr = msg.getBody();
}
assertNotNull( bodyStr, "Body is not set" );
assertNotNull( msg.getSubject(), "Subject is not set" );
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
String path = "test-data/hmef/quick-winmail.dat";
try (InputStream stream = new FileInputStream(path)) {
handleFile(stream, path);
}
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
String path = "test-data/hmef/quick-winmail.dat";
try (InputStream stream = new FileInputStream(path)) {
handleFile(stream, path);
}
}
private String getEncoding(HMEFMessage tnefDat) {
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
int codePage = 1252;
if (oemCP != null) {
codePage = LittleEndian.getInt(oemCP.getData());
} else if (cpId != null) {
codePage = LittleEndian.getInt(cpId.getData());
}
switch (codePage) {
// see http://en.wikipedia.org/wiki/Code_page for more
case 1252: return "Windows-1252";
case 20127: return "US-ASCII";
default: return "cp"+codePage;
}
}
private String getEncoding(HMEFMessage tnefDat) {
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
int codePage = 1252;
if (oemCP != null) {
codePage = LittleEndian.getInt(oemCP.getData());
} else if (cpId != null) {
codePage = LittleEndian.getInt(cpId.getData());
}
switch (codePage) {
// see http://en.wikipedia.org/wiki/Code_page for more
case 1252: return "Windows-1252";
case 20127: return "US-ASCII";
default: return "cp"+codePage;
}
}
}

View File

@ -28,40 +28,40 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.jupiter.api.Test;
class HPBFFileHandler extends POIFSFileHandler {
@Override
public void handleFile(InputStream stream, String path) throws Exception {
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
assertNotNull(pub.getEscherDelayStm());
assertNotNull(pub.getMainContents());
assertNotNull(pub.getQuillContents());
@Override
public void handleFile(InputStream stream, String path) throws Exception {
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
assertNotNull(pub.getEscherDelayStm());
assertNotNull(pub.getMainContents());
assertNotNull(pub.getQuillContents());
// writing is not yet implemented... handlePOIDocument(pub);
pub.close();
}
// writing is not yet implemented... handlePOIDocument(pub);
pub.close();
}
// a test-case to test this locally without executing the full TestAllFiles
@Override
// a test-case to test this locally without executing the full TestAllFiles
@Override
@Test
void test() throws Exception {
void test() throws Exception {
File file = new File("test-data/publisher/SampleBrochure.pub");
InputStream stream = new FileInputStream(file);
try {
handleFile(stream, file.getPath());
} finally {
stream.close();
}
try {
handleFile(stream, file.getPath());
} finally {
stream.close();
}
handleExtracting(file);
handleExtracting(file);
stream = new FileInputStream(file);
try {
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
assertNotNull(extractor.getText());
}
} finally {
stream.close();
}
}
stream = new FileInputStream(file);
try {
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
assertNotNull(extractor.getText());
}
} finally {
stream.close();
}
}
}

View File

@ -66,30 +66,30 @@ class HPSFFileHandler extends POIFSFileHandler {
@Override
public void handleFile(InputStream stream, String path) throws Exception {
POIFSFileSystem poifs = new POIFSFileSystem(stream);
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
SummaryInformation si = hpsf.getSummaryInformation();
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
POIFSFileSystem poifs = new POIFSFileSystem(stream);
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
SummaryInformation si = hpsf.getSummaryInformation();
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
assertEquals(hasDSI, dsi != null);
assertEquals(hasDSI, dsi != null);
assertEquals(hasSI, si != null);
handlePOIDocument(hpsf);
}
handlePOIDocument(hpsf);
}
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
DirectoryNode root = poifs.getRoot();
if (!root.hasEntry(streamName)) {
return false;
}
if (!root.hasEntry(streamName)) {
return false;
}
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
return PropertySet.isPropertySetStream(dis);
}
}
}
private static File getTempFile() {
private static File getTempFile() {
File f = null;
try {
f = TempFile.createTempFile("hpsfCopy", "out");
@ -112,16 +112,16 @@ class HPSFFileHandler extends POIFSFileHandler {
}
// a test-case to test this locally without executing the full TestAllFiles
@Override
// a test-case to test this locally without executing the full TestAllFiles
@Override
@Test
@SuppressWarnings("java:S2699")
void test() throws Exception {
String path = "test-data/diagram/44501.vsd";
void test() throws Exception {
String path = "test-data/diagram/44501.vsd";
try (InputStream stream = new FileInputStream(path)) {
handleFile(stream, path);
}
}
}
// a test-case to test this locally without executing the full TestAllFiles
@Test

View File

@ -28,61 +28,61 @@ import org.apache.poi.hsmf.datatypes.DirectoryChunk;
import org.junit.jupiter.api.Test;
class HSMFFileHandler extends POIFSFileHandler {
@Override
public void handleFile(InputStream stream, String path) throws Exception {
MAPIMessage mapi = new MAPIMessage(stream);
assertNotNull(mapi.getAttachmentFiles());
assertNotNull(mapi.getDisplayBCC());
assertNotNull(mapi.getMessageDate());
@Override
public void handleFile(InputStream stream, String path) throws Exception {
MAPIMessage mapi = new MAPIMessage(stream);
assertNotNull(mapi.getAttachmentFiles());
assertNotNull(mapi.getDisplayBCC());
assertNotNull(mapi.getMessageDate());
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
for(AttachmentChunks attachment : attachments) {
for(AttachmentChunks attachment : attachments) {
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
if(chunkDirectory != null) {
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
assertNotNull(attachmentMSG);
String body = attachmentMSG.getTextBody();
assertNotNull(body);
}
}
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
if(chunkDirectory != null) {
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
assertNotNull(attachmentMSG);
String body = attachmentMSG.getTextBody();
assertNotNull(body);
}
}
/* => Writing isn't yet supported...
// write out the file
File file = TempFile.createTempFile("StressTest", ".msg");
writeToFile(mapi, file);
/* => Writing isn't yet supported...
// write out the file
File file = TempFile.createTempFile("StressTest", ".msg");
writeToFile(mapi, file);
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
assertNotNull(read.getAttachmentFiles());
assertNotNull(read.getDisplayBCC());
assertNotNull(read.getMessageDate());
*/
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
assertNotNull(read.getAttachmentFiles());
assertNotNull(read.getDisplayBCC());
assertNotNull(read.getMessageDate());
*/
// writing is not yet supported... handlePOIDocument(mapi);
// writing is not yet supported... handlePOIDocument(mapi);
mapi.close();
}
mapi.close();
}
// private void writeToFile(MAPIMessage mapi, File file)
// throws FileNotFoundException, IOException {
// OutputStream stream = new FileOutputStream(file);
// try {
// mapi.write(stream);
// } finally {
// stream.close();
// }
// }
// private void writeToFile(MAPIMessage mapi, File file)
// throws FileNotFoundException, IOException {
// OutputStream stream = new FileOutputStream(file);
// try {
// mapi.write(stream);
// } finally {
// stream.close();
// }
// }
// a test-case to test this locally without executing the full TestAllFiles
@Override
// a test-case to test this locally without executing the full TestAllFiles
@Override
@Test
void test() throws Exception {
void test() throws Exception {
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
handleExtracting(file);
}
handleExtracting(file);
}
}

View File

@ -37,90 +37,90 @@ import org.apache.commons.io.output.NullPrintStream;
import org.junit.jupiter.api.Test;
class HSSFFileHandler extends SpreadsheetHandler {
private final POIFSFileHandler delegate = new POIFSFileHandler();
@Override
private final POIFSFileHandler delegate = new POIFSFileHandler();
@Override
public void handleFile(InputStream stream, String path) throws Exception {
HSSFWorkbook wb = new HSSFWorkbook(stream);
handleWorkbook(wb);
HSSFWorkbook wb = new HSSFWorkbook(stream);
handleWorkbook(wb);
// TODO: some documents fail currently...
// TODO: some documents fail currently...
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
// IntersectionPtg. However it is still not capable of parsing it.
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
//evaluator.evaluateAll();
delegate.handlePOIDocument(wb);
delegate.handlePOIDocument(wb);
// also try to see if some of the Records behave incorrectly
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
// also try to see if some of the Records behave incorrectly
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
HSSFOptimiser.optimiseCellStyles(wb);
for(Sheet sheet : wb) {
for (Row row : sheet) {
for (Cell cell : row) {
assertNotNull(cell.getCellStyle());
}
}
}
HSSFOptimiser.optimiseCellStyles(wb);
for(Sheet sheet : wb) {
for (Row row : sheet) {
for (Cell cell : row) {
assertNotNull(cell.getCellStyle());
}
}
}
HSSFOptimiser.optimiseFonts(wb);
}
HSSFOptimiser.optimiseFonts(wb);
}
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
static {
// encrypted
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
// broken files
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
// TODO: ok to ignore?
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
}
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
static {
// encrypted
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
// broken files
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
// TODO: ok to ignore?
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
}
@Override
public void handleAdditional(File file) throws Exception {
// redirect stdout as the examples often write lots of text
PrintStream oldOut = System.out;
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
try {
System.setOut(new NullPrintStream());
@Override
public void handleAdditional(File file) throws Exception {
// redirect stdout as the examples often write lots of text
PrintStream oldOut = System.out;
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
try {
System.setOut(new NullPrintStream());
BiffViewer.main(new String[]{file.getAbsolutePath()});
BiffViewer.main(new String[]{file.getAbsolutePath()});
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
} catch (OldExcelFormatException e) {
// old excel formats are not supported here
} catch (RuntimeException e) {
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
throw e;
}
} finally {
System.setOut(oldOut);
}
}
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
} catch (OldExcelFormatException e) {
// old excel formats are not supported here
} catch (RuntimeException e) {
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
throw e;
}
} finally {
System.setOut(oldOut);
}
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
File file = new File("../test-data/spreadsheet/59074.xls");
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
handleExtracting(file);
handleExtracting(file);
handleAdditional(file);
}
handleAdditional(file);
}
// a test-case to test this locally without executing the full TestAllFiles
// a test-case to test this locally without executing the full TestAllFiles
@Test
@SuppressWarnings("java:S2699")
@SuppressWarnings("java:S2699")
void testExtractor() throws Exception {
handleExtracting(new File("../test-data/spreadsheet/59074.xls"));
}

View File

@ -31,7 +31,7 @@ import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.junit.jupiter.api.Test;
class OPCFileHandler extends AbstractFileHandler {
@Override
@Override
public void handleFile(InputStream stream, String path) throws Exception {
// ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
@ -59,15 +59,15 @@ class OPCFileHandler extends AbstractFileHandler {
// text-extraction is not possible currently for these types of files
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
// a test-case to test this locally without executing the full TestAllFiles
@Test
void test() throws Exception {
File file = new File("test-data/diagram/test.vsdx");
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
handleFile(stream, file.getPath());
}
handleExtracting(file);
}
handleExtracting(file);
}
}

View File

@ -31,15 +31,15 @@ import org.junit.jupiter.api.Test;
class POIFSFileHandler extends AbstractFileHandler {
@Override
@Override
public void handleFile(InputStream stream, String path) throws Exception {
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
handlePOIFSFileSystem(fs);
handleHPSFProperties(fs);
}
}
}
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
// can be null
ext.getDocSummaryInformation();
@ -52,12 +52,12 @@ class POIFSFileHandler extends AbstractFileHandler {
}
private void handlePOIFSFileSystem(POIFSFileSystem fs) {
assertNotNull(fs);
assertNotNull(fs.getRoot());
}
assertNotNull(fs);
assertNotNull(fs.getRoot());
}
protected void handlePOIDocument(POIDocument doc) throws Exception {
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
protected void handlePOIDocument(POIDocument doc) throws Exception {
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
doc.write(out);
try (InputStream in = out.toInputStream();
@ -65,7 +65,7 @@ class POIFSFileHandler extends AbstractFileHandler {
handlePOIFSFileSystem(fs);
}
}
}
}
// a test-case to test this locally without executing the full TestAllFiles
@Test

View File

@ -29,13 +29,13 @@ import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
public final class POIXMLDocumentHandler {
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
assertNotNull(doc.getAllEmbeddedParts());
assertNotNull(doc.getPackage());
assertNotNull(doc.getPackagePart());
assertNotNull(doc.getProperties());
assertNotNull(doc.getRelations());
}
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
assertNotNull(doc.getAllEmbeddedParts());
assertNotNull(doc.getPackage());
assertNotNull(doc.getPackagePart());
assertNotNull(doc.getProperties());
assertNotNull(doc.getRelations());
}
protected static boolean isEncrypted(InputStream stream) throws IOException {
if (FileMagic.valueOf(stream) == FileMagic.OLE2) {

View File

@ -33,68 +33,68 @@ import org.apache.poi.util.RecordFormatException;
import org.apache.poi.xssf.usermodel.XSSFChartSheet;
public abstract class SpreadsheetHandler extends AbstractFileHandler {
public void handleWorkbook(Workbook wb) throws IOException {
// try to access some of the content
readContent(wb);
public void handleWorkbook(Workbook wb) throws IOException {
// try to access some of the content
readContent(wb);
// write out the file
writeToArray(wb);
// write out the file
writeToArray(wb);
// access some more content (we had cases where writing corrupts the data in memory)
readContent(wb);
// access some more content (we had cases where writing corrupts the data in memory)
readContent(wb);
// write once more
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
// write once more
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
// read in the written file
Workbook read = WorkbookFactory.create(out.toInputStream());
// read in the written file
Workbook read = WorkbookFactory.create(out.toInputStream());
assertNotNull(read);
assertNotNull(read);
readContent(read);
readContent(read);
extractEmbedded(read);
extractEmbedded(read);
modifyContent(read);
modifyContent(read);
read.close();
}
read.close();
}
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
wb.write(stream);
return stream;
}
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
wb.write(stream);
return stream;
}
private void readContent(Workbook wb) {
for(int i = 0;i < wb.getNumberOfSheets();i++) {
Sheet sheet = wb.getSheetAt(i);
assertNotNull(wb.getSheet(sheet.getSheetName()));
sheet.groupColumn((short) 4, (short) 5);
sheet.setColumnGroupCollapsed(4, true);
sheet.setColumnGroupCollapsed(4, false);
private void readContent(Workbook wb) {
for(int i = 0;i < wb.getNumberOfSheets();i++) {
Sheet sheet = wb.getSheetAt(i);
assertNotNull(wb.getSheet(sheet.getSheetName()));
sheet.groupColumn((short) 4, (short) 5);
sheet.setColumnGroupCollapsed(4, true);
sheet.setColumnGroupCollapsed(4, false);
// don't do this for very large sheets as it will take a long time
if(sheet.getPhysicalNumberOfRows() > 1000) {
continue;
}
// don't do this for very large sheets as it will take a long time
if(sheet.getPhysicalNumberOfRows() > 1000) {
continue;
}
for(Row row : sheet) {
for(Cell cell : row) {
assertNotNull(cell.toString());
}
}
}
for(Row row : sheet) {
for(Cell cell : row) {
assertNotNull(cell.toString());
}
}
}
for (Name name : wb.getAllNames()) {
// this sometimes caused exceptions
for (Name name : wb.getAllNames()) {
// this sometimes caused exceptions
if(!name.isFunctionName()) {
name.getRefersToFormula();
}
}
}
}
}
private void extractEmbedded(Workbook wb) throws IOException {
private void extractEmbedded(Workbook wb) throws IOException {
EmbeddedExtractor ee = new EmbeddedExtractor();
for (Sheet s : wb) {
@ -104,48 +104,48 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
assertNotNull(ed.getShape());
}
}
}
}
private void modifyContent(Workbook wb) {
/* a number of files fail because of various things: udf, unimplemented functions, ...
we would need quite a list of excludes and the large regression tests would probably
take a lot longer to run...
try {
// try to re-compute all formulas to find cases where parsing fails
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
} catch (RuntimeException e) {
// only allow a specific exception which indicates that an external
// reference was not found
if(!e.getMessage().contains("Could not resolve external workbook name")) {
throw e;
}
private void modifyContent(Workbook wb) {
/* a number of files fail because of various things: udf, unimplemented functions, ...
we would need quite a list of excludes and the large regression tests would probably
take a lot longer to run...
try {
// try to re-compute all formulas to find cases where parsing fails
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
} catch (RuntimeException e) {
// only allow a specific exception which indicates that an external
// reference was not found
if(!e.getMessage().contains("Could not resolve external workbook name")) {
throw e;
}
}*/
}*/
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
// clone for chart-sheets is not supported
continue;
}
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
// clone for chart-sheets is not supported
continue;
}
try {
wb.cloneSheet(i);
} catch (RecordFormatException e) {
if (e.getCause() instanceof CloneNotSupportedException) {
// ignore me
continue;
}
throw e;
} catch (RuntimeException e) {
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
"CountryRecord not found".equals(e.getMessage()) ||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
// ignore these here for now
continue;
try {
wb.cloneSheet(i);
} catch (RecordFormatException e) {
if (e.getCause() instanceof CloneNotSupportedException) {
// ignore me
continue;
}
throw e;
}
}
}
throw e;
} catch (RuntimeException e) {
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
"CountryRecord not found".equals(e.getMessage()) ||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
// ignore these here for now
continue;
}
throw e;
}
}
}
}

View File

@ -31,55 +31,55 @@ import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.junit.jupiter.api.Test;
class XSLFFileHandler extends SlideShowHandler {
@Override
@Override
public void handleFile(InputStream stream, String path) throws Exception {
try (XMLSlideShow slide = new XMLSlideShow(stream);
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
;
assertNotNull(slideInner.getPresentation());
assertNotNull(slideInner.getSlideMasterReferences());
assertNotNull(slideInner.getSlideReferences());
try (XMLSlideShow slide = new XMLSlideShow(stream);
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
;
assertNotNull(slideInner.getPresentation());
assertNotNull(slideInner.getSlideMasterReferences());
assertNotNull(slideInner.getSlideReferences());
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
handleSlideShow(slide);
} catch (POIXMLException e) {
Exception cause = (Exception)e.getCause();
throw cause == null ? e : cause;
}
}
handleSlideShow(slide);
} catch (POIXMLException e) {
Exception cause = (Exception)e.getCause();
throw cause == null ? e : cause;
}
}
@Override
@Override
public void handleExtracting(File file) throws Exception {
super.handleExtracting(file);
// additionally try the other getText() methods
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
assertNotNull(extractor);
extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(true);
extractor.setMasterByDefault(true);
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
assertNotNull(extractor);
extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(true);
extractor.setMasterByDefault(true);
assertNotNull(extractor.getText());
assertNotNull(extractor.getText());
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(false);
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(false);
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
}
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
}
}
// a test-case to test this locally without executing the full TestAllFiles
@Override
@Override
@Test
void test() throws Exception {
void test() throws Exception {
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath());
}
handleExtracting(file);
}
handleExtracting(file);
}
}