convert tabs to spaces

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1890125 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2021-05-22 22:08:50 +00:00
parent 212a7b9655
commit 0eb475ee3a
14 changed files with 500 additions and 500 deletions

View File

@ -46,7 +46,7 @@ public abstract class AbstractFileHandler implements FileHandler {
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>(); public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
static { static {
// password protected files without password // password protected files without password
// ... currently none ... // ... currently none ...
// unsupported file-types, no supported OLE2 parts // unsupported file-types, no supported OLE2 parts
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
@ -75,9 +75,9 @@ public abstract class AbstractFileHandler implements FileHandler {
/* Did fail for some documents with special XML contents... /* Did fail for some documents with special XML contents...
try { try {
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
"/tmp/pretty-" + file.getName() }); "/tmp/pretty-" + file.getName() });
} catch (ZipException e) { } catch (ZipException e) {
// ignore, not a Zip/OOXML file // ignore, not a Zip/OOXML file
}*/ }*/
} }

View File

@ -38,128 +38,128 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
* *
*/ */
public class BaseIntegrationTest { public class BaseIntegrationTest {
private final File rootDir; private final File rootDir;
private final String file; private final String file;
private FileHandler handler; private FileHandler handler;
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) { public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
this.rootDir = rootDir; this.rootDir = rootDir;
this.file = file; this.file = file;
this.handler = handler; this.handler = handler;
} }
/** /**
* Keep this public so it can be used by the regression-tests * Keep this public so it can be used by the regression-tests
*/ */
public void test() throws Exception { public void test() throws Exception {
assertNotNull( handler, "Unknown file extension for file: " + file ); assertNotNull( handler, "Unknown file extension for file: " + file );
testOneFile(new File(rootDir, file)); testOneFile(new File(rootDir, file));
} }
protected void testOneFile(File inputFile) throws Exception { protected void testOneFile(File inputFile) throws Exception {
try { try {
handleFile(inputFile); handleFile(inputFile);
} catch (OfficeXmlFileException e) { } catch (OfficeXmlFileException e) {
// switch XWPF and HWPF and so forth depending on the error message // switch XWPF and HWPF and so forth depending on the error message
handleWrongOLE2XMLExtension(inputFile, e); handleWrongOLE2XMLExtension(inputFile, e);
} catch (OldFileFormatException e) { } catch (OldFileFormatException e) {
// Not even text extraction is supported for these: handler.handleExtracting(inputFile); // Not even text extraction is supported for these: handler.handleExtracting(inputFile);
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" ); assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
} catch (EncryptedDocumentException e) { } catch (EncryptedDocumentException e) {
// Do not try to read encrypted files // Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
} catch (ZipException e) { } catch (ZipException e) {
// some files are corrupted // some files are corrupted
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) { if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
} }
throw e; throw e;
} catch (IOException e) { } catch (IOException e) {
// ignore some other ways of corrupted files // ignore some other ways of corrupted files
String message = e.getMessage(); String message = e.getMessage();
if(message != null && message.contains("Truncated ZIP file")) { if(message != null && message.contains("Truncated ZIP file")) {
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
} }
// sometimes binary format has XML-format-extension... // sometimes binary format has XML-format-extension...
if(message != null && message.contains("rong file format or file extension for OO XML file")) { if(message != null && message.contains("rong file format or file extension for OO XML file")) {
handleWrongOLE2XMLExtension(inputFile, e); handleWrongOLE2XMLExtension(inputFile, e);
return; return;
} }
throw e; throw e;
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
// ignore errors for documents with incorrect extension // ignore errors for documents with incorrect extension
String message = e.getMessage(); String message = e.getMessage();
if(message != null && (message.equals("The document is really a RTF file") || if(message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") || message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file"))) { message.equals("The document is really a HTML file"))) {
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
} }
if(message != null && message.equals("The document is really a OOXML file")) { if(message != null && message.equals("The document is really a OOXML file")) {
handleWrongOLE2XMLExtension(inputFile, e); handleWrongOLE2XMLExtension(inputFile, e);
return; return;
} }
throw e; throw e;
} }
try { try {
handler.handleExtracting(inputFile); handler.handleExtracting(inputFile);
} catch (EncryptedDocumentException e) { } catch (EncryptedDocumentException e) {
// Do not try to read encrypted files // Do not try to read encrypted files
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
} }
} }
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception { void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
// we sometimes have wrong extensions, so for some exceptions we try to handle it // we sometimes have wrong extensions, so for some exceptions we try to handle it
// with the correct FileHandler instead // with the correct FileHandler instead
String message = e.getMessage(); String message = e.getMessage();
// ignore some file-types that we do not want to handle here // ignore some file-types that we do not want to handle here
assumeFalse( message != null && (message.equals("The document is really a RTF file") || assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") || message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
if(message != null && (message.equals("The document is really a XLS file"))) { if(message != null && (message.equals("The document is really a XLS file"))) {
handler = new HSSFFileHandler(); handler = new HSSFFileHandler();
} else if(message != null && (message.equals("The document is really a PPT file"))) { } else if(message != null && (message.equals("The document is really a PPT file"))) {
handler = new HSLFFileHandler(); handler = new HSLFFileHandler();
} else if(message != null && (message.equals("The document is really a DOC file"))) { } else if(message != null && (message.equals("The document is really a DOC file"))) {
handler = new HWPFFileHandler(); handler = new HWPFFileHandler();
} else if(message != null && (message.equals("The document is really a VSD file"))) { } else if(message != null && (message.equals("The document is really a VSD file"))) {
handler = new HDGFFileHandler(); handler = new HDGFFileHandler();
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
} else if (handler instanceof HWPFFileHandler) { } else if (handler instanceof HWPFFileHandler) {
handler = new XWPFFileHandler(); handler = new XWPFFileHandler();
} else if (handler instanceof HSSFFileHandler) { } else if (handler instanceof HSSFFileHandler) {
handler = new XSSFFileHandler(); handler = new XSSFFileHandler();
} else if (handler instanceof HSLFFileHandler) { } else if (handler instanceof HSLFFileHandler) {
handler = new XSLFFileHandler(); handler = new XSLFFileHandler();
// and the other way around, use HWPF instead of XWPF and so forth // and the other way around, use HWPF instead of XWPF and so forth
} else if(handler instanceof XWPFFileHandler) { } else if(handler instanceof XWPFFileHandler) {
handler = new HWPFFileHandler(); handler = new HWPFFileHandler();
} else if(handler instanceof XSSFFileHandler) { } else if(handler instanceof XSSFFileHandler) {
handler = new HSSFFileHandler(); handler = new HSSFFileHandler();
} else if(handler instanceof XSLFFileHandler) { } else if(handler instanceof XSLFFileHandler) {
handler = new HSLFFileHandler(); handler = new HSLFFileHandler();
} else { } else {
// nothing matched => throw the exception to the outside // nothing matched => throw the exception to the outside
throw e; throw e;
} }
// we found a different handler to try processing again // we found a different handler to try processing again
handleFile(inputFile); handleFile(inputFile);
} }
private void handleFile(File inputFile) throws Exception { private void handleFile(File inputFile) throws Exception {
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) { try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
handler.handleFile(newStream, inputFile.getAbsolutePath()); handler.handleFile(newStream, inputFile.getAbsolutePath());
} }
} }
} }

View File

@ -24,28 +24,28 @@ import java.io.InputStream;
* used in the stress testing. * used in the stress testing.
*/ */
public interface FileHandler {
    /**
     * The FileHandler receives a stream ready for reading the
     * file and should handle the content that is provided and
     * try to read and interpret the data.
     *
     * Closing is handled by the framework outside this call.
     *
     * @param stream The input stream to read the file from.
     * @param path the path to the file; callers pass either an absolute
     *             or a relative path, so do not rely on one form
     * @throws Exception If an error happens in the file-specific handler
     */
    void handleFile(InputStream stream, String path) throws Exception;

    /**
     * Ensures that extracting text from the given file
     * is returning some text.
     *
     * @param file the file to extract text from
     * @throws Exception If extraction fails
     */
    void handleExtracting(File file) throws Exception;

    /**
     * Allows to perform some additional work, e.g. run
     * some of the example applications
     *
     * @param file the file to run the additional work against
     * @throws Exception If the additional work fails
     */
    void handleAdditional(File file) throws Exception;
}

View File

@ -32,47 +32,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class HDGFFileHandler extends POIFSFileHandler { class HDGFFileHandler extends POIFSFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws IOException { public void handleFile(InputStream stream, String path) throws IOException {
POIFSFileSystem poifs = new POIFSFileSystem(stream); POIFSFileSystem poifs = new POIFSFileSystem(stream);
HDGFDiagram diagram = new HDGFDiagram(poifs); HDGFDiagram diagram = new HDGFDiagram(poifs);
Stream[] topLevelStreams = diagram.getTopLevelStreams(); Stream[] topLevelStreams = diagram.getTopLevelStreams();
assertNotNull(topLevelStreams); assertNotNull(topLevelStreams);
for(Stream str : topLevelStreams) { for(Stream str : topLevelStreams) {
assertTrue(str.getPointer().getLength() >= 0); assertTrue(str.getPointer().getLength() >= 0);
} }
TrailerStream trailerStream = diagram.getTrailerStream(); TrailerStream trailerStream = diagram.getTrailerStream();
assertNotNull(trailerStream); assertNotNull(trailerStream);
assertTrue(trailerStream.getPointer().getLength() >= 0); assertTrue(trailerStream.getPointer().getLength() >= 0);
diagram.close(); diagram.close();
poifs.close(); poifs.close();
// writing is not yet implemented... handlePOIDocument(diagram); // writing is not yet implemented... handlePOIDocument(diagram);
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Override @Override
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("test-data/diagram/44501.vsd"); File file = new File("test-data/diagram/44501.vsd");
InputStream stream = new FileInputStream(file); InputStream stream = new FileInputStream(file);
try { try {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} finally { } finally {
stream.close(); stream.close();
} }
handleExtracting(file); handleExtracting(file);
stream = new FileInputStream(file); stream = new FileInputStream(file);
try { try {
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) { try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
assertNotNull(extractor.getText()); assertNotNull(extractor.getText());
} }
} finally { } finally {
stream.close(); stream.close();
} }
} }
} }

View File

@ -34,58 +34,58 @@ import org.junit.jupiter.api.Test;
class HMEFFileHandler extends AbstractFileHandler { class HMEFFileHandler extends AbstractFileHandler {
@Override @Override
public void handleExtracting(File file) throws Exception { public void handleExtracting(File file) throws Exception {
FileMagic fm = FileMagic.valueOf(file); FileMagic fm = FileMagic.valueOf(file);
if (fm == FileMagic.OLE2) { if (fm == FileMagic.OLE2) {
super.handleExtracting(file); super.handleExtracting(file);
} }
} }
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
HMEFMessage msg = new HMEFMessage(stream); HMEFMessage msg = new HMEFMessage(stream);
// there are test-files that have no body... // there are test-files that have no body...
String[] HTML_BODY = { String[] HTML_BODY = {
"Testing TNEF Message", "TNEF test message with attachments", "Test" "Testing TNEF Message", "TNEF test message with attachments", "Test"
}; };
String bodyStr; String bodyStr;
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) { if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML); MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
assertNotNull(bodyHtml); assertNotNull(bodyHtml);
bodyStr = new String(bodyHtml.getData(), getEncoding(msg)); bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
} else { } else {
bodyStr = msg.getBody(); bodyStr = msg.getBody();
} }
assertNotNull( bodyStr, "Body is not set" ); assertNotNull( bodyStr, "Body is not set" );
assertNotNull( msg.getSubject(), "Subject is not set" ); assertNotNull( msg.getSubject(), "Subject is not set" );
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test
void test() throws Exception { void test() throws Exception {
String path = "test-data/hmef/quick-winmail.dat"; String path = "test-data/hmef/quick-winmail.dat";
try (InputStream stream = new FileInputStream(path)) { try (InputStream stream = new FileInputStream(path)) {
handleFile(stream, path); handleFile(stream, path);
} }
} }
private String getEncoding(HMEFMessage tnefDat) { private String getEncoding(HMEFMessage tnefDat) {
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE); TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID); MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
int codePage = 1252; int codePage = 1252;
if (oemCP != null) { if (oemCP != null) {
codePage = LittleEndian.getInt(oemCP.getData()); codePage = LittleEndian.getInt(oemCP.getData());
} else if (cpId != null) { } else if (cpId != null) {
codePage = LittleEndian.getInt(cpId.getData()); codePage = LittleEndian.getInt(cpId.getData());
} }
switch (codePage) { switch (codePage) {
// see http://en.wikipedia.org/wiki/Code_page for more // see http://en.wikipedia.org/wiki/Code_page for more
case 1252: return "Windows-1252"; case 1252: return "Windows-1252";
case 20127: return "US-ASCII"; case 20127: return "US-ASCII";
default: return "cp"+codePage; default: return "cp"+codePage;
} }
} }
} }

View File

@ -28,40 +28,40 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class HPBFFileHandler extends POIFSFileHandler { class HPBFFileHandler extends POIFSFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream)); HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
assertNotNull(pub.getEscherDelayStm()); assertNotNull(pub.getEscherDelayStm());
assertNotNull(pub.getMainContents()); assertNotNull(pub.getMainContents());
assertNotNull(pub.getQuillContents()); assertNotNull(pub.getQuillContents());
// writing is not yet implemented... handlePOIDocument(pub); // writing is not yet implemented... handlePOIDocument(pub);
pub.close(); pub.close();
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Override @Override
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("test-data/publisher/SampleBrochure.pub"); File file = new File("test-data/publisher/SampleBrochure.pub");
InputStream stream = new FileInputStream(file); InputStream stream = new FileInputStream(file);
try { try {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} finally { } finally {
stream.close(); stream.close();
} }
handleExtracting(file); handleExtracting(file);
stream = new FileInputStream(file); stream = new FileInputStream(file);
try { try {
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) { try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
assertNotNull(extractor.getText()); assertNotNull(extractor.getText());
} }
} finally { } finally {
stream.close(); stream.close();
} }
} }
} }

View File

@ -66,30 +66,30 @@ class HPSFFileHandler extends POIFSFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
POIFSFileSystem poifs = new POIFSFileSystem(stream); POIFSFileSystem poifs = new POIFSFileSystem(stream);
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs); HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation(); DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
SummaryInformation si = hpsf.getSummaryInformation(); SummaryInformation si = hpsf.getSummaryInformation();
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME); boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME); boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
assertEquals(hasDSI, dsi != null); assertEquals(hasDSI, dsi != null);
assertEquals(hasSI, si != null); assertEquals(hasSI, si != null);
handlePOIDocument(hpsf); handlePOIDocument(hpsf);
} }
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException { private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
DirectoryNode root = poifs.getRoot(); DirectoryNode root = poifs.getRoot();
if (!root.hasEntry(streamName)) { if (!root.hasEntry(streamName)) {
return false; return false;
} }
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) { try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
return PropertySet.isPropertySetStream(dis); return PropertySet.isPropertySetStream(dis);
} }
} }
private static File getTempFile() { private static File getTempFile() {
File f = null; File f = null;
try { try {
f = TempFile.createTempFile("hpsfCopy", "out"); f = TempFile.createTempFile("hpsfCopy", "out");
@ -112,16 +112,16 @@ class HPSFFileHandler extends POIFSFileHandler {
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Override @Override
@Test @Test
@SuppressWarnings("java:S2699") @SuppressWarnings("java:S2699")
void test() throws Exception { void test() throws Exception {
String path = "test-data/diagram/44501.vsd"; String path = "test-data/diagram/44501.vsd";
try (InputStream stream = new FileInputStream(path)) { try (InputStream stream = new FileInputStream(path)) {
handleFile(stream, path); handleFile(stream, path);
} }
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test

View File

@ -28,61 +28,61 @@ import org.apache.poi.hsmf.datatypes.DirectoryChunk;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class HSMFFileHandler extends POIFSFileHandler { class HSMFFileHandler extends POIFSFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
MAPIMessage mapi = new MAPIMessage(stream); MAPIMessage mapi = new MAPIMessage(stream);
assertNotNull(mapi.getAttachmentFiles()); assertNotNull(mapi.getAttachmentFiles());
assertNotNull(mapi.getDisplayBCC()); assertNotNull(mapi.getDisplayBCC());
assertNotNull(mapi.getMessageDate()); assertNotNull(mapi.getMessageDate());
AttachmentChunks[] attachments = mapi.getAttachmentFiles(); AttachmentChunks[] attachments = mapi.getAttachmentFiles();
for(AttachmentChunks attachment : attachments) { for(AttachmentChunks attachment : attachments) {
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory(); DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
if(chunkDirectory != null) { if(chunkDirectory != null) {
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage(); MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
assertNotNull(attachmentMSG); assertNotNull(attachmentMSG);
String body = attachmentMSG.getTextBody(); String body = attachmentMSG.getTextBody();
assertNotNull(body); assertNotNull(body);
} }
} }
/* => Writing isn't yet supported... /* => Writing isn't yet supported...
// write out the file // write out the file
File file = TempFile.createTempFile("StressTest", ".msg"); File file = TempFile.createTempFile("StressTest", ".msg");
writeToFile(mapi, file); writeToFile(mapi, file);
MAPIMessage read = new MAPIMessage(file.getAbsolutePath()); MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
assertNotNull(read.getAttachmentFiles()); assertNotNull(read.getAttachmentFiles());
assertNotNull(read.getDisplayBCC()); assertNotNull(read.getDisplayBCC());
assertNotNull(read.getMessageDate()); assertNotNull(read.getMessageDate());
*/ */
// writing is not yet supported... handlePOIDocument(mapi); // writing is not yet supported... handlePOIDocument(mapi);
mapi.close(); mapi.close();
} }
// private void writeToFile(MAPIMessage mapi, File file) // private void writeToFile(MAPIMessage mapi, File file)
// throws FileNotFoundException, IOException { // throws FileNotFoundException, IOException {
// OutputStream stream = new FileOutputStream(file); // OutputStream stream = new FileOutputStream(file);
// try { // try {
// mapi.write(stream); // mapi.write(stream);
// } finally { // } finally {
// stream.close(); // stream.close();
// } // }
// } // }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Override @Override
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg"); File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
try (InputStream stream = new FileInputStream(file)) { try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} }
handleExtracting(file); handleExtracting(file);
} }
} }

View File

@ -37,90 +37,90 @@ import org.apache.commons.io.output.NullPrintStream;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class HSSFFileHandler extends SpreadsheetHandler { class HSSFFileHandler extends SpreadsheetHandler {
private final POIFSFileHandler delegate = new POIFSFileHandler(); private final POIFSFileHandler delegate = new POIFSFileHandler();
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
HSSFWorkbook wb = new HSSFWorkbook(stream); HSSFWorkbook wb = new HSSFWorkbook(stream);
handleWorkbook(wb); handleWorkbook(wb);
// TODO: some documents fail currently... // TODO: some documents fail currently...
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating // Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
// IntersectionPtg. However it is still not capable of parsing it. // IntersectionPtg. However it is still not capable of parsing it.
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here. // So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb); //HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
//evaluator.evaluateAll(); //evaluator.evaluateAll();
delegate.handlePOIDocument(wb); delegate.handlePOIDocument(wb);
// also try to see if some of the Records behave incorrectly // also try to see if some of the Records behave incorrectly
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb); // TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
HSSFOptimiser.optimiseCellStyles(wb); HSSFOptimiser.optimiseCellStyles(wb);
for(Sheet sheet : wb) { for(Sheet sheet : wb) {
for (Row row : sheet) { for (Row row : sheet) {
for (Cell cell : row) { for (Cell cell : row) {
assertNotNull(cell.getCellStyle()); assertNotNull(cell.getCellStyle());
} }
} }
} }
HSSFOptimiser.optimiseFonts(wb); HSSFOptimiser.optimiseFonts(wb);
} }
// files, keyed as "<parent directory>/<file name>", for which the additional
// processing is expected to fail
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
static {
    final String[] expectedFailures = {
        // encrypted
        "spreadsheet/35897-type4.xls",
        "spreadsheet/xor-encryption-abc.xls",
        "spreadsheet/password.xls",
        // broken files
        "spreadsheet/43493.xls",
        // TODO: ok to ignore?
        "spreadsheet/50833.xls",
        "spreadsheet/51832.xls",
        "spreadsheet/XRefCalc.xls",
        "spreadsheet/61300.xls",
    };
    for (String expectedFailure : expectedFailures) {
        EXPECTED_ADDITIONAL_FAILURES.add(expectedFailure);
    }
}
@Override @Override
public void handleAdditional(File file) throws Exception { public void handleAdditional(File file) throws Exception {
// redirect stdout as the examples often write lots of text // redirect stdout as the examples often write lots of text
PrintStream oldOut = System.out; PrintStream oldOut = System.out;
String fileWithParent = file.getParentFile().getName() + "/" + file.getName(); String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
try { try {
System.setOut(new NullPrintStream()); System.setOut(new NullPrintStream());
BiffViewer.main(new String[]{file.getAbsolutePath()}); BiffViewer.main(new String[]{file.getAbsolutePath()});
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" ); assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
} catch (OldExcelFormatException e) { } catch (OldExcelFormatException e) {
// old excel formats are not supported here // old excel formats are not supported here
} catch (RuntimeException e) { } catch (RuntimeException e) {
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) { if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
throw e; throw e;
} }
} finally { } finally {
System.setOut(oldOut); System.setOut(oldOut);
} }
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("../test-data/spreadsheet/59074.xls"); File file = new File("../test-data/spreadsheet/59074.xls");
try (InputStream stream = new FileInputStream(file)) { try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} }
handleExtracting(file); handleExtracting(file);
handleAdditional(file); handleAdditional(file);
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test
@SuppressWarnings("java:S2699") @SuppressWarnings("java:S2699")
void testExtractor() throws Exception { void testExtractor() throws Exception {
handleExtracting(new File("../test-data/spreadsheet/59074.xls")); handleExtracting(new File("../test-data/spreadsheet/59074.xls"));
} }

View File

@ -31,7 +31,7 @@ import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class OPCFileHandler extends AbstractFileHandler { class OPCFileHandler extends AbstractFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
// ignore password protected files // ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return; if (POIXMLDocumentHandler.isEncrypted(stream)) return;
@ -59,15 +59,15 @@ class OPCFileHandler extends AbstractFileHandler {
// text-extraction is not possible currently for these types of files // text-extraction is not possible currently for these types of files
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("test-data/diagram/test.vsdx"); File file = new File("test-data/diagram/test.vsdx");
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) { try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} }
handleExtracting(file); handleExtracting(file);
} }
} }

View File

@ -31,15 +31,15 @@ import org.junit.jupiter.api.Test;
class POIFSFileHandler extends AbstractFileHandler { class POIFSFileHandler extends AbstractFileHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) { try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
handlePOIFSFileSystem(fs); handlePOIFSFileSystem(fs);
handleHPSFProperties(fs); handleHPSFProperties(fs);
} }
} }
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException { private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) { try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
// can be null // can be null
ext.getDocSummaryInformation(); ext.getDocSummaryInformation();
@ -52,12 +52,12 @@ class POIFSFileHandler extends AbstractFileHandler {
} }
private void handlePOIFSFileSystem(POIFSFileSystem fs) { private void handlePOIFSFileSystem(POIFSFileSystem fs) {
assertNotNull(fs); assertNotNull(fs);
assertNotNull(fs.getRoot()); assertNotNull(fs.getRoot());
} }
protected void handlePOIDocument(POIDocument doc) throws Exception { protected void handlePOIDocument(POIDocument doc) throws Exception {
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) { try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
doc.write(out); doc.write(out);
try (InputStream in = out.toInputStream(); try (InputStream in = out.toInputStream();
@ -65,7 +65,7 @@ class POIFSFileHandler extends AbstractFileHandler {
handlePOIFSFileSystem(fs); handlePOIFSFileSystem(fs);
} }
} }
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test

View File

@ -29,13 +29,13 @@ import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlObject;
public final class POIXMLDocumentHandler { public final class POIXMLDocumentHandler {
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
assertNotNull(doc.getAllEmbeddedParts()); assertNotNull(doc.getAllEmbeddedParts());
assertNotNull(doc.getPackage()); assertNotNull(doc.getPackage());
assertNotNull(doc.getPackagePart()); assertNotNull(doc.getPackagePart());
assertNotNull(doc.getProperties()); assertNotNull(doc.getProperties());
assertNotNull(doc.getRelations()); assertNotNull(doc.getRelations());
} }
protected static boolean isEncrypted(InputStream stream) throws IOException { protected static boolean isEncrypted(InputStream stream) throws IOException {
if (FileMagic.valueOf(stream) == FileMagic.OLE2) { if (FileMagic.valueOf(stream) == FileMagic.OLE2) {

View File

@ -33,68 +33,68 @@ import org.apache.poi.util.RecordFormatException;
import org.apache.poi.xssf.usermodel.XSSFChartSheet; import org.apache.poi.xssf.usermodel.XSSFChartSheet;
public abstract class SpreadsheetHandler extends AbstractFileHandler { public abstract class SpreadsheetHandler extends AbstractFileHandler {
public void handleWorkbook(Workbook wb) throws IOException { public void handleWorkbook(Workbook wb) throws IOException {
// try to access some of the content // try to access some of the content
readContent(wb); readContent(wb);
// write out the file // write out the file
writeToArray(wb); writeToArray(wb);
// access some more content (we had cases where writing corrupts the data in memory) // access some more content (we had cases where writing corrupts the data in memory)
readContent(wb); readContent(wb);
// write once more // write once more
UnsynchronizedByteArrayOutputStream out = writeToArray(wb); UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
// read in the written file // read in the written file
Workbook read = WorkbookFactory.create(out.toInputStream()); Workbook read = WorkbookFactory.create(out.toInputStream());
assertNotNull(read); assertNotNull(read);
readContent(read); readContent(read);
extractEmbedded(read); extractEmbedded(read);
modifyContent(read); modifyContent(read);
read.close(); read.close();
} }
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException { private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream(); UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
wb.write(stream); wb.write(stream);
return stream; return stream;
} }
private void readContent(Workbook wb) { private void readContent(Workbook wb) {
for(int i = 0;i < wb.getNumberOfSheets();i++) { for(int i = 0;i < wb.getNumberOfSheets();i++) {
Sheet sheet = wb.getSheetAt(i); Sheet sheet = wb.getSheetAt(i);
assertNotNull(wb.getSheet(sheet.getSheetName())); assertNotNull(wb.getSheet(sheet.getSheetName()));
sheet.groupColumn((short) 4, (short) 5); sheet.groupColumn((short) 4, (short) 5);
sheet.setColumnGroupCollapsed(4, true); sheet.setColumnGroupCollapsed(4, true);
sheet.setColumnGroupCollapsed(4, false); sheet.setColumnGroupCollapsed(4, false);
// don't do this for very large sheets as it will take a long time // don't do this for very large sheets as it will take a long time
if(sheet.getPhysicalNumberOfRows() > 1000) { if(sheet.getPhysicalNumberOfRows() > 1000) {
continue; continue;
} }
for(Row row : sheet) { for(Row row : sheet) {
for(Cell cell : row) { for(Cell cell : row) {
assertNotNull(cell.toString()); assertNotNull(cell.toString());
} }
} }
} }
for (Name name : wb.getAllNames()) { for (Name name : wb.getAllNames()) {
// this sometimes caused exceptions // this sometimes caused exceptions
if(!name.isFunctionName()) { if(!name.isFunctionName()) {
name.getRefersToFormula(); name.getRefersToFormula();
} }
} }
} }
private void extractEmbedded(Workbook wb) throws IOException { private void extractEmbedded(Workbook wb) throws IOException {
EmbeddedExtractor ee = new EmbeddedExtractor(); EmbeddedExtractor ee = new EmbeddedExtractor();
for (Sheet s : wb) { for (Sheet s : wb) {
@ -104,48 +104,48 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
assertNotNull(ed.getShape()); assertNotNull(ed.getShape());
} }
} }
} }
private void modifyContent(Workbook wb) { private void modifyContent(Workbook wb) {
/* a number of file fail because of various things: udf, unimplemented functions, ... /* a number of file fail because of various things: udf, unimplemented functions, ...
we would need quite a list of excludes and the large regression tests would probably we would need quite a list of excludes and the large regression tests would probably
take a lot longer to run... take a lot longer to run...
try { try {
// try to re-compute all formulas to find cases where parsing fails // try to re-compute all formulas to find cases where parsing fails
wb.getCreationHelper().createFormulaEvaluator().evaluateAll(); wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
} catch (RuntimeException e) { } catch (RuntimeException e) {
// only allow a specific exception which indicates that an external // only allow a specific exception which indicates that an external
// reference was not found // reference was not found
if(!e.getMessage().contains("Could not resolve external workbook name")) { if(!e.getMessage().contains("Could not resolve external workbook name")) {
throw e; throw e;
} }
}*/ }*/
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) { for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
if(wb.getSheetAt(i) instanceof XSSFChartSheet) { if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
// clone for chart-sheets is not supported // clone for chart-sheets is not supported
continue; continue;
} }
try { try {
wb.cloneSheet(i); wb.cloneSheet(i);
} catch (RecordFormatException e) { } catch (RecordFormatException e) {
if (e.getCause() instanceof CloneNotSupportedException) { if (e.getCause() instanceof CloneNotSupportedException) {
// ignore me // ignore me
continue; continue;
}
throw e;
} catch (RuntimeException e) {
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
"CountryRecord not found".equals(e.getMessage()) ||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
// ignore these here for now
continue;
} }
throw e; throw e;
} } catch (RuntimeException e) {
} if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
} "CountryRecord not found".equals(e.getMessage()) ||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
// ignore these here for now
continue;
}
throw e;
}
}
}
} }

View File

@ -31,55 +31,55 @@ import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class XSLFFileHandler extends SlideShowHandler { class XSLFFileHandler extends SlideShowHandler {
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
try (XMLSlideShow slide = new XMLSlideShow(stream); try (XMLSlideShow slide = new XMLSlideShow(stream);
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) { XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
; ;
assertNotNull(slideInner.getPresentation()); assertNotNull(slideInner.getPresentation());
assertNotNull(slideInner.getSlideMasterReferences()); assertNotNull(slideInner.getSlideMasterReferences());
assertNotNull(slideInner.getSlideReferences()); assertNotNull(slideInner.getSlideReferences());
new POIXMLDocumentHandler().handlePOIXMLDocument(slide); new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
handleSlideShow(slide); handleSlideShow(slide);
} catch (POIXMLException e) { } catch (POIXMLException e) {
Exception cause = (Exception)e.getCause(); Exception cause = (Exception)e.getCause();
throw cause == null ? e : cause; throw cause == null ? e : cause;
} }
} }
@Override @Override
public void handleExtracting(File file) throws Exception { public void handleExtracting(File file) throws Exception {
super.handleExtracting(file); super.handleExtracting(file);
// additionally try the other getText() methods // additionally try the other getText() methods
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) { try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
assertNotNull(extractor); assertNotNull(extractor);
extractor.setSlidesByDefault(true); extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(true); extractor.setNotesByDefault(true);
extractor.setMasterByDefault(true); extractor.setMasterByDefault(true);
assertNotNull(extractor.getText()); assertNotNull(extractor.getText());
extractor.setSlidesByDefault(false); extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(false); extractor.setNotesByDefault(false);
extractor.setMasterByDefault(false); extractor.setMasterByDefault(false);
assertEquals("", extractor.getText(), "With all options disabled we should not get text"); assertEquals("", extractor.getText(), "With all options disabled we should not get text");
} }
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Override @Override
@Test @Test
void test() throws Exception { void test() throws Exception {
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx"); File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
try (InputStream stream = new FileInputStream(file)) { try (InputStream stream = new FileInputStream(file)) {
handleFile(stream, file.getPath()); handleFile(stream, file.getPath());
} }
handleExtracting(file); handleExtracting(file);
} }
} }