#63955 - HMEFContentsExtractor fails to extract content from winmail.dat

fixed integration test

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2020-01-08 23:49:31 +00:00
parent f7fe4b0d59
commit 11b2c7e898
2 changed files with 65 additions and 25 deletions

View File

@ -18,35 +18,57 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.util.Arrays;
import org.apache.poi.hmef.HMEFMessage; import org.apache.poi.hmef.HMEFMessage;
import org.apache.poi.hmef.attribute.MAPIAttribute; import org.apache.poi.hmef.attribute.MAPIAttribute;
import org.apache.poi.hmef.attribute.MAPIStringAttribute; import org.apache.poi.hmef.attribute.MAPIStringAttribute;
import org.apache.poi.hmef.attribute.TNEFAttribute;
import org.apache.poi.hmef.attribute.TNEFProperty;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.LittleEndian;
import org.junit.Test; import org.junit.Test;
public class HMEFFileHandler extends AbstractFileHandler { public class HMEFFileHandler extends AbstractFileHandler {
/**
 * Runs the inherited extraction handling only for files whose detected
 * {@link FileMagic} is {@code OLE2}; any other file is skipped without
 * further processing.
 *
 * @param file the file to inspect and, if applicable, hand to the superclass
 * @throws Exception if the magic detection or the inherited handling fails
 */
@Override
public void handleExtracting(File file) throws Exception {
    // Guard: anything that is not an OLE2 container is ignored here.
    if (FileMagic.valueOf(file) != FileMagic.OLE2) {
        return;
    }
    super.handleExtracting(file);
}
@Override @Override
public void handleFile(InputStream stream, String path) throws Exception { public void handleFile(InputStream stream, String path) throws Exception {
HMEFMessage msg = new HMEFMessage(stream); HMEFMessage msg = new HMEFMessage(stream);
// list all properties // list all properties
StringBuilder props = new StringBuilder(); StringBuilder props = new StringBuilder();
for(MAPIAttribute att : msg.getMessageMAPIAttributes()) { for(MAPIAttribute att : msg.getMessageMAPIAttributes()) {
props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n"); props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n");
} }
// there are two test-files that have no body... // there are two test-files that have no body...
if(!msg.getSubject().equals("Testing TNEF Message") && !msg.getSubject().equals("TNEF test message with attachments")) { String[] HTML_BODY = {
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props, "Testing TNEF Message", "TNEF test message with attachments", "Test"
msg.getBody()); };
String bodyStr;
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
assertNotNull(bodyHtml);
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
} else {
bodyStr = msg.getBody();
} }
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props, assertNotNull("Body is not set", bodyStr);
msg.getSubject()); assertNotNull("Subject is not set", msg.getSubject());
} }
// a test-case to test this locally without executing the full TestAllFiles // a test-case to test this locally without executing the full TestAllFiles
@Test @Test
public void test() throws Exception { public void test() throws Exception {
@ -55,4 +77,22 @@ public class HMEFFileHandler extends AbstractFileHandler {
handleFile(stream, path); handleFile(stream, path);
} }
} }
/**
 * Derives a charset name from the code page stored in the given message.
 *
 * <p>The {@code ID_OEMCODEPAGE} TNEF attribute is preferred; if it is absent,
 * the {@code INTERNET_CPID} MAPI attribute is used; if both are absent, code
 * page 1252 is assumed. The code page number is read as a little-endian int
 * from the start of the attribute data.</p>
 *
 * @param tnefDat the message whose code page attributes are inspected
 * @return {@code "Windows-1252"} for 1252, {@code "US-ASCII"} for 20127,
 *         otherwise {@code "cp"} followed by the code page number
 */
private String getEncoding(HMEFMessage tnefDat) {
    // Both attributes are looked up first, in the same order as before.
    final TNEFAttribute oemCodePageAttr = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
    final MAPIAttribute internetCpIdAttr = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);

    final int codePage;
    if (oemCodePageAttr != null) {
        codePage = LittleEndian.getInt(oemCodePageAttr.getData());
    } else if (internetCpIdAttr != null) {
        codePage = LittleEndian.getInt(internetCpIdAttr.getData());
    } else {
        // neither attribute present: fall back to code page 1252
        codePage = 1252;
    }

    // see http://en.wikipedia.org/wiki/Code_page for more
    if (codePage == 1252) {
        return "Windows-1252";
    }
    if (codePage == 20127) {
        return "US-ASCII";
    }
    return "cp" + codePage;
}
} }

View File

@ -66,7 +66,7 @@ import org.apache.xmlbeans.XmlException;
/** /**
* Figures out the correct POITextExtractor for your supplied * Figures out the correct POITextExtractor for your supplied
* document, and returns it. * document, and returns it.
* *
* <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is * <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
* not present on the runtime classpath</p> * not present on the runtime classpath</p>
* <p>Note 2 - rather than using this, for most cases you would be better * <p>Note 2 - rather than using this, for most cases you would be better
@ -75,7 +75,7 @@ import org.apache.xmlbeans.XmlException;
@SuppressWarnings("WeakerAccess") @SuppressWarnings("WeakerAccess")
public final class ExtractorFactory { public final class ExtractorFactory {
private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class); private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
@ -146,7 +146,7 @@ public final class ExtractorFactory {
} catch (NotOLE2FileException ne) { } catch (NotOLE2FileException ne) {
// ensure file-handle release // ensure file-handle release
IOUtils.closeQuietly(fs); IOUtils.closeQuietly(fs);
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
} catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
// ensure file-handle release // ensure file-handle release
IOUtils.closeQuietly(fs); IOUtils.closeQuietly(fs);
@ -158,11 +158,11 @@ public final class ExtractorFactory {
InputStream is = FileMagic.prepareToCheckMagic(inp); InputStream is = FileMagic.prepareToCheckMagic(inp);
FileMagic fm = FileMagic.valueOf(is); FileMagic fm = FileMagic.valueOf(is);
switch (fm) { switch (fm) {
case OLE2: case OLE2:
POIFSFileSystem fs = new POIFSFileSystem(is); POIFSFileSystem fs = new POIFSFileSystem(is);
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs); return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
case OOXML: case OOXML:
return createExtractor(OPCPackage.open(is)); return createExtractor(OPCPackage.open(is));
@ -176,8 +176,8 @@ public final class ExtractorFactory {
* *
* @param pkg An {@link OPCPackage}. * @param pkg An {@link OPCPackage}.
* @return A {@link POIXMLTextExtractor} for the given file. * @return A {@link POIXMLTextExtractor} for the given file.
* @throws IOException If an error occurs while reading the file * @throws IOException If an error occurs while reading the file
* @throws OpenXML4JException If an error parsing the OpenXML file format is found. * @throws OpenXML4JException If an error parsing the OpenXML file format is found.
* @throws XmlException If an XML parsing error occurs. * @throws XmlException If an XML parsing error occurs.
* @throws IllegalArgumentException If no matching file type could be found. * @throws IllegalArgumentException If no matching file type could be found.
*/ */
@ -186,7 +186,7 @@ public final class ExtractorFactory {
// Check for the normal Office core document // Check for the normal Office core document
PackageRelationshipCollection core; PackageRelationshipCollection core;
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
// If nothing was found, try some of the other OOXML-based core types // If nothing was found, try some of the other OOXML-based core types
if (core.size() == 0) { if (core.size() == 0) {
// Could it be an OOXML-Strict one? // Could it be an OOXML-Strict one?
@ -198,16 +198,16 @@ public final class ExtractorFactory {
if (core.size() == 1) if (core.size() == 1)
return new XDGFVisioExtractor(pkg); return new XDGFVisioExtractor(pkg);
} }
// Should just be a single core document, complain if not // Should just be a single core document, complain if not
if (core.size() != 1) { if (core.size() != 1) {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
} }
// Grab the core document part, and try to identify from that // Grab the core document part, and try to identify from that
final PackagePart corePart = pkg.getPart(core.getRelationship(0)); final PackagePart corePart = pkg.getPart(core.getRelationship(0));
final String contentType = corePart.getContentType(); final String contentType = corePart.getContentType();
// Is it XSSF? // Is it XSSF?
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) { if ( rel.getContentType().equals( contentType ) ) {
@ -217,22 +217,22 @@ public final class ExtractorFactory {
return new XSSFExcelExtractor(pkg); return new XSSFExcelExtractor(pkg);
} }
} }
// Is it XWPF? // Is it XWPF?
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) { if ( rel.getContentType().equals( contentType ) ) {
return new XWPFWordExtractor(pkg); return new XWPFWordExtractor(pkg);
} }
} }
// Is it XSLF? // Is it XSLF?
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) { if ( rel.getContentType().equals( contentType ) ) {
return new SlideShowExtractor<>(new XMLSlideShow(pkg)); return new SlideShowExtractor<>(new XMLSlideShow(pkg));
} }
} }
// special handling for SlideShow-Theme-files, // special handling for SlideShow-Theme-files,
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
return new SlideShowExtractor<>(new XMLSlideShow(pkg)); return new SlideShowExtractor<>(new XMLSlideShow(pkg));
} }
@ -380,14 +380,14 @@ public final class ExtractorFactory {
public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
throw new IllegalStateException("Not yet supported"); throw new IllegalStateException("Not yet supported");
} }
private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs) private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
throws IOException { throws IOException {
String pass = Biff8EncryptionKey.getCurrentUserPassword(); String pass = Biff8EncryptionKey.getCurrentUserPassword();
if (pass == null) { if (pass == null) {
pass = Decryptor.DEFAULT_PASSWORD; pass = Decryptor.DEFAULT_PASSWORD;
} }
EncryptionInfo ei = new EncryptionInfo(fs); EncryptionInfo ei = new EncryptionInfo(fs);
Decryptor dec = ei.getDecryptor(); Decryptor dec = ei.getDecryptor();
InputStream is = null; InputStream is = null;