#63955 - HMEFContentsExtractor fails to extract content from winmail.dat

fixed integration test

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2020-01-08 23:49:31 +00:00
parent f7fe4b0d59
commit 11b2c7e898
2 changed files with 65 additions and 25 deletions

View File

@ -18,35 +18,57 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Arrays;
import org.apache.poi.hmef.HMEFMessage;
import org.apache.poi.hmef.attribute.MAPIAttribute;
import org.apache.poi.hmef.attribute.MAPIStringAttribute;
import org.apache.poi.hmef.attribute.TNEFAttribute;
import org.apache.poi.hmef.attribute.TNEFProperty;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.LittleEndian;
import org.junit.Test;
public class HMEFFileHandler extends AbstractFileHandler {
/**
 * Runs the inherited extraction handling only for files that
 * {@link FileMagic} identifies as OLE2; any other file is skipped
 * without error.
 *
 * @param file the file to examine and, if it is OLE2, to extract from
 * @throws Exception if detecting the file type or the inherited handling fails
 */
@Override
public void handleExtracting(File file) throws Exception {
    // anything that is not an OLE2 container is deliberately left alone
    if (FileMagic.valueOf(file) != FileMagic.OLE2) {
        return;
    }
    super.handleExtracting(file);
}
/**
 * Builds an {@link HMEFMessage} from the given stream and asserts that
 * a body and a subject can be obtained from it.
 *
 * @param stream the stream the message is read from
 * @param path not used by the statements visible here
 * @throws Exception if the message cannot be read or decoded
 */
@Override
public void handleFile(InputStream stream, String path) throws Exception {
HMEFMessage msg = new HMEFMessage(stream);
// list all properties ("type: value" per line); only used in the "Had: ..." assertion messages
StringBuilder props = new StringBuilder();
for(MAPIAttribute att : msg.getMessageMAPIAttributes()) {
props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n");
}
// there are two test-files that have no body...
// NOTE(review): this listing appears to interleave removed and added diff lines; the
// subject check and the two "Had: ..." assertions look like the pre-change version
// that the HTML_BODY branch below replaces - confirm against the actual file.
if(!msg.getSubject().equals("Testing TNEF Message") && !msg.getSubject().equals("TNEF test message with attachments")) {
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
msg.getBody());
// subjects for which the body is taken from the BODY_HTML MAPI attribute instead of getBody()
String[] HTML_BODY = {
"Testing TNEF Message", "TNEF test message with attachments", "Test"
};
String bodyStr;
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
assertNotNull(bodyHtml);
// decode the raw attribute bytes using the charset name chosen by getEncoding(msg)
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
} else {
bodyStr = msg.getBody();
}
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
msg.getSubject());
assertNotNull("Body is not set", bodyStr);
assertNotNull("Subject is not set", msg.getSubject());
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
@ -55,4 +77,22 @@ public class HMEFFileHandler extends AbstractFileHandler {
handleFile(stream, path);
}
}
/**
 * Picks the charset name used to decode byte content of the given message.
 *
 * <p>The code page is read as a little-endian int from the
 * {@code ID_OEMCODEPAGE} message attribute if present, otherwise from the
 * {@code INTERNET_CPID} MAPI attribute; if neither exists, 1252 is assumed.</p>
 *
 * @param tnefDat the message whose code page attributes are inspected
 * @return a charset name: explicit names for the code pages 1252, 20127 and
 *         65001, otherwise {@code "cp"} followed by the code page number
 */
private String getEncoding(HMEFMessage tnefDat) {
    TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
    MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);

    // fall back to 1252 when the message carries no code page information
    int codePage = 1252;
    if (oemCP != null) {
        codePage = LittleEndian.getInt(oemCP.getData());
    } else if (cpId != null) {
        codePage = LittleEndian.getInt(cpId.getData());
    }

    switch (codePage) {
        // see http://en.wikipedia.org/wiki/Code_page for more
        case 1252: return "Windows-1252";
        case 20127: return "US-ASCII";
        // Windows code page 65001 is UTF-8; "cp65001" is not a charset name
        // Java resolves, so it has to be mapped explicitly
        case 65001: return "UTF-8";
        default: return "cp"+codePage;
    }
}
}

View File

@ -66,7 +66,7 @@ import org.apache.xmlbeans.XmlException;
/**
* Figures out the correct POITextExtractor for your supplied
* document, and returns it.
*
*
* <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
* not present on the runtime classpath</p>
* <p>Note 2 - rather than using this, for most cases you would be better
@ -75,7 +75,7 @@ import org.apache.xmlbeans.XmlException;
@SuppressWarnings("WeakerAccess")
public final class ExtractorFactory {
private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
@ -146,7 +146,7 @@ public final class ExtractorFactory {
} catch (NotOLE2FileException ne) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
} catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
// ensure file-handle release
IOUtils.closeQuietly(fs);
@ -158,11 +158,11 @@ public final class ExtractorFactory {
InputStream is = FileMagic.prepareToCheckMagic(inp);
FileMagic fm = FileMagic.valueOf(is);
switch (fm) {
case OLE2:
POIFSFileSystem fs = new POIFSFileSystem(is);
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
case OOXML:
return createExtractor(OPCPackage.open(is));
@ -176,8 +176,8 @@ public final class ExtractorFactory {
*
* @param pkg An {@link OPCPackage}.
* @return A {@link POIXMLTextExtractor} for the given file.
* @throws IOException If an error occurs while reading the file
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
* @throws IOException If an error occurs while reading the file
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
* @throws XmlException If an XML parsing error occurs.
* @throws IllegalArgumentException If no matching file type could be found.
*/
@ -186,7 +186,7 @@ public final class ExtractorFactory {
// Check for the normal Office core document
PackageRelationshipCollection core;
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
// If nothing was found, try some of the other OOXML-based core types
if (core.size() == 0) {
// Could it be an OOXML-Strict one?
@ -198,16 +198,16 @@ public final class ExtractorFactory {
if (core.size() == 1)
return new XDGFVisioExtractor(pkg);
}
// Should just be a single core document, complain if not
if (core.size() != 1) {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
}
// Grab the core document part, and try to identify from that
final PackagePart corePart = pkg.getPart(core.getRelationship(0));
final String contentType = corePart.getContentType();
// Is it XSSF?
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) {
@ -217,22 +217,22 @@ public final class ExtractorFactory {
return new XSSFExcelExtractor(pkg);
}
}
// Is it XWPF?
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) {
return new XWPFWordExtractor(pkg);
}
}
// Is it XSLF?
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) {
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
}
}
// special handling for SlideShow-Theme-files,
// special handling for SlideShow-Theme-files,
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
}
@ -380,14 +380,14 @@ public final class ExtractorFactory {
public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
    // Nothing is extracted here: every call is rejected, regardless of the argument.
    final String reason = "Not yet supported";
    throw new IllegalStateException(reason);
}
private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
throws IOException {
String pass = Biff8EncryptionKey.getCurrentUserPassword();
if (pass == null) {
pass = Decryptor.DEFAULT_PASSWORD;
}
EncryptionInfo ei = new EncryptionInfo(fs);
Decryptor dec = ei.getDecryptor();
InputStream is = null;