mirror of https://github.com/apache/poi.git
#63955 - HMEFContentsExtractor fails to extract content from winmail.dat
Fixed the integration test.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f7fe4b0d59
commit
11b2c7e898
|
@ -18,35 +18,57 @@ package org.apache.poi.stress;
|
|||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.poi.hmef.HMEFMessage;
|
||||
import org.apache.poi.hmef.attribute.MAPIAttribute;
|
||||
import org.apache.poi.hmef.attribute.MAPIStringAttribute;
|
||||
import org.apache.poi.hmef.attribute.TNEFAttribute;
|
||||
import org.apache.poi.hmef.attribute.TNEFProperty;
|
||||
import org.apache.poi.hsmf.datatypes.MAPIProperty;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.junit.Test;
|
||||
|
||||
public class HMEFFileHandler extends AbstractFileHandler {
|
||||
|
||||
@Override
|
||||
public void handleExtracting(File file) throws Exception {
|
||||
FileMagic fm = FileMagic.valueOf(file);
|
||||
if (fm == FileMagic.OLE2) {
|
||||
super.handleExtracting(file);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
HMEFMessage msg = new HMEFMessage(stream);
|
||||
|
||||
|
||||
// list all properties
|
||||
StringBuilder props = new StringBuilder();
|
||||
for(MAPIAttribute att : msg.getMessageMAPIAttributes()) {
|
||||
props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n");
|
||||
}
|
||||
|
||||
|
||||
// there are two test-files that have no body...
|
||||
if(!msg.getSubject().equals("Testing TNEF Message") && !msg.getSubject().equals("TNEF test message with attachments")) {
|
||||
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
|
||||
msg.getBody());
|
||||
String[] HTML_BODY = {
|
||||
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
||||
};
|
||||
String bodyStr;
|
||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
||||
assertNotNull(bodyHtml);
|
||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
||||
} else {
|
||||
bodyStr = msg.getBody();
|
||||
}
|
||||
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
|
||||
msg.getSubject());
|
||||
assertNotNull("Body is not set", bodyStr);
|
||||
assertNotNull("Subject is not set", msg.getSubject());
|
||||
}
|
||||
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
public void test() throws Exception {
|
||||
|
@ -55,4 +77,22 @@ public class HMEFFileHandler extends AbstractFileHandler {
|
|||
handleFile(stream, path);
|
||||
}
|
||||
}
|
||||
|
||||
private String getEncoding(HMEFMessage tnefDat) {
|
||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
||||
int codePage = 1252;
|
||||
if (oemCP != null) {
|
||||
codePage = LittleEndian.getInt(oemCP.getData());
|
||||
} else if (cpId != null) {
|
||||
codePage = LittleEndian.getInt(cpId.getData());
|
||||
}
|
||||
switch (codePage) {
|
||||
// see http://en.wikipedia.org/wiki/Code_page for more
|
||||
case 1252: return "Windows-1252";
|
||||
case 20127: return "US-ASCII";
|
||||
default: return "cp"+codePage;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -66,7 +66,7 @@ import org.apache.xmlbeans.XmlException;
|
|||
/**
|
||||
* Figures out the correct POITextExtractor for your supplied
|
||||
* document, and returns it.
|
||||
*
|
||||
*
|
||||
* <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
|
||||
* not present on the runtime classpath</p>
|
||||
* <p>Note 2 - rather than using this, for most cases you would be better
|
||||
|
@ -75,7 +75,7 @@ import org.apache.xmlbeans.XmlException;
|
|||
@SuppressWarnings("WeakerAccess")
|
||||
public final class ExtractorFactory {
|
||||
private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
|
||||
|
||||
|
||||
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
||||
private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
||||
private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
||||
|
@ -146,7 +146,7 @@ public final class ExtractorFactory {
|
|||
} catch (NotOLE2FileException ne) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
|
||||
} catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
|
@ -158,11 +158,11 @@ public final class ExtractorFactory {
|
|||
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
||||
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
|
||||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
|
||||
return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
|
||||
case OOXML:
|
||||
return createExtractor(OPCPackage.open(is));
|
||||
|
@ -176,8 +176,8 @@ public final class ExtractorFactory {
|
|||
*
|
||||
* @param pkg An {@link OPCPackage}.
|
||||
* @return A {@link POIXMLTextExtractor} for the given file.
|
||||
* @throws IOException If an error occurs while reading the file
|
||||
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
||||
* @throws IOException If an error occurs while reading the file
|
||||
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
||||
* @throws XmlException If an XML parsing error occurs.
|
||||
* @throws IllegalArgumentException If no matching file type could be found.
|
||||
*/
|
||||
|
@ -186,7 +186,7 @@ public final class ExtractorFactory {
|
|||
// Check for the normal Office core document
|
||||
PackageRelationshipCollection core;
|
||||
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||
|
||||
|
||||
// If nothing was found, try some of the other OOXML-based core types
|
||||
if (core.size() == 0) {
|
||||
// Could it be an OOXML-Strict one?
|
||||
|
@ -198,16 +198,16 @@ public final class ExtractorFactory {
|
|||
if (core.size() == 1)
|
||||
return new XDGFVisioExtractor(pkg);
|
||||
}
|
||||
|
||||
|
||||
// Should just be a single core document, complain if not
|
||||
if (core.size() != 1) {
|
||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||
}
|
||||
|
||||
|
||||
// Grab the core document part, and try to identify from that
|
||||
final PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
final String contentType = corePart.getContentType();
|
||||
|
||||
|
||||
// Is it XSSF?
|
||||
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||
if ( rel.getContentType().equals( contentType ) ) {
|
||||
|
@ -217,22 +217,22 @@ public final class ExtractorFactory {
|
|||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Is it XWPF?
|
||||
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||
if ( rel.getContentType().equals( contentType ) ) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Is it XSLF?
|
||||
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||
if ( rel.getContentType().equals( contentType ) ) {
|
||||
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
||||
}
|
||||
}
|
||||
|
||||
// special handling for SlideShow-Theme-files,
|
||||
|
||||
// special handling for SlideShow-Theme-files,
|
||||
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
||||
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
||||
}
|
||||
|
@ -380,14 +380,14 @@ public final class ExtractorFactory {
|
|||
public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
|
||||
throw new IllegalStateException("Not yet supported");
|
||||
}
|
||||
|
||||
|
||||
private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
|
||||
throws IOException {
|
||||
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
||||
if (pass == null) {
|
||||
pass = Decryptor.DEFAULT_PASSWORD;
|
||||
}
|
||||
|
||||
|
||||
EncryptionInfo ei = new EncryptionInfo(fs);
|
||||
Decryptor dec = ei.getDecryptor();
|
||||
InputStream is = null;
|
||||
|
|
Loading…
Reference in New Issue