mirror of https://github.com/apache/poi.git
#63955 - HMEFContentsExtractor fails to extract content from winmail.dat
Fixed integration test.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f7fe4b0d59
commit
11b2c7e898
|
@ -18,35 +18,57 @@ package org.apache.poi.stress;
|
||||||
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.poi.hmef.HMEFMessage;
|
import org.apache.poi.hmef.HMEFMessage;
|
||||||
import org.apache.poi.hmef.attribute.MAPIAttribute;
|
import org.apache.poi.hmef.attribute.MAPIAttribute;
|
||||||
import org.apache.poi.hmef.attribute.MAPIStringAttribute;
|
import org.apache.poi.hmef.attribute.MAPIStringAttribute;
|
||||||
|
import org.apache.poi.hmef.attribute.TNEFAttribute;
|
||||||
|
import org.apache.poi.hmef.attribute.TNEFProperty;
|
||||||
|
import org.apache.poi.hsmf.datatypes.MAPIProperty;
|
||||||
|
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class HMEFFileHandler extends AbstractFileHandler {
|
public class HMEFFileHandler extends AbstractFileHandler {
|
||||||
|
|
||||||
|
/**
 * Runs the inherited extraction handling only when the file's magic is OLE2.
 * Files with any other magic are skipped without error.
 * NOTE(review): presumably raw TNEF (winmail.dat) files cannot go through the
 * generic extractor path - confirm against AbstractFileHandler.
 *
 * @param file the file to check and, if it is OLE2, pass to the superclass
 * @throws Exception declared for whatever the magic check or the superclass call may throw
 */
@Override
|
||||||
|
public void handleExtracting(File file) throws Exception {
|
||||||
|
// detect the container type from the file itself
FileMagic fm = FileMagic.valueOf(file);
|
||||||
|
if (fm == FileMagic.OLE2) {
|
||||||
|
super.handleExtracting(file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(stream);
|
HMEFMessage msg = new HMEFMessage(stream);
|
||||||
|
|
||||||
// list all properties
|
// list all properties
|
||||||
StringBuilder props = new StringBuilder();
|
StringBuilder props = new StringBuilder();
|
||||||
for(MAPIAttribute att : msg.getMessageMAPIAttributes()) {
|
for(MAPIAttribute att : msg.getMessageMAPIAttributes()) {
|
||||||
props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n");
|
props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// there are two test-files that have no body...
|
// there are two test-files that have no body...
|
||||||
if(!msg.getSubject().equals("Testing TNEF Message") && !msg.getSubject().equals("TNEF test message with attachments")) {
|
String[] HTML_BODY = {
|
||||||
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
|
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
||||||
msg.getBody());
|
};
|
||||||
|
String bodyStr;
|
||||||
|
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
||||||
|
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
||||||
|
assertNotNull(bodyHtml);
|
||||||
|
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
||||||
|
} else {
|
||||||
|
bodyStr = msg.getBody();
|
||||||
}
|
}
|
||||||
assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props,
|
assertNotNull("Body is not set", bodyStr);
|
||||||
msg.getSubject());
|
assertNotNull("Subject is not set", msg.getSubject());
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
|
@ -55,4 +77,22 @@ public class HMEFFileHandler extends AbstractFileHandler {
|
||||||
handleFile(stream, path);
|
handleFile(stream, path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Determines a charset name for the given message from its code page attributes.
 * The code page comes from the ID_OEMCODEPAGE message attribute if present,
 * otherwise from the INTERNET_CPID MAPI attribute, otherwise 1252 is used.
 *
 * @param tnefDat the message whose attributes are inspected
 * @return "Windows-1252" for 1252, "US-ASCII" for 20127, otherwise "cp" followed
 *         by the code page number
 */
private String getEncoding(HMEFMessage tnefDat) {
|
||||||
|
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
||||||
|
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
||||||
|
// default used when neither attribute is present
int codePage = 1252;
|
||||||
|
// the OEM code page attribute takes precedence over INTERNET_CPID
if (oemCP != null) {
|
||||||
|
// presumably reads the leading 4 bytes of the attribute data as a little-endian int - confirm
codePage = LittleEndian.getInt(oemCP.getData());
|
||||||
|
} else if (cpId != null) {
|
||||||
|
codePage = LittleEndian.getInt(cpId.getData());
|
||||||
|
}
|
||||||
|
// translate the numeric code page into a charset name
switch (codePage) {
|
||||||
|
// see http://en.wikipedia.org/wiki/Code_page for more
|
||||||
|
case 1252: return "Windows-1252";
|
||||||
|
case 20127: return "US-ASCII";
|
||||||
|
// any other code page is returned in the "cp<number>" form
default: return "cp"+codePage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,7 @@ import org.apache.xmlbeans.XmlException;
|
||||||
/**
|
/**
|
||||||
* Figures out the correct POITextExtractor for your supplied
|
* Figures out the correct POITextExtractor for your supplied
|
||||||
* document, and returns it.
|
* document, and returns it.
|
||||||
*
|
*
|
||||||
* <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
|
* <p>Note 1 - will fail for many file formats if the POI Scratchpad jar is
|
||||||
* not present on the runtime classpath</p>
|
* not present on the runtime classpath</p>
|
||||||
* <p>Note 2 - rather than using this, for most cases you would be better
|
* <p>Note 2 - rather than using this, for most cases you would be better
|
||||||
|
@ -75,7 +75,7 @@ import org.apache.xmlbeans.XmlException;
|
||||||
@SuppressWarnings("WeakerAccess")
|
@SuppressWarnings("WeakerAccess")
|
||||||
public final class ExtractorFactory {
|
public final class ExtractorFactory {
|
||||||
private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
|
private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class);
|
||||||
|
|
||||||
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
||||||
private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
||||||
private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
||||||
|
@ -146,7 +146,7 @@ public final class ExtractorFactory {
|
||||||
} catch (NotOLE2FileException ne) {
|
} catch (NotOLE2FileException ne) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
|
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne);
|
||||||
} catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
|
} catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
|
@ -158,11 +158,11 @@ public final class ExtractorFactory {
|
||||||
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
||||||
|
|
||||||
FileMagic fm = FileMagic.valueOf(is);
|
FileMagic fm = FileMagic.valueOf(is);
|
||||||
|
|
||||||
switch (fm) {
|
switch (fm) {
|
||||||
case OLE2:
|
case OLE2:
|
||||||
POIFSFileSystem fs = new POIFSFileSystem(is);
|
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
|
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
|
||||||
return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
|
return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs);
|
||||||
case OOXML:
|
case OOXML:
|
||||||
return createExtractor(OPCPackage.open(is));
|
return createExtractor(OPCPackage.open(is));
|
||||||
|
@ -176,8 +176,8 @@ public final class ExtractorFactory {
|
||||||
*
|
*
|
||||||
* @param pkg An {@link OPCPackage}.
|
* @param pkg An {@link OPCPackage}.
|
||||||
* @return A {@link POIXMLTextExtractor} for the given file.
|
* @return A {@link POIXMLTextExtractor} for the given file.
|
||||||
* @throws IOException If an error occurs while reading the file
|
* @throws IOException If an error occurs while reading the file
|
||||||
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
* @throws OpenXML4JException If an error parsing the OpenXML file format is found.
|
||||||
* @throws XmlException If an XML parsing error occurs.
|
* @throws XmlException If an XML parsing error occurs.
|
||||||
* @throws IllegalArgumentException If no matching file type could be found.
|
* @throws IllegalArgumentException If no matching file type could be found.
|
||||||
*/
|
*/
|
||||||
|
@ -186,7 +186,7 @@ public final class ExtractorFactory {
|
||||||
// Check for the normal Office core document
|
// Check for the normal Office core document
|
||||||
PackageRelationshipCollection core;
|
PackageRelationshipCollection core;
|
||||||
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||||
|
|
||||||
// If nothing was found, try some of the other OOXML-based core types
|
// If nothing was found, try some of the other OOXML-based core types
|
||||||
if (core.size() == 0) {
|
if (core.size() == 0) {
|
||||||
// Could it be an OOXML-Strict one?
|
// Could it be an OOXML-Strict one?
|
||||||
|
@ -198,16 +198,16 @@ public final class ExtractorFactory {
|
||||||
if (core.size() == 1)
|
if (core.size() == 1)
|
||||||
return new XDGFVisioExtractor(pkg);
|
return new XDGFVisioExtractor(pkg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should just be a single core document, complain if not
|
// Should just be a single core document, complain if not
|
||||||
if (core.size() != 1) {
|
if (core.size() != 1) {
|
||||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab the core document part, and try to identify from that
|
// Grab the core document part, and try to identify from that
|
||||||
final PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
final PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||||
final String contentType = corePart.getContentType();
|
final String contentType = corePart.getContentType();
|
||||||
|
|
||||||
// Is it XSSF?
|
// Is it XSSF?
|
||||||
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||||
if ( rel.getContentType().equals( contentType ) ) {
|
if ( rel.getContentType().equals( contentType ) ) {
|
||||||
|
@ -217,22 +217,22 @@ public final class ExtractorFactory {
|
||||||
return new XSSFExcelExtractor(pkg);
|
return new XSSFExcelExtractor(pkg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is it XWPF?
|
// Is it XWPF?
|
||||||
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||||
if ( rel.getContentType().equals( contentType ) ) {
|
if ( rel.getContentType().equals( contentType ) ) {
|
||||||
return new XWPFWordExtractor(pkg);
|
return new XWPFWordExtractor(pkg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is it XSLF?
|
// Is it XSLF?
|
||||||
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||||
if ( rel.getContentType().equals( contentType ) ) {
|
if ( rel.getContentType().equals( contentType ) ) {
|
||||||
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// special handling for SlideShow-Theme-files,
|
// special handling for SlideShow-Theme-files,
|
||||||
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
||||||
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
return new SlideShowExtractor<>(new XMLSlideShow(pkg));
|
||||||
}
|
}
|
||||||
|
@ -380,14 +380,14 @@ public final class ExtractorFactory {
|
||||||
public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
|
public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
|
||||||
throw new IllegalStateException("Not yet supported");
|
throw new IllegalStateException("Not yet supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
|
private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
||||||
if (pass == null) {
|
if (pass == null) {
|
||||||
pass = Decryptor.DEFAULT_PASSWORD;
|
pass = Decryptor.DEFAULT_PASSWORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
EncryptionInfo ei = new EncryptionInfo(fs);
|
EncryptionInfo ei = new EncryptionInfo(fs);
|
||||||
Decryptor dec = ei.getDecryptor();
|
Decryptor dec = ei.getDecryptor();
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
|
|
Loading…
Reference in New Issue