diff --git a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java index c61a001376..908600979f 100644 --- a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java @@ -18,35 +18,57 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.util.Arrays; import org.apache.poi.hmef.HMEFMessage; import org.apache.poi.hmef.attribute.MAPIAttribute; import org.apache.poi.hmef.attribute.MAPIStringAttribute; +import org.apache.poi.hmef.attribute.TNEFAttribute; +import org.apache.poi.hmef.attribute.TNEFProperty; +import org.apache.poi.hsmf.datatypes.MAPIProperty; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.util.LittleEndian; import org.junit.Test; public class HMEFFileHandler extends AbstractFileHandler { + @Override + public void handleExtracting(File file) throws Exception { + FileMagic fm = FileMagic.valueOf(file); + if (fm == FileMagic.OLE2) { + super.handleExtracting(file); + } + } + @Override public void handleFile(InputStream stream, String path) throws Exception { HMEFMessage msg = new HMEFMessage(stream); - + // list all properties StringBuilder props = new StringBuilder(); for(MAPIAttribute att : msg.getMessageMAPIAttributes()) { props.append(att.getType()).append(": ").append(MAPIStringAttribute.getAsString( att)).append("\n"); } - + // there are two test-files that have no body... - if(!msg.getSubject().equals("Testing TNEF Message") && !msg.getSubject().equals("TNEF test message with attachments")) { - assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props, - msg.getBody()); + String[] HTML_BODY = { + "Testing TNEF Message", "TNEF test message with attachments", "Test" + }; + String bodyStr; + if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) { + MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML); + assertNotNull(bodyHtml); + bodyStr = new String(bodyHtml.getData(), getEncoding(msg)); + } else { + bodyStr = msg.getBody(); } - assertNotNull("Had: " + msg.getBody() + ", " + msg.getSubject() + ", " + msg.getAttachments() + ": " + props, - msg.getSubject()); + assertNotNull("Body is not set", bodyStr); + assertNotNull("Subject is not set", msg.getSubject()); } - + // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { @@ -55,4 +77,22 @@ public class HMEFFileHandler extends AbstractFileHandler { handleFile(stream, path); } } + + private String getEncoding(HMEFMessage tnefDat) { + TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE); + MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID); + int codePage = 1252; + if (oemCP != null) { + codePage = LittleEndian.getInt(oemCP.getData()); + } else if (cpId != null) { + codePage = LittleEndian.getInt(cpId.getData()); + } + switch (codePage) { + // see http://en.wikipedia.org/wiki/Code_page for more + case 1252: return "Windows-1252"; + case 20127: return "US-ASCII"; + default: return "cp"+codePage; + } + } + } diff --git a/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java index bf8771bda2..a5a501a3eb 100644 --- a/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/ooxml/extractor/ExtractorFactory.java @@ -66,7 +66,7 @@ import org.apache.xmlbeans.XmlException; /** * Figures out the correct POITextExtractor for your supplied * document, and returns it. - * + * *

Note 1 - will fail for many file formats if the POI Scratchpad jar is * not present on the runtime classpath

*

Note 2 - rather than using this, for most cases you would be better @@ -75,7 +75,7 @@ import org.apache.xmlbeans.XmlException; @SuppressWarnings("WeakerAccess") public final class ExtractorFactory { private static final POILogger logger = POILogFactory.getLogger(ExtractorFactory.class); - + public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; private static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; private static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; @@ -146,7 +146,7 @@ public final class ExtractorFactory { } catch (NotOLE2FileException ne) { // ensure file-handle release IOUtils.closeQuietly(fs); - throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); + throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file", ne); } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) { // NOSONAR // ensure file-handle release IOUtils.closeQuietly(fs); @@ -158,11 +158,11 @@ public final class ExtractorFactory { InputStream is = FileMagic.prepareToCheckMagic(inp); FileMagic fm = FileMagic.valueOf(is); - + switch (fm) { case OLE2: POIFSFileSystem fs = new POIFSFileSystem(is); - boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); + boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); return isEncrypted ? createEncryptedOOXMLExtractor(fs) : createExtractor(fs); case OOXML: return createExtractor(OPCPackage.open(is)); @@ -176,8 +176,8 @@ public final class ExtractorFactory { * * @param pkg An {@link OPCPackage}. * @return A {@link POIXMLTextExtractor} for the given file. - * @throws IOException If an error occurs while reading the file - * @throws OpenXML4JException If an error parsing the OpenXML file format is found. + * @throws IOException If an error occurs while reading the file + * @throws OpenXML4JException If an error parsing the OpenXML file format is found. * @throws XmlException If an XML parsing error occurs. * @throws IllegalArgumentException If no matching file type could be found. */ @@ -186,7 +186,7 @@ public final class ExtractorFactory { // Check for the normal Office core document PackageRelationshipCollection core; core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); - + // If nothing was found, try some of the other OOXML-based core types if (core.size() == 0) { // Could it be an OOXML-Strict one? @@ -198,16 +198,16 @@ public final class ExtractorFactory { if (core.size() == 1) return new XDGFVisioExtractor(pkg); } - + // Should just be a single core document, complain if not if (core.size() != 1) { throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); } - + // Grab the core document part, and try to identify from that final PackagePart corePart = pkg.getPart(core.getRelationship(0)); final String contentType = corePart.getContentType(); - + // Is it XSSF? for (XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) { if ( rel.getContentType().equals( contentType ) ) { @@ -217,22 +217,22 @@ public final class ExtractorFactory { return new XSSFExcelExtractor(pkg); } } - + // Is it XWPF? for (XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) { if ( rel.getContentType().equals( contentType ) ) { return new XWPFWordExtractor(pkg); } } - + // Is it XSLF? for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { if ( rel.getContentType().equals( contentType ) ) { return new SlideShowExtractor<>(new XMLSlideShow(pkg)); } } - - // special handling for SlideShow-Theme-files, + + // special handling for SlideShow-Theme-files, if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { return new SlideShowExtractor<>(new XMLSlideShow(pkg)); } @@ -380,14 +380,14 @@ public final class ExtractorFactory { public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) { throw new IllegalStateException("Not yet supported"); } - + private static POITextExtractor createEncryptedOOXMLExtractor(POIFSFileSystem fs) throws IOException { String pass = Biff8EncryptionKey.getCurrentUserPassword(); if (pass == null) { pass = Decryptor.DEFAULT_PASSWORD; } - + EncryptionInfo ei = new EncryptionInfo(fs); Decryptor dec = ei.getDecryptor(); InputStream is = null;