diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 9dca32e47c..bbcbe99f7b 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Improve HSMF MAPIMessage access to the HTML and RTF versions of the message body (where available) Add new method to HSMF of MAPIMessage.has7BitEncodingStrings() to make it easier to decide when encoding guessing is needed OutlookTextExtractor now requests 7 bit encoding guessing Improve HSMF encoding guessing for 7 bit fields in MAPIMessage diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index 207f89e0e2..1b3d493980 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -29,11 +29,14 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.poi.POIDocument; +import org.apache.poi.hmef.attribute.MAPIRtfAttribute; import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter; +import org.apache.poi.hsmf.datatypes.ByteChunk; import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; +import org.apache.poi.hsmf.datatypes.MAPIProperty; import org.apache.poi.hsmf.datatypes.NameIdChunks; import org.apache.poi.hsmf.datatypes.RecipientChunks; import org.apache.poi.hsmf.datatypes.Types; @@ -184,7 +187,36 @@ public class MAPIMessage extends POIDocument { * @throws ChunkNotFoundException */ public String getHmtlBody() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.htmlBodyChunk); + if(mainChunks.htmlBodyChunkBinary != null) { + return mainChunks.htmlBodyChunkBinary.getAs7bitString(); + } + return getStringFromChunk(mainChunks.htmlBodyChunkString); + } + + /** + * Gets the RTF Rich Message body of this Outlook Message, if this email + * contains a RTF (rich) version. + * @return The string representation of the 'RTF' version of the body, if available. + * @throws ChunkNotFoundException + */ + public String getRtfBody() throws ChunkNotFoundException { + ByteChunk chunk = mainChunks.rtfBodyChunk; + if(chunk == null) { + if(returnNullOnMissingChunk) { + return null; + } else { + throw new ChunkNotFoundException(); + } + } + + try { + MAPIRtfAttribute rtf = new MAPIRtfAttribute( + MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue() + ); + return rtf.getDataString(); + } catch(IOException e) { + throw new RuntimeException("Shouldn't happen", e); + } } /** diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java index d9a060fe4a..51a88bcaf8 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/Chunks.java @@ -38,7 +38,10 @@ public final class Chunks implements ChunkGroup { /** BODY Chunk, for plain/text messages */ public StringChunk textBodyChunk; /** BODY Html Chunk, for html messages */ - public StringChunk htmlBodyChunk; + public StringChunk htmlBodyChunkString; + public ByteChunk htmlBodyChunkBinary; + /** BODY Rtf Chunk, for Rtf (Rich) messages */ + public ByteChunk rtfBodyChunk; /** Subject link chunk, in plain/text */ public StringChunk subjectChunk; /** Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes */ @@ -119,9 +122,16 @@ public final class Chunks implements ChunkGroup { else if(chunk.getChunkId() == MAPIProperty.BODY.id) { textBodyChunk = (StringChunk)chunk; } - else if(chunk.getChunkId() == MAPIProperty.BODY_HTML.id && - chunk instanceof StringChunk) { - htmlBodyChunk = (StringChunk)chunk; + else if(chunk.getChunkId() == MAPIProperty.BODY_HTML.id) { + if(chunk instanceof StringChunk) { + htmlBodyChunkString = (StringChunk)chunk; + } + if(chunk instanceof ByteChunk) { + htmlBodyChunkBinary = (ByteChunk)chunk; + } + } + else if(chunk.getChunkId() == MAPIProperty.RTF_COMPRESSED.id) { + rtfBodyChunk = (ByteChunk)chunk; } // And add to the main list diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestOutlook30FileRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestOutlook30FileRead.java index 113a214eb0..9c2b5d0f65 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestOutlook30FileRead.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestOutlook30FileRead.java @@ -120,7 +120,6 @@ private MAPIMessage mapiMessage; TestCase.assertEquals("IN-SPIRE servers going down for a bit, back up around 8am", obtained); } - /** * Check if we can read the subject line of the blank message, we expect "" * @@ -130,7 +129,17 @@ private MAPIMessage mapiMessage; String obtained = mapiMessage.getMessageClass(); TestCase.assertEquals("IPM.Note", obtained); } - - - + + /** + * Ensure we can get the HTML and RTF versions + */ + public void testReadBodyContents() throws Exception { + String html = mapiMessage.getHmtlBody(); + String rtf = mapiMessage.getRtfBody(); + assertNotNull(html); + assertNotNull(rtf); + + assertTrue("Wrong text:\n" + html, html.startsWith("