NPE fix for text extraction from MSG files with only a short name

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1694255 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2015-08-05 15:58:43 +00:00
parent c90272d65e
commit 735760f21e
3 changed files with 33 additions and 3 deletions

View File

@ -25,6 +25,7 @@ import java.util.TimeZone;
import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
@ -65,6 +66,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
new NPOIFSFileSystem(new File(filename)) new NPOIFSFileSystem(new File(filename))
); );
System.out.println( extractor.getText() ); System.out.println( extractor.getText() );
extractor.close();
} }
} }
@ -146,12 +148,15 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
// Display attachment names // Display attachment names
// To get the attachments, use ExtractorFactory // To get the attachments, use ExtractorFactory
for(AttachmentChunks att : msg.getAttachmentFiles()) { for(AttachmentChunks att : msg.getAttachmentFiles()) {
String ats = att.attachLongFileName.getValue(); StringChunk name = att.attachLongFileName;
if (name == null) name = att.attachFileName;
String attName = name.getValue();
if(att.attachMimeTag != null && if(att.attachMimeTag != null &&
att.attachMimeTag.getValue() != null) { att.attachMimeTag.getValue() != null) {
ats = att.attachMimeTag.getValue() + " = " + ats; attName = att.attachMimeTag.getValue() + " = " + attName;
} }
s.append("Attachment: " + ats + "\n"); s.append("Attachment: " + attName + "\n");
} }
try { try {

View File

@ -209,6 +209,25 @@ public final class TestOutlookTextExtractor extends POITestCase {
ext.close(); ext.close();
} }
/**
 * Extracts the text of the sample file "58214_with_attachment.msg" and
 * checks that the output holds text from the main message but none of
 * the text belonging to the message attached to it.
 *
 * @throws Exception if the sample cannot be opened or parsed, or if
 *         closing the extractor fails
 */
public void testWithAttachedMessage() throws Exception {
    POIFSFileSystem simple = new POIFSFileSystem(
            new FileInputStream(samples.getFile("58214_with_attachment.msg"))
    );
    MAPIMessage msg = new MAPIMessage(simple);

    OutlookTextExtactor ext = new OutlookTextExtactor(msg);
    try {
        String text = ext.getText();

        // Check we got bits from the main message
        assertContains(text, "Master mail");
        assertContains(text, "ante in lacinia euismod");

        // But not the attached message
        assertNotContained(text, "Test mail attachment");
        assertNotContained(text, "Lorem ipsum dolor sit");
    } finally {
        // Close in finally so that a failing assertion (or a failure in
        // getText) does not leave the extractor open.
        ext.close();
    }
}
public void testEncodings() throws Exception { public void testEncodings() throws Exception {
POIFSFileSystem simple = new POIFSFileSystem( POIFSFileSystem simple = new POIFSFileSystem(
new FileInputStream(samples.getFile("chinese-traditional.msg")) new FileInputStream(samples.getFile("chinese-traditional.msg"))

View File

@ -33,6 +33,12 @@ public class POITestCase extends TestCase {
haystack.contains(needle) haystack.contains(needle)
); );
} }
/**
 * Fails the current test when {@code needle} occurs anywhere inside
 * {@code haystack}; the failure message quotes both strings.
 *
 * @param haystack the text that is searched
 * @param needle   the text that must not be present
 */
public static void assertNotContained(String haystack, String needle) {
    // Build the message first, then run the search, mirroring the
    // argument evaluation order of a direct assertFalse(...) call.
    final String failureMessage =
            "Unexpectedly found text '" + needle + "' in text:\n" + haystack;
    final boolean present = haystack.contains(needle);
    assertFalse(failureMessage, present);
}
public static <T> void assertEquals(T[] expected, T[] actual) public static <T> void assertEquals(T[] expected, T[] actual)
{ {