mirror of https://github.com/apache/poi.git
NPE fix for text extraction from MSG files whose attachments have only a short file name (no long file name)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1694255 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c90272d65e
commit
735760f21e
|
@ -25,6 +25,7 @@ import java.util.TimeZone;
|
|||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hsmf.MAPIMessage;
|
||||
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
|
||||
import org.apache.poi.hsmf.datatypes.StringChunk;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
|
@ -65,6 +66,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
|
|||
new NPOIFSFileSystem(new File(filename))
|
||||
);
|
||||
System.out.println( extractor.getText() );
|
||||
extractor.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -146,12 +148,15 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
|
|||
// Display attachment names
|
||||
// To get the attachments, use ExtractorFactory
|
||||
for(AttachmentChunks att : msg.getAttachmentFiles()) {
|
||||
String ats = att.attachLongFileName.getValue();
|
||||
StringChunk name = att.attachLongFileName;
|
||||
if (name == null) name = att.attachFileName;
|
||||
String attName = name.getValue();
|
||||
|
||||
if(att.attachMimeTag != null &&
|
||||
att.attachMimeTag.getValue() != null) {
|
||||
ats = att.attachMimeTag.getValue() + " = " + ats;
|
||||
attName = att.attachMimeTag.getValue() + " = " + attName;
|
||||
}
|
||||
s.append("Attachment: " + ats + "\n");
|
||||
s.append("Attachment: " + attName + "\n");
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -209,6 +209,25 @@ public final class TestOutlookTextExtractor extends POITestCase {
|
|||
ext.close();
|
||||
}
|
||||
|
||||
public void testWithAttachedMessage() throws Exception {
|
||||
POIFSFileSystem simple = new POIFSFileSystem(
|
||||
new FileInputStream(samples.getFile("58214_with_attachment.msg"))
|
||||
);
|
||||
MAPIMessage msg = new MAPIMessage(simple);
|
||||
OutlookTextExtactor ext = new OutlookTextExtactor(msg);
|
||||
String text = ext.getText();
|
||||
|
||||
// Check we got bits from the main message
|
||||
assertContains(text, "Master mail");
|
||||
assertContains(text, "ante in lacinia euismod");
|
||||
|
||||
// But not the attached message
|
||||
assertNotContained(text, "Test mail attachment");
|
||||
assertNotContained(text, "Lorem ipsum dolor sit");
|
||||
|
||||
ext.close();
|
||||
}
|
||||
|
||||
public void testEncodings() throws Exception {
|
||||
POIFSFileSystem simple = new POIFSFileSystem(
|
||||
new FileInputStream(samples.getFile("chinese-traditional.msg"))
|
||||
|
|
|
@ -33,6 +33,12 @@ public class POITestCase extends TestCase {
|
|||
haystack.contains(needle)
|
||||
);
|
||||
}
|
||||
public static void assertNotContained(String haystack, String needle) {
|
||||
assertFalse(
|
||||
"Unexpectedly found text '" + needle + "' in text:\n" + haystack,
|
||||
haystack.contains(needle)
|
||||
);
|
||||
}
|
||||
|
||||
public static <T> void assertEquals(T[] expected, T[] actual)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue