List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@950595 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-06-02 15:24:11 +00:00
parent fe3cb0b4e3
commit a007df3ef9
3 changed files with 45 additions and 0 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="add">List attachment names in the output of OutlookTextExtractor (to get attachment contents, use ExtractorFactory as normal)</action>
<action dev="POI-DEVELOPERS" type="fix">48872 - allow DataFormatter.formatRawCellContents to handle 1904 as well as 1900 dates</action>
<action dev="POI-DEVELOPERS" type="fix">48872 - handle MMMMM and elapsed time formatting rules in DataFormatter</action>
<action dev="POI-DEVELOPERS" type="fix">48872 - handle zero formatting rules, and better color detection in DataFormatter</action>

View File

@ -22,6 +22,7 @@ import java.text.SimpleDateFormat;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -93,6 +94,18 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
try {
s.append("Subject: " + msg.getSubject() + "\n");
} catch(ChunkNotFoundException e) {}
// Display attachment names, one "Attachment: ..." line per attachment.
// To get the attachment contents, use ExtractorFactory
for(AttachmentChunks att : msg.getAttachmentFiles()) {
    // The mime tag chunk is treated as optional below, so don't assume
    //  the long file name chunk is always present either. A missing
    //  chunk is rendered the same way as a chunk with no value.
    String ats = null;
    if(att.attachLongFileName != null) {
        ats = att.attachLongFileName.getValue();
    }
    // Prefix the name with the mime type when one was recorded
    if(att.attachMimeTag != null &&
            att.attachMimeTag.getValue() != null) {
        ats = att.attachMimeTag.getValue() + " = " + ats;
    }
    s.append("Attachment: " + ats + "\n");
}
try {
s.append("\n" + msg.getTextBody() + "\n");
} catch(ChunkNotFoundException e) {}

View File

@ -26,6 +26,9 @@ import java.util.GregorianCalendar;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.extractor.TestExtractorFactory;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -59,6 +62,7 @@ public final class TestOutlookTextExtractor extends TestCase {
assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertEquals(-1, text.indexOf("Attachment:"));
assertContains(text, "Subject: Test the content transformer\n");
Calendar cal = new GregorianCalendar(2007, 5, 14, 9, 42, 55);
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
@ -171,4 +175,31 @@ public final class TestOutlookTextExtractor extends TestCase {
assertContains(text, "The quick brown fox jumps over the lazy dog");
}
}
/**
 * Checks the text extracted from a message that carries attachments:
 *  the usual headers and body must be present, and each attachment
 *  must be listed by name on its own "Attachment:" line.
 * The contents of the attachments are not checked here.
 *
 * See also {@link TestExtractorFactory#testEmbeded()}
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void testWithAttachments() throws Exception {
    FileInputStream stream = new FileInputStream(
            samples.getFile("attachment_test_msg.msg")
    );
    try {
        POIFSFileSystem simple = new POIFSFileSystem(stream);
        MAPIMessage msg = new MAPIMessage(simple);
        OutlookTextExtactor ext = new OutlookTextExtactor(msg);

        // Check the normal bits
        String text = ext.getText();
        assertContains(text, "From: Nicolas1");
        assertContains(text, "To: 'nicolas1.23456@free.fr'");
        assertEquals(-1, text.indexOf("CC:"));
        assertEquals(-1, text.indexOf("BCC:"));
        assertContains(text, "Subject: test");
        assertEquals(-1, text.indexOf("Date:"));

        // Each attachment is listed by name
        assertContains(text, "Attachment: test-unicode.doc\n");
        assertContains(text, "Attachment: pj1.txt\n");

        // The message body is still included
        assertContains(text, "contenu");

        // Embedded bits are checked in
        //  TestExtractorFactory
    } finally {
        // Always release the file handle, even if an assertion fails
        stream.close();
    }
}
}