Improved how HSMF handles multiple recipients

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@898295 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-01-12 12:02:18 +00:00
parent ababd504b5
commit 2a4805f938
17 changed files with 817 additions and 217 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="add">Improved how HSMF handles multiple recipients</action>
<action dev="POI-DEVELOPERS" type="add">Add PublisherTextExtractor support to ExtractorFactory</action>
<action dev="POI-DEVELOPERS" type="add">Add XSLF support for text extraction from tables</action>
<action dev="POI-DEVELOPERS" type="add">Support attachments as embedded documents within the new OutlookTextExtractor</action>

View File

@ -20,6 +20,7 @@ package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.text.FieldPosition;
import java.text.NumberFormat;
import java.util.Iterator;
import org.apache.poi.hssf.record.RecordInputStream;
/**
@ -392,4 +393,30 @@ public class StringUtil {
return true;
}
}
/**
 * An Iterator over an array of Strings.
 *
 * The array handed to the constructor is copied, so later changes
 * made to it by the caller do not affect the iteration. A null
 * array is treated as an empty one.
 *
 * Removal is not supported, as the backing store is a fixed array.
 */
public static class StringsIterator implements Iterator<String> {
    private final String[] strings;
    private int position = 0;

    /**
     * @param strings the values to iterate over, may be null
     */
    public StringsIterator(String[] strings) {
        if(strings != null) {
            // Defensive copy, so the caller can't change what we hand out
            this.strings = strings.clone();
        } else {
            this.strings = new String[0];
        }
    }

    /**
     * @return true if a call to {@link #next()} will return a value
     */
    public boolean hasNext() {
        return position < strings.length;
    }

    /**
     * Returns the next String and moves on by one.
     *
     * @throws ArrayIndexOutOfBoundsException if there are no more
     *  Strings left. (Kept rather than NoSuchElementException so that
     *  existing callers catching it continue to work.)
     */
    public String next() {
        // Check before advancing, so that repeated calls once exhausted
        // always report the same index and never run the counter away
        if(position >= strings.length) {
            throw new ArrayIndexOutOfBoundsException(position);
        }
        return strings[position++];
    }

    /**
     * Not supported - always throws.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("remove() is not supported by StringsIterator");
    }
}
}

View File

@ -23,14 +23,17 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.POIDocument;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
@ -46,47 +49,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* [MS-OXCMSG]: Message and Attachment Object Protocol Specification
*/
public class MAPIMessage extends POIDocument {
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks recipientChunks;
private AttachmentChunks[] attachmentChunks;
private boolean returnNullOnMissingChunk = false;
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks[] recipientChunks;
private AttachmentChunks[] attachmentChunks;
/**
* Constructor for creating new files.
*
*/
public MAPIMessage() {
// TODO - make writing possible
super(new POIFSFileSystem());
}
private boolean returnNullOnMissingChunk = false;
/**
* Constructor for creating new files.
*
*/
public MAPIMessage() {
// TODO - make writing possible
super(new POIFSFileSystem());
}
/**
* Constructor for reading MSG Files from the file system.
* @param filename
* @throws IOException
*/
public MAPIMessage(String filename) throws IOException {
this(new FileInputStream(new File(filename)));
}
/**
* Constructor for reading MSG Files from the file system.
* @param filename
* @throws IOException
*/
public MAPIMessage(String filename) throws IOException {
this(new FileInputStream(new File(filename)));
}
/**
* Constructor for reading MSG Files from an input stream.
* @param in
* @throws IOException
*/
public MAPIMessage(InputStream in) throws IOException {
this(new POIFSFileSystem(in));
}
/**
* Constructor for reading MSG Files from an input stream.
* @param in
* @throws IOException
*/
public MAPIMessage(InputStream in) throws IOException {
this(new POIFSFileSystem(in));
}
/**
* Constructor for reading MSG Files from a POIFS filesystem
* @param in
* @throws IOException
*/
public MAPIMessage(POIFSFileSystem fs) throws IOException {
this(fs.getRoot(), fs);
this(fs.getRoot(), fs);
}
/**
* Constructor for reading MSG Files from a certain
@ -96,178 +99,254 @@ public class MAPIMessage extends POIDocument {
*/
public MAPIMessage(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
super(poifsDir, fs);
// Grab all the chunks
ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir);
// Grab interesting bits
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
for(ChunkGroup group : chunkGroups) {
// Should only ever be one of these
if(group instanceof Chunks) {
mainChunks = (Chunks)group;
} else if(group instanceof NameIdChunks) {
nameIdChunks = (NameIdChunks)group;
} else if(group instanceof RecipientChunks) {
recipientChunks = (RecipientChunks)group;
}
// Add to list(s)
if(group instanceof AttachmentChunks) {
attachments.add((AttachmentChunks)group);
}
}
attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
}
// Grab all the chunks
ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir);
// Grab interesting bits
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
ArrayList<RecipientChunks> recipients = new ArrayList<RecipientChunks>();
for(ChunkGroup group : chunkGroups) {
// Should only ever be one of these
if(group instanceof Chunks) {
mainChunks = (Chunks)group;
} else if(group instanceof NameIdChunks) {
nameIdChunks = (NameIdChunks)group;
} else if(group instanceof RecipientChunks) {
recipients.add( (RecipientChunks)group );
}
// Add to list(s)
if(group instanceof AttachmentChunks) {
attachments.add( (AttachmentChunks)group );
}
}
attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
recipientChunks = recipients.toArray(new RecipientChunks[recipients.size()]);
// Now sort these chunks lists so they're in ascending order,
// rather than in random filesystem order
Arrays.sort(attachmentChunks, new AttachmentChunksSorter());
Arrays.sort(recipientChunks, new RecipientChunksSorter());
}
/**
* Gets a string value based on the passed chunk.
* @throws ChunkNotFoundException if the chunk isn't there
*/
public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
if(chunk == null) {
if(returnNullOnMissingChunk) {
return null;
} else {
throw new ChunkNotFoundException();
}
}
return chunk.getValue();
}
/**
 * Fetches the text held by the supplied chunk.
 *
 * @param chunk the chunk to read, may be null if the message lacks it
 * @return the chunk's value, or null when the chunk is missing and
 *  returnNullOnMissingChunk has been switched on
 * @throws ChunkNotFoundException if the chunk is missing and
 *  returnNullOnMissingChunk is off
 */
public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException {
    // Common case first - the chunk is there
    if(chunk != null) {
        return chunk.getValue();
    }

    // Missing chunk - either complain, or quietly hand back null
    if(! returnNullOnMissingChunk) {
        throw new ChunkNotFoundException();
    }
    return null;
}
/**
* Gets the plain text body of this Outlook Message
* @return The string representation of the 'text' version of the body, if available.
* @throws ChunkNotFoundException
*/
public String getTextBody() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.textBodyChunk);
}
/**
* Gets the plain text body of this Outlook Message
* @return The string representation of the 'text' version of the body, if available.
* @throws ChunkNotFoundException
*/
public String getTextBody() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.textBodyChunk);
}
/**
* Gets the subject line of the Outlook Message
* @throws ChunkNotFoundException
*/
public String getSubject() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.subjectChunk);
}
/**
* Gets the subject line of the Outlook Message
* @throws ChunkNotFoundException
*/
public String getSubject() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.subjectChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayToChunk);
}
/**
* Gets the display value of the "FROM" line of the outlook message
* This is not the actual address that was sent from but the formated display of the user name.
* @throws ChunkNotFoundException
*/
public String getDisplayFrom() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayFromChunk);
}
/**
* Gets the display value of the "FROM" line of the outlook message
* This is not the actual address that was sent from but the formatted display of the user name.
* @throws ChunkNotFoundException
*/
public String getDisplayFrom() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayFromChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayToChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayCC() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayCCChunk);
}
/**
* Gets the display value of the "CC" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* @throws ChunkNotFoundException
*/
public String getDisplayCC() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayCCChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayBCC() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayBCCChunk);
}
/**
* Returns the recipient's email address, checking all the
* likely chunks in search of it.
*/
public String getRecipientEmailAddress() throws ChunkNotFoundException {
if(recipientChunks == null) {
throw new ChunkNotFoundException("No recipients section present");
}
String email = recipientChunks.getRecipientEmailAddress();
if(email != null) {
return email;
} else {
throw new ChunkNotFoundException();
}
}
/**
* Gets the display value of the "BCC" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* This will only be present in sent emails, not received ones!
* @throws ChunkNotFoundException
*/
public String getDisplayBCC() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayBCCChunk);
}
/**
 * Builds a single string holding the email address of every
 * recipient, with "; " between each one. The addresses
 * themselves come from {@link #getRecipientEmailAddressList()}.
 *
 * @return the joined list of recipient email addresses
 * @throws ChunkNotFoundException if the address list can't be built
 */
public String getRecipientEmailAddress() throws ChunkNotFoundException {
    String[] addresses = getRecipientEmailAddressList();
    return toSemicolonList(addresses);
}
/**
 * Returns an array of all the recipients' email addresses, normally
 * in TO then CC then BCC order.
 * Checks all the likely chunks in search of the addresses.
 *
 * If no address can be found for a recipient, then the behaviour
 * follows the returnNullOnMissingChunk setting, in the same way as
 * the other getters on this class: when it is set, the entry for
 * that recipient is null; otherwise an exception is thrown.
 *
 * @return one entry per recipient chunk group, in their sorted order
 * @throws ChunkNotFoundException if there are no recipients at all, or
 *  if a recipient has no address and returnNullOnMissingChunk is off
 */
public String[] getRecipientEmailAddressList() throws ChunkNotFoundException {
    if(recipientChunks == null || recipientChunks.length == 0) {
        throw new ChunkNotFoundException("No recipients section present");
    }

    String[] emails = new String[recipientChunks.length];
    for(int i=0; i<emails.length; i++) {
        RecipientChunks rc = recipientChunks[i];
        String email = rc.getRecipientEmailAddress();

        // Only complain about a missing address if the caller hasn't
        // asked for nulls in place of missing data
        if(email == null && !returnNullOnMissingChunk) {
            throw new ChunkNotFoundException("No email address holding chunks found for the " + (i+1) + "th recipient");
        }
        emails[i] = email;
    }

    return emails;
}
/**
* Gets the conversation topic of the parsed Outlook Message.
* This is the part of the subject line that is after the RE: and FWD:
* @throws ChunkNotFoundException
*/
public String getConversationTopic() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.conversationTopic);
}
/**
 * Builds a single string holding the name of every recipient,
 * with "; " between each one. The names themselves come
 * from {@link #getRecipientNamesList()}.
 * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
 * and {@link #getDisplayBCC()}.
 *
 * @return the joined list of recipient names
 * @throws ChunkNotFoundException if the names list can't be built
 */
public String getRecipientNames() throws ChunkNotFoundException {
    String[] names = getRecipientNamesList();
    return toSemicolonList(names);
}
/**
* Returns an array of all the recipient's names, normally
* in TO then CC then BCC order.
* Checks all the likely chunks in search of the names.
* See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
* and {@link #getDisplayBCC()}.
*/
public String[] getRecipientNamesList() throws ChunkNotFoundException {
if(recipientChunks == null || recipientChunks.length == 0) {
throw new ChunkNotFoundException("No recipients section present");
}
/**
* Gets the message class of the parsed Outlook Message.
* (Yes, you can use this to determine if a message is a calendar item, note, or actual outlook Message)
* For emails the class will be IPM.Note
*
* @throws ChunkNotFoundException
*/
public String getMessageClass() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.messageClass);
}
/**
* Gets the date that the message was accepted by the
* server on.
*/
public Calendar getMessageDate() throws ChunkNotFoundException {
if(mainChunks.submissionChunk != null) {
return mainChunks.submissionChunk.getAcceptedAtTime();
}
if(returnNullOnMissingChunk)
return null;
throw new ChunkNotFoundException();
}
String[] names = new String[recipientChunks.length];
for(int i=0; i<names.length; i++) {
RecipientChunks rc = recipientChunks[i];
String name = rc.getRecipientName();
if(name != null) {
names[i] = name;
} else {
throw new ChunkNotFoundException("No display name holding chunks found for the " + (i+1) + "th recipient");
}
}
/**
* Gets the main, core details chunks
*/
public Chunks getMainChunks() {
return mainChunks;
}
/**
* Gets the recipient details chunks, or
* null if there aren't any
*/
public RecipientChunks getRecipientDetailsChunks() {
return recipientChunks;
}
/**
* Gets the Name ID chunks, or
return names;
}
/**
* Gets the conversation topic of the parsed Outlook Message.
* This is the part of the subject line that is after the RE: and FWD:
* @throws ChunkNotFoundException
*/
public String getConversationTopic() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.conversationTopic);
}
/**
* Gets the message class of the parsed Outlook Message.
* (Yes, you can use this to determine if a message is a calendar
* item, note, or actual outlook Message)
* For emails the class will be IPM.Note
*
* @throws ChunkNotFoundException
*/
public String getMessageClass() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.messageClass);
}
/**
 * Gets the date that the message was accepted by the
 * server on, as recorded in the submission chunk.
 *
 * @return the accepted-at time, or null if there is no submission
 *  chunk and returnNullOnMissingChunk has been switched on
 * @throws ChunkNotFoundException if there is no submission chunk
 *  and returnNullOnMissingChunk is off
 */
public Calendar getMessageDate() throws ChunkNotFoundException {
    if(mainChunks.submissionChunk == null) {
        // Nothing to read the date from
        if(! returnNullOnMissingChunk) {
            throw new ChunkNotFoundException();
        }
        return null;
    }

    return mainChunks.submissionChunk.getAcceptedAtTime();
}
/**
* Gets the main, core details chunks
*/
public Chunks getMainChunks() {
return mainChunks;
}
/**
* Gets all the recipient details chunks.
* These will normally be in the order of:
* * TO recipients, in the order returned by {@link #getDisplayTo()}
* * CC recipients, in the order returned by {@link #getDisplayCC()}
* * BCC recipients, in the order returned by {@link #getDisplayBCC()}
*/
public RecipientChunks[] getRecipientDetailsChunks() {
return recipientChunks;
}
/**
* Gets the Name ID chunks, or
* null if there aren't any
*/
public NameIdChunks getNameIdChunks() {
return nameIdChunks;
}
/**
* Gets the message attachments.
*/
public AttachmentChunks[] getAttachmentFiles() {
return attachmentChunks;
}
*/
public NameIdChunks getNameIdChunks() {
return nameIdChunks;
}
/**
* Gets the message attachments.
*/
public AttachmentChunks[] getAttachmentFiles() {
return attachmentChunks;
}
/**
@ -295,6 +374,21 @@ public class MAPIMessage extends POIDocument {
public void setReturnNullOnMissingChunk(boolean returnNullOnMissingChunk) {
this.returnNullOnMissingChunk = returnNullOnMissingChunk;
}
/**
 * Joins the given strings into one, placing "; " between
 * neighbouring entries. An empty array gives an empty string.
 *
 * @param l the strings to join, must not be null
 * @return the joined string
 */
private String toSemicolonList(String[] l) {
    StringBuffer joined = new StringBuffer();
    for(int i=0; i<l.length; i++) {
        // Separator goes before every entry except the first
        if(i > 0) {
            joined.append("; ");
        }
        joined.append(l[i]);
    }
    return joined.toString();
}
}

View File

@ -17,6 +17,7 @@
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
@ -99,4 +100,14 @@ public class AttachmentChunks implements ChunkGroup {
// And add to the main list
allChunks.add(chunk);
}
/**
 * Comparator which orders attachment chunk groups by comparing
 * their poifsName values with compareTo.
 * NOTE(review): this presumably matches attachment number order
 *  because the names end in a fixed width number - confirm.
 */
public static class AttachmentChunksSorter implements Comparator<AttachmentChunks> {
    @Override
    public int compare(AttachmentChunks a, AttachmentChunks b) {
        int order = a.poifsName.compareTo(b.poifsName);
        return order;
    }
}
}

View File

@ -18,20 +18,29 @@
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
* Collection of convenience chunks for the
* Recip(ient) part of an outlook file
* Recip(ient) part of an outlook file.
*
* If a message has multiple recipients, there will be
* several of these.
*/
public final class RecipientChunks implements ChunkGroup {
public static final String PREFIX = "__recip_version1.0_#";
public static final int RECIPIENT_NAME = 0x3001;
public static final int DELIVERY_TYPE = 0x3002;
public static final int RECIPIENT_SEARCH = 0x300B;
public static final int RECIPIENT_EMAIL = 0x39FE;
public static final int RECIPIENT_EMAIL_ADDRESS = 0x3003;
public static final int RECIPIENT_SEARCH = 0x300B;
public static final int RECIPIENT_SMTP_ADDRESS = 0x39FE;
public static final int RECIPIENT_DISPLAY_NAME = 0x5FF6;
/** Our 0 based position in the list of recipients */
public int recipientNumber;
/** TODO */
public ByteChunk recipientSearchChunk;
@ -42,27 +51,84 @@ public final class RecipientChunks implements ChunkGroup {
*/
public StringChunk recipientNameChunk;
/**
* The email address of the recipient, but
* The email address of the recipient, which
* could be in SMTP or SEARCH format, but
* isn't always present...
*/
public StringChunk recipientEmailChunk;
/**
* The smtp destination email address of
* the recipient, but isn't always present...
*/
public StringChunk recipientSMTPChunk;
/**
* Normally EX or SMTP. Will generally affect
* where the email address ends up.
*/
public StringChunk deliveryTypeChunk;
/**
* The display name of the recipient.
* Normally seems to hold the same value
* as in recipientNameChunk
*/
public StringChunk recipientDisplayNameChunk;
/**
 * Creates the chunk group for a recipient, working out the
 * recipient number from the supplied name.
 * The number is taken to be whatever follows the last '#' in
 * the name, read as hexadecimal. If there is no '#', or the
 * text after it isn't valid hex, the number is left as -1 (and
 * in the invalid case a warning is written to System.err).
 *
 * @param name the name to take the recipient number from
 */
public RecipientChunks(String name) {
    // Default, for when no number can be identified
    recipientNumber = -1;

    int hashAt = name.lastIndexOf('#');
    if(hashAt == -1) {
        return;
    }

    String hex = name.substring(hashAt+1);
    try {
        recipientNumber = Integer.parseInt(hex, 16);
    } catch(NumberFormatException e) {
        System.err.println("Invalid recipient number in name " + name);
    }
}
/**
 * Looks up the recipient's name, preferring the value in
 * recipientNameChunk and falling back to the one in
 * recipientDisplayNameChunk.
 *
 * @return the name, or null if neither chunk is present
 */
public String getRecipientName() {
    // Pick the first of the two chunks that we actually have
    StringChunk holder = recipientNameChunk;
    if(holder == null) {
        holder = recipientDisplayNameChunk;
    }

    return (holder == null) ? null : holder.getValue();
}
/**
* Tries to find their email address, in
* whichever chunk holds it given the
* delivery type.
*/
public String getRecipientEmailAddress() {
if(recipientEmailChunk != null) {
return recipientEmailChunk.getValue();
// If we have this, it really has the email
if(recipientSMTPChunk != null) {
return recipientSMTPChunk.getValue();
}
// Probably in the name field
// This might be a real email, or might be
// in CN=... format
if(recipientEmailChunk != null) {
String email = recipientEmailChunk.getValue();
int cne = email.indexOf("/CN=");
if(cne == -1) {
// Normal smtp address
return email;
} else {
// /O=..../CN=em@ail
return email.substring(cne+4);
}
}
// Might be in the name field, check there
if(recipientNameChunk != null) {
String name = recipientNameChunk.getValue();
if(name.indexOf('@') > -1) {
@ -73,13 +139,16 @@ public final class RecipientChunks implements ChunkGroup {
return name;
}
}
// Check the search chunk
// Check the search chunk, see if it's
// encoded as a SMTP destination in there.
if(recipientSearchChunk != null) {
String search = recipientSearchChunk.getAs7bitString();
if(search.indexOf("SMTP:") != -1) {
return search.substring(search.indexOf("SMTP:") + 5);
}
}
// Can't find it
return null;
}
@ -104,11 +173,17 @@ public final class RecipientChunks implements ChunkGroup {
recipientSearchChunk = (ByteChunk)chunk;
break;
case RECIPIENT_NAME:
recipientDisplayNameChunk = (StringChunk)chunk;
break;
case RECIPIENT_DISPLAY_NAME:
recipientNameChunk = (StringChunk)chunk;
break;
case RECIPIENT_EMAIL:
case RECIPIENT_EMAIL_ADDRESS:
recipientEmailChunk = (StringChunk)chunk;
break;
case RECIPIENT_SMTP_ADDRESS:
recipientSMTPChunk = (StringChunk)chunk;
break;
case DELIVERY_TYPE:
deliveryTypeChunk = (StringChunk)chunk;
break;
@ -117,4 +192,18 @@ public final class RecipientChunks implements ChunkGroup {
// And add to the main list
allChunks.add(chunk);
}
/**
 * Comparator which puts recipient chunk groups into ascending
 * order of their recipientNumber.
 */
public static class RecipientChunksSorter implements Comparator<RecipientChunks> {
    @Override
    public int compare(RecipientChunks a, RecipientChunks b) {
        int left = a.recipientNumber;
        int right = b.recipientNumber;
        if(left == right) {
            return 0;
        }
        return (left < right) ? -1 : +1;
    }
}
}

View File

@ -25,6 +25,7 @@ import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.StringUtil.StringsIterator;
/**
* A text extractor for HSMF (Outlook) .msg files.
@ -50,7 +51,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
public MAPIMessage getMAPIMessage() {
return (MAPIMessage)document;
}
/**
* Outputs something a little like a RFC822 email
*/
@ -58,20 +59,33 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
MAPIMessage msg = (MAPIMessage)document;
StringBuffer s = new StringBuffer();
StringsIterator emails;
try {
emails = new StringsIterator(
msg.getRecipientEmailAddressList()
);
} catch(ChunkNotFoundException e) {
emails = new StringsIterator(new String[0]);
}
try {
s.append("From: " + msg.getDisplayFrom() + "\n");
} catch(ChunkNotFoundException e) {}
// For To, CC and BCC, try to match the names
// up with their email addresses. Relies on the
// Recipient Chunks being in the same order as
// people in To + CC + BCC.
try {
s.append("To: " + msg.getDisplayTo() + "\n");
handleEmails(s, "To", msg.getDisplayTo(), emails);
} catch(ChunkNotFoundException e) {}
try {
if(msg.getDisplayCC().length() > 0)
s.append("CC: " + msg.getDisplayCC() + "\n");
handleEmails(s, "CC", msg.getDisplayCC(), emails);
} catch(ChunkNotFoundException e) {}
try {
if(msg.getDisplayBCC().length() > 0)
s.append("BCC: " + msg.getDisplayBCC() + "\n");
handleEmails(s, "BCC", msg.getDisplayBCC(), emails);
} catch(ChunkNotFoundException e) {}
try {
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
@ -85,4 +99,38 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
return s.toString();
}
/**
 * Writes one header style line, eg "To: ...", pairing up display
 * names with email addresses.
 * The display text, eg "Nick; Jim", is split on semicolons, and
 * for each name the next email (if any are left) is taken from the
 * iterator, giving something like
 * "Nick <nick@example.com>; Jim <jim@example.com>". Where the email
 * is identical to the name, it isn't repeated.
 * Nothing at all is written if the display text is null or empty.
 *
 * @param s the buffer to append the line to
 * @param type the label for the line, eg "To"
 * @param displayText the semicolon separated display names
 * @param emails the remaining email addresses, consumed in order
 */
protected void handleEmails(StringBuffer s, String type, String displayText, StringsIterator emails) {
    if(displayText == null || displayText.length() == 0) {
        return;
    }

    String[] names = displayText.split(";\\s*");

    s.append(type).append(": ");
    for(int i=0; i<names.length; i++) {
        String name = names[i];

        // Separator goes before every name except the first
        if(i > 0) {
            s.append("; ");
        }
        s.append(name);

        if(! emails.hasNext()) {
            continue;
        }

        // Append the email address in <>, assuming
        //  the name wasn't already the email address
        String email = emails.next();
        if(! email.equals(name)) {
            s.append(" <").append(email).append(">");
        }
    }
    s.append("\n");
}
}

View File

@ -67,7 +67,7 @@ public final class POIFSChunkParser {
group = new NameIdChunks();
}
if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
group = new RecipientChunks();
group = new RecipientChunks(dir.getName());
}
if(group != null) {

View File

@ -21,6 +21,7 @@ import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.poi.hsmf.datatypes.*;
import org.apache.poi.hsmf.extractor.TestOutlookTextExtractor;
import org.apache.poi.hsmf.parsers.*;
public final class AllHSMFTests {
@ -34,7 +35,10 @@ public final class AllHSMFTests {
suite.addTestSuite(TestChunkData.class);
suite.addTestSuite(TestTypes.class);
suite.addTestSuite(TestSorters.class);
suite.addTestSuite(TestOutlookTextExtractor.class);
suite.addTestSuite(TestPOIFSChunkParser.class);
return suite;

View File

@ -52,8 +52,17 @@ public final class TestBasics extends TestCase {
public void testRecipientEmail() throws Exception {
assertEquals("travis@overwrittenstack.com", simple.getRecipientEmailAddress());
assertEquals("kevin.roast@alfresco.org", quick.getRecipientEmailAddress());
assertEquals("randall.scarberry@pnl.gov", outlook30.getRecipientEmailAddress());
assertEquals("nicolas1.23456@free.fr", attachments.getRecipientEmailAddress());
// This one has lots...
assertEquals(18, outlook30.getRecipientEmailAddressList().length);
assertEquals("shawn.bohn@pnl.gov; gus.calapristi@pnl.gov; Richard.Carter@pnl.gov; " +
"barb.cheney@pnl.gov; nick.cramer@pnl.gov; vern.crow@pnl.gov; Laura.Curtis@pnl.gov; " +
"julie.dunkle@pnl.gov; david.gillen@pnl.gov; michelle@pnl.gov; Jereme.Haack@pnl.gov; " +
"Michelle.Hart@pnl.gov; ranata.johnson@pnl.gov; grant.nakamura@pnl.gov; " +
"debbie.payne@pnl.gov; stuart.rose@pnl.gov; randall.scarberry@pnl.gov; Leigh.Williams@pnl.gov",
outlook30.getRecipientEmailAddress()
);
}
/**

View File

@ -0,0 +1,97 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.util.Arrays;
import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import junit.framework.TestCase;
/**
 * Verifies that the comparators supplied by the chunk groups
 * ({@link AttachmentChunksSorter} and {@link RecipientChunksSorter})
 * put arrays of chunk groups into the expected order.
 */
public final class TestSorters extends TestCase {
    /**
     * Attachment groups should come back ordered by their names.
     */
    public void testAttachmentChunksSorter() {
        // Simple
        checkAttachments(
            new String[] {
                "__attach_version1.0_#00000001",
                "__attach_version1.0_#00000000",
            },
            new String[] {
                "__attach_version1.0_#00000000",
                "__attach_version1.0_#00000001",
            }
        );

        // Lots, with gaps
        checkAttachments(
            new String[] {
                "__attach_version1.0_#00000101",
                "__attach_version1.0_#00000001",
                "__attach_version1.0_#00000002",
                "__attach_version1.0_#00000005",
                "__attach_version1.0_#00000026",
                "__attach_version1.0_#00000000",
                "__attach_version1.0_#000000AB",
            },
            new String[] {
                "__attach_version1.0_#00000000",
                "__attach_version1.0_#00000001",
                "__attach_version1.0_#00000002",
                "__attach_version1.0_#00000005",
                "__attach_version1.0_#00000026",
                "__attach_version1.0_#000000AB",
                "__attach_version1.0_#00000101",
            }
        );
    }

    /**
     * Recipient groups should come back ordered by recipient number.
     */
    public void testRecipientChunksSorter() {
        // Simple
        checkRecipients(
            new String[] {
                "__recip_version1.0_#00000001",
                "__recip_version1.0_#00000000",
            },
            new int[] { 0, 1 }
        );

        // Lots, with gaps
        checkRecipients(
            new String[] {
                "__recip_version1.0_#00020001",
                "__recip_version1.0_#000000FF",
                "__recip_version1.0_#00000205",
                "__recip_version1.0_#00000001",
                "__recip_version1.0_#00000005",
                "__recip_version1.0_#00000009",
                "__recip_version1.0_#00000404",
                "__recip_version1.0_#00000000",
            },
            new int[] { 0, 1, 5, 9, 0xFF, 0x205, 0x404, 0x20001 }
        );
    }

    /**
     * Builds attachment groups with the given names, sorts them, and
     * checks the names then come out in the expected order.
     */
    private static void checkAttachments(String[] names, String[] expected) {
        AttachmentChunks[] chunks = new AttachmentChunks[names.length];
        for(int i=0; i<names.length; i++) {
            chunks[i] = new AttachmentChunks(names[i]);
        }

        Arrays.sort(chunks, new AttachmentChunksSorter());

        for(int i=0; i<expected.length; i++) {
            assertEquals(expected[i], chunks[i].getPOIFSName());
        }
    }

    /**
     * Builds recipient groups with the given names, sorts them, and
     * checks the recipient numbers then come out in the expected order.
     */
    private static void checkRecipients(String[] names, int[] expected) {
        RecipientChunks[] chunks = new RecipientChunks[names.length];
        for(int i=0; i<names.length; i++) {
            chunks[i] = new RecipientChunks(names[i]);
        }

        Arrays.sort(chunks, new RecipientChunksSorter());

        for(int i=0; i<expected.length; i++) {
            assertEquals(expected[i], chunks[i].recipientNumber);
        }
    }
}

View File

@ -53,7 +53,7 @@ public final class TestOutlookTextExtractor extends TestCase {
String text = ext.getText();
assertContains(text, "From: Kevin Roast\n");
assertContains(text, "To: Kevin Roast\n");
assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: Test the content transformer\n");
@ -92,4 +92,77 @@ public final class TestOutlookTextExtractor extends TestCase {
assertEquals(inp, poifs);
assertEquals(inp, mapi);
}
/**
 * Checks the extracted text for the two sample messages we sent
 * ourselves, each of which has several To, CC and BCC recipients.
 * The names on each line should be paired with their addresses.
 */
public void testSentWithMulipleRecipients() throws Exception {
    // The recipients the assertions below expect to find:
    //  To:  'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
    //       'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
    //       'Mike Farman' <mikef@alfresco.com>
    //  Cc:  nickb@alfresco.com, nick.burch@alfresco.com,
    //       'Roy Wetherall' <roy.wetherall@alfresco.com>
    //  Bcc: 'David Caruana' <dave.caruana@alfresco.com>,
    //       'Vonka Jan' <jan.vonka@alfresco.com>
    final String[] sampleNames = {
        "example_sent_regular.msg", "example_sent_unicode.msg"
    };

    for(int i=0; i<sampleNames.length; i++) {
        FileInputStream stream = new FileInputStream(samples.getFile(sampleNames[i]));
        POIFSFileSystem fs = new POIFSFileSystem(stream);
        MAPIMessage message = new MAPIMessage(fs);

        OutlookTextExtactor extractor = new OutlookTextExtactor(message);
        String extracted = extractor.getText();

        // Headers
        assertContains(extracted, "From: Mike Farman\n");
        assertContains(extracted, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
            "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
        assertContains(extracted, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " +
            "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
        assertContains(extracted, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
            "'Vonka Jan' <jan.vonka@alfresco.com>\n");
        assertContains(extracted, "Subject: This is a test message please ignore\n");

        // No Date: line is expected for these messages
        assertEquals(-1, extracted.indexOf("Date:"));

        // Body
        assertContains(extracted, "The quick brown fox jumps over the lazy dog");
    }
}
/**
 * Verifies the text extracted from messages we received,
 * each of which has several To and CC recipients.
 */
public void testReceivedWithMultipleRecipients() throws Exception {
    // Recipients of the sample messages:
    //  To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
    //      'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
    //      'Mike Farman' <mikef@alfresco.com>
    //  Cc: nickb@alfresco.com, nick.burch@alfresco.com,
    //      'Roy Wetherall' <roy.wetherall@alfresco.com>
    //  (No BCC shown)
    final String expectedTo =
        "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
        "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n";
    final String expectedCc =
        "CC: nickb@alfresco.com; " +
        "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n";

    // The same message, stored once in regular and once in unicode form
    String[] sampleNames = { "example_received_regular.msg", "example_received_unicode.msg" };
    for (int idx = 0; idx < sampleNames.length; idx++) {
        FileInputStream rawStream = new FileInputStream(samples.getFile(sampleNames[idx]));
        POIFSFileSystem fileSystem = new POIFSFileSystem(rawStream);
        MAPIMessage message = new MAPIMessage(fileSystem);

        OutlookTextExtactor extractor = new OutlookTextExtactor(message);
        String extracted = extractor.getText();

        assertContains(extracted, "From: Mike Farman\n");
        assertContains(extracted, expectedTo);
        assertContains(extracted, expectedCc);
        // No BCC line may appear in the extracted text of a received message
        assertEquals(-1, extracted.indexOf("BCC:"));
        assertContains(extracted, "Subject: This is a test message please ignore\n");
        // No date header is expected in the extracted text for these files
        assertEquals(-1, extracted.indexOf("Date:"));
        assertContains(extracted, "The quick brown fox jumps over the lazy dog");
    }
}
}

View File

@ -21,6 +21,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.hsmf.MAPIMessage;
@ -29,6 +30,7 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
@ -81,7 +83,7 @@ public final class TestPOIFSChunkParser extends TestCase {
}
}
public void testFindsRecips() throws IOException {
public void testFindsRecips() throws IOException, ChunkNotFoundException {
POIFSFileSystem simple = new POIFSFileSystem(
new FileInputStream(samples.getFile("quick.msg"))
);
@ -95,7 +97,9 @@ public final class TestPOIFSChunkParser extends TestCase {
assertTrue(groups[2] instanceof NameIdChunks);
RecipientChunks recips = (RecipientChunks)groups[1];
assertEquals("kevin.roast@alfresco.org", recips.recipientEmailChunk.getValue());
assertEquals("kevin.roast@alfresco.org", recips.recipientSMTPChunk.getValue());
assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
recips.recipientEmailChunk.getValue());
String search = new String(recips.recipientSearchChunk.getValue(), "ASCII");
assertEquals("CN=KEVIN.ROAST@BEN\0", search.substring(search.length()-19));
@ -103,20 +107,123 @@ public final class TestPOIFSChunkParser extends TestCase {
// Now via MAPIMessage
MAPIMessage msg = new MAPIMessage(simple);
assertNotNull(msg.getRecipientDetailsChunks());
assertEquals(1, msg.getRecipientDetailsChunks().length);
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].getRecipientEmailAddress());
assertEquals("Kevin Roast", msg.getRecipientDetailsChunks()[0].getRecipientName());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientEmailAddress());
// Try both SMTP and EX files for recipient
assertEquals("EX", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
assertEquals("EX", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
// Now look at another message
msg = new MAPIMessage(new POIFSFileSystem(
new FileInputStream(samples.getFile("simple_test_msg.msg"))
));
assertEquals("SMTP", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
assertEquals(null, msg.getRecipientDetailsChunks().recipientEmailChunk);
assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks().recipientNameChunk.getValue());
assertNotNull(msg.getRecipientDetailsChunks());
assertEquals(1, msg.getRecipientDetailsChunks().length);
assertEquals("SMTP", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientSMTPChunk);
assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientNameChunk);
assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
assertEquals("travis@overwrittenstack.com", msg.getRecipientEmailAddress());
}
/**
 * Parses a message with six recipients and checks the recipient
 * chunk groups three ways: in the order the parser returns them,
 * after sorting with {@link RecipientChunksSorter}, and finally
 * through the recipient lists exposed by {@link MAPIMessage}.
 */
public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
    POIFSFileSystem multiple = new POIFSFileSystem(
        new FileInputStream(samples.getFile("example_received_unicode.msg"))
    );

    // Look up each of the six recipient directories (#00000000 .. #00000005);
    // presumably getEntry fails if one of them is missing
    for (int n = 0; n <= 5; n++) {
        multiple.getRoot().getEntry("__recip_version1.0_#0000000" + n);
    }

    // Group types, in the order the parser hands them back
    ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
    Class<?>[] expectedTypes = new Class<?>[] {
        Chunks.class,
        RecipientChunks.class,
        AttachmentChunks.class,
        RecipientChunks.class,
        RecipientChunks.class,
        RecipientChunks.class,
        RecipientChunks.class,
        RecipientChunks.class,
        NameIdChunks.class,
    };
    assertEquals(9, groups.length);
    for (int g = 0; g < expectedTypes.length; g++) {
        assertTrue(expectedTypes[g].isInstance(groups[g]));
    }

    // In FS order initially
    int[] recipientGroupIndexes = { 1, 3, 4, 5, 6, 7 };
    RecipientChunks[] chunks = new RecipientChunks[recipientGroupIndexes.length];
    for (int c = 0; c < chunks.length; c++) {
        chunks[c] = (RecipientChunks)groups[recipientGroupIndexes[c]];
    }
    assertEquals(6, chunks.length);

    int[] fsOrderNumbers = { 0, 4, 3, 2, 1, 5 };
    for (int c = 0; c < fsOrderNumbers.length; c++) {
        assertEquals(fsOrderNumbers[c], chunks[c].recipientNumber);
    }

    // Check name + address of every recipient, still in FS order
    String[][] fsOrder = {
        { "'Ashutosh Dandavate'", "ashutosh.dandavate@alfresco.com" },
        { "nick.burch@alfresco.com", "nick.burch@alfresco.com" },
        { "nickb@alfresco.com", "nickb@alfresco.com" },
        { "'Mike Farman'", "mikef@alfresco.com" },
        { "'Paul Holmes-Higgin'", "paul.hh@alfresco.com" },
        { "'Roy Wetherall'", "roy.wetherall@alfresco.com" },
    };
    for (int c = 0; c < fsOrder.length; c++) {
        assertEquals(fsOrder[c][0], chunks[c].getRecipientName());
        assertEquals(fsOrder[c][1], chunks[c].getRecipientEmailAddress());
    }

    // Now sort, and re-check
    String[][] sortedOrder = {
        { "'Ashutosh Dandavate'", "ashutosh.dandavate@alfresco.com" },
        { "'Paul Holmes-Higgin'", "paul.hh@alfresco.com" },
        { "'Mike Farman'", "mikef@alfresco.com" },
        { "nickb@alfresco.com", "nickb@alfresco.com" },
        { "nick.burch@alfresco.com", "nick.burch@alfresco.com" },
        { "'Roy Wetherall'", "roy.wetherall@alfresco.com" },
    };
    Arrays.sort(chunks, new RecipientChunksSorter());
    for (int c = 0; c < sortedOrder.length; c++) {
        assertEquals(sortedOrder[c][0], chunks[c].getRecipientName());
        assertEquals(sortedOrder[c][1], chunks[c].getRecipientEmailAddress());
    }

    // Finally check on message: its lists are expected in the sorted order
    MAPIMessage msg = new MAPIMessage(multiple);
    assertEquals(6, msg.getRecipientEmailAddressList().length);
    assertEquals(6, msg.getRecipientNamesList().length);
    for (int r = 0; r < sortedOrder.length; r++) {
        assertEquals(sortedOrder[r][0], msg.getRecipientNamesList()[r]);
    }
    for (int r = 0; r < sortedOrder.length; r++) {
        assertEquals(sortedOrder[r][1], msg.getRecipientEmailAddressList()[r]);
    }
}
public void testFindsNameId() throws IOException {

View File

@ -20,6 +20,8 @@ package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.text.NumberFormat;
import org.apache.poi.util.StringUtil.StringsIterator;
import junit.framework.TestCase;
/**
@ -158,5 +160,43 @@ public final class TestStringUtil extends TestCase {
return nf.format( num );
}
/**
 * Walks a {@link StringsIterator} over an empty, a one-element and a
 * three-element array, checking that every element comes back in
 * order and that calling next() past the end raises
 * ArrayIndexOutOfBoundsException.
 */
public void testStringsIterator() {
    String[][] inputs = {
        new String[0],
        new String[] {"1"},
        new String[] {"1","2","3"},
    };

    for (String[] input : inputs) {
        StringsIterator iterator = new StringsIterator(input);

        // Every element is announced by hasNext() and returned in array order
        for (String expected : input) {
            assertTrue(iterator.hasNext());
            assertEquals(expected, iterator.next());
        }

        // Exhausted: hasNext() reports false and next() must throw
        assertFalse(iterator.hasNext());
        try {
            iterator.next();
            fail();
        } catch(ArrayIndexOutOfBoundsException expectedFailure) {
            // This exception is exactly what the test is looking for
        }
    }
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.