diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 5ee7b31f2f..986be3b352 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Improved how HSMF handles multiple recipients Add PublisherTextExtractor support to ExtractorFactory Add XSLF support for text extraction from tables Support attachments as embeded documents within the new OutlookTextExtractor diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index b08a979fa4..62e42c2a2f 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -20,6 +20,7 @@ package org.apache.poi.util; import java.io.UnsupportedEncodingException; import java.text.FieldPosition; import java.text.NumberFormat; +import java.util.Iterator; import org.apache.poi.hssf.record.RecordInputStream; /** @@ -392,4 +393,30 @@ public class StringUtil { return true; } } + + /** + * An Iterator over an array of Strings. 
+ */ + public static class StringsIterator implements Iterator<String> { + private String[] strings; + private int position = 0; + public StringsIterator(String[] strings) { + if(strings != null) { + this.strings = strings; + } else { + this.strings = new String[0]; + } + } + + public boolean hasNext() { + return position < strings.length; + } + public String next() { + if(position >= strings.length) + throw new java.util.NoSuchElementException("No more strings to iterate over"); + /* Only advance once we know there is an element to return */ + return strings[position++]; + } + public void remove() { throw new UnsupportedOperationException("StringsIterator is read-only"); } + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index 911119b19e..05c14482a8 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -23,14 +23,17 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; import org.apache.poi.POIDocument; import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter; import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; import org.apache.poi.hsmf.datatypes.NameIdChunks; import org.apache.poi.hsmf.datatypes.RecipientChunks; +import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter; import org.apache.poi.hsmf.datatypes.StringChunk; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.hsmf.parsers.POIFSChunkParser; @@ -46,47 +49,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * [MS-OXCMSG]: Message and Attachment Object Protocol Specification */ public class MAPIMessage extends POIDocument { - private Chunks mainChunks; - private NameIdChunks nameIdChunks; - private RecipientChunks recipientChunks; - private AttachmentChunks[] attachmentChunks; - - private boolean 
returnNullOnMissingChunk = false; + private Chunks mainChunks; + private NameIdChunks nameIdChunks; + private RecipientChunks[] recipientChunks; + private AttachmentChunks[] attachmentChunks; - /** - * Constructor for creating new files. - * - */ - public MAPIMessage() { - // TODO - make writing possible - super(new POIFSFileSystem()); - } + private boolean returnNullOnMissingChunk = false; + + /** + * Constructor for creating new files. + * + */ + public MAPIMessage() { + // TODO - make writing possible + super(new POIFSFileSystem()); + } - /** - * Constructor for reading MSG Files from the file system. - * @param filename - * @throws IOException - */ - public MAPIMessage(String filename) throws IOException { - this(new FileInputStream(new File(filename))); - } + /** + * Constructor for reading MSG Files from the file system. + * @param filename + * @throws IOException + */ + public MAPIMessage(String filename) throws IOException { + this(new FileInputStream(new File(filename))); + } - /** - * Constructor for reading MSG Files from an input stream. - * @param in - * @throws IOException - */ - public MAPIMessage(InputStream in) throws IOException { - this(new POIFSFileSystem(in)); - } + /** + * Constructor for reading MSG Files from an input stream. 
+ * @param in + * @throws IOException + */ + public MAPIMessage(InputStream in) throws IOException { + this(new POIFSFileSystem(in)); + } /** * Constructor for reading MSG Files from a POIFS filesystem * @param in * @throws IOException */ public MAPIMessage(POIFSFileSystem fs) throws IOException { - this(fs.getRoot(), fs); + this(fs.getRoot(), fs); } /** * Constructor for reading MSG Files from a certain @@ -96,178 +99,254 @@ public class MAPIMessage extends POIDocument { */ public MAPIMessage(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException { super(poifsDir, fs); - - // Grab all the chunks - ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir); - - // Grab interesting bits - ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>(); - for(ChunkGroup group : chunkGroups) { - // Should only ever be one of these - if(group instanceof Chunks) { - mainChunks = (Chunks)group; - } else if(group instanceof NameIdChunks) { - nameIdChunks = (NameIdChunks)group; - } else if(group instanceof RecipientChunks) { - recipientChunks = (RecipientChunks)group; - } - - // Add to list(s) - if(group instanceof AttachmentChunks) { - attachments.add((AttachmentChunks)group); - } - } - attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]); - } + + // Grab all the chunks + ChunkGroup[] chunkGroups = POIFSChunkParser.parse(poifsDir); + + // Grab interesting bits + ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>(); + ArrayList<RecipientChunks> recipients = new ArrayList<RecipientChunks>(); + for(ChunkGroup group : chunkGroups) { + // Should only ever be one of these + if(group instanceof Chunks) { + mainChunks = (Chunks)group; + } else if(group instanceof NameIdChunks) { + nameIdChunks = (NameIdChunks)group; + } else if(group instanceof RecipientChunks) { + recipients.add( (RecipientChunks)group ); + } + + // Add to list(s) + if(group instanceof AttachmentChunks) { + attachments.add( (AttachmentChunks)group ); + } + } + attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]); + 
recipientChunks = recipients.toArray(new RecipientChunks[recipients.size()]); + + // Now sort these chunks lists so they're in ascending order, + // rather than in random filesystem order + Arrays.sort(attachmentChunks, new AttachmentChunksSorter()); + Arrays.sort(recipientChunks, new RecipientChunksSorter()); + } - /** - * Gets a string value based on the passed chunk. - * @throws ChunkNotFoundException if the chunk isn't there - */ - public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException { - if(chunk == null) { - if(returnNullOnMissingChunk) { - return null; - } else { - throw new ChunkNotFoundException(); - } - } - return chunk.getValue(); - } + /** + * Gets a string value based on the passed chunk. + * @throws ChunkNotFoundException if the chunk isn't there + */ + public String getStringFromChunk(StringChunk chunk) throws ChunkNotFoundException { + if(chunk == null) { + if(returnNullOnMissingChunk) { + return null; + } else { + throw new ChunkNotFoundException(); + } + } + return chunk.getValue(); + } - /** - * Gets the plain text body of this Outlook Message - * @return The string representation of the 'text' version of the body, if available. - * @throws ChunkNotFoundException - */ - public String getTextBody() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.textBodyChunk); - } + /** + * Gets the plain text body of this Outlook Message + * @return The string representation of the 'text' version of the body, if available. 
+ * @throws ChunkNotFoundException + */ + public String getTextBody() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.textBodyChunk); + } - /** - * Gets the subject line of the Outlook Message - * @throws ChunkNotFoundException - */ - public String getSubject() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.subjectChunk); - } + /** + * Gets the subject line of the Outlook Message + * @throws ChunkNotFoundException + */ + public String getSubject() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.subjectChunk); + } - /** - * Gets the display value of the "TO" line of the outlook message - * This is not the actual list of addresses/values that will be sent to if you click Reply in the email. - * @throws ChunkNotFoundException - */ - public String getDisplayTo() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.displayToChunk); - } + /** + * Gets the display value of the "FROM" line of the outlook message + * This is not the actual address that was sent from but the formated display of the user name. + * @throws ChunkNotFoundException + */ + public String getDisplayFrom() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.displayFromChunk); + } - /** - * Gets the display value of the "FROM" line of the outlook message - * This is not the actual address that was sent from but the formated display of the user name. - * @throws ChunkNotFoundException - */ - public String getDisplayFrom() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.displayFromChunk); - } + /** + * Gets the display value of the "TO" line of the outlook message. + * If there are multiple recipients, they will be separated + * by semicolons. + * This is not the actual list of addresses/values that will be + * sent to if you click Reply in the email - those are stored + * in {@link RecipientChunks}. 
+ * @throws ChunkNotFoundException + */ + public String getDisplayTo() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.displayToChunk); + } - /** - * Gets the display value of the "TO" line of the outlook message - * This is not the actual list of addresses/values that will be sent to if you click Reply in the email. - * @throws ChunkNotFoundException - */ - public String getDisplayCC() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.displayCCChunk); - } + /** + * Gets the display value of the "CC" line of the outlook message. + * If there are multiple recipients, they will be separated + * by semicolons. + * This is not the actual list of addresses/values that will be + * sent to if you click Reply in the email - those are stored + * in {@link RecipientChunks}. + * @throws ChunkNotFoundException + */ + public String getDisplayCC() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.displayCCChunk); + } - /** - * Gets the display value of the "TO" line of the outlook message - * This is not the actual list of addresses/values that will be sent to if you click Reply in the email. - * @throws ChunkNotFoundException - */ - public String getDisplayBCC() throws ChunkNotFoundException { - return getStringFromChunk(mainChunks.displayBCCChunk); - } - - - /** - * Returns the recipient's email address, checking all the - * likely chunks in search of it. - */ - public String getRecipientEmailAddress() throws ChunkNotFoundException { - if(recipientChunks == null) { - throw new ChunkNotFoundException("No recipients section present"); - } - String email = recipientChunks.getRecipientEmailAddress(); - if(email != null) { - return email; - } else { - throw new ChunkNotFoundException(); - } - } + /** + * Gets the display value of the "BCC" line of the outlook message. + * If there are multiple recipients, they will be separated + * by semicolons. 
+ * This is not the actual list of addresses/values that will be + * sent to if you click Reply in the email - those are stored + * in {@link RecipientChunks}. + * This will only be present in sent emails, not received ones! + * @throws ChunkNotFoundException + */ + public String getDisplayBCC() throws ChunkNotFoundException { + return getStringFromChunk(mainChunks.displayBCCChunk); + } + + /** + * Returns all the recipients' email address, separated by + * semicolons. Checks all the likely chunks in search of + * the addresses. + */ + public String getRecipientEmailAddress() throws ChunkNotFoundException { + return toSemicolonList(getRecipientEmailAddressList()); + } + /** + * Returns an array of all the recipient's email address, normally + * in TO then CC then BCC order. + * Checks all the likely chunks in search of the addresses. + */ + public String[] getRecipientEmailAddressList() throws ChunkNotFoundException { + if(recipientChunks == null || recipientChunks.length == 0) { + throw new ChunkNotFoundException("No recipients section present"); + } + + String[] emails = new String[recipientChunks.length]; + for(int i=0; i { + @Override + public int compare(AttachmentChunks a, AttachmentChunks b) { + return a.poifsName.compareTo(b.poifsName); + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java index 15c35c069d..b20aba8a39 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/RecipientChunks.java @@ -18,20 +18,29 @@ package org.apache.poi.hsmf.datatypes; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; /** * Collection of convenience chunks for the - * Recip(ient) part of an outlook file + * Recip(ient) part of an outlook file. + * + * If a message has multiple recipients, there will be + * several of these. 
*/ public final class RecipientChunks implements ChunkGroup { public static final String PREFIX = "__recip_version1.0_#"; public static final int RECIPIENT_NAME = 0x3001; public static final int DELIVERY_TYPE = 0x3002; - public static final int RECIPIENT_SEARCH = 0x300B; - public static final int RECIPIENT_EMAIL = 0x39FE; + public static final int RECIPIENT_EMAIL_ADDRESS = 0x3003; + public static final int RECIPIENT_SEARCH = 0x300B; + public static final int RECIPIENT_SMTP_ADDRESS = 0x39FE; + public static final int RECIPIENT_DISPLAY_NAME = 0x5FF6; + + /** Our 0 based position in the list of recipients */ + public int recipientNumber; /** TODO */ public ByteChunk recipientSearchChunk; @@ -42,27 +51,84 @@ public final class RecipientChunks implements ChunkGroup { */ public StringChunk recipientNameChunk; /** - * The email address of the recipient, but + * The email address of the recipient, which + * could be in SMTP or SEARCH format, but * isn't always present... */ public StringChunk recipientEmailChunk; + /** + * The smtp destination email address of + * the recipient, but isn't always present... + */ + public StringChunk recipientSMTPChunk; /** * Normally EX or SMTP. Will generally affect * where the email address ends up. */ public StringChunk deliveryTypeChunk; + /** + * The display name of the recipient. + * Normally seems to hold the same value + * as in recipientNameChunk + */ + public StringChunk recipientDisplayNameChunk; + public RecipientChunks(String name) { + recipientNumber = -1; + int splitAt = name.lastIndexOf('#'); + if(splitAt > -1) { + String number = name.substring(splitAt+1); + try { + recipientNumber = Integer.parseInt(number, 16); + } catch(NumberFormatException e) { + System.err.println("Invalid recipient number in name " + name); + } + } + } + + /** + * Tries to find their name, + * in whichever chunk holds it. 
+ */ + public String getRecipientName() { + if(recipientNameChunk != null) { + return recipientNameChunk.getValue(); + } + if(recipientDisplayNameChunk != null) { + return recipientDisplayNameChunk.getValue(); + } + + // Can't find it + return null; + } + /** * Tries to find their email address, in * whichever chunk holds it given the * delivery type. */ public String getRecipientEmailAddress() { - if(recipientEmailChunk != null) { - return recipientEmailChunk.getValue(); + // If we have this, it really has the email + if(recipientSMTPChunk != null) { + return recipientSMTPChunk.getValue(); } - // Probably in the name field + + // This might be a real email, or might be + // in CN=... format + if(recipientEmailChunk != null) { + String email = recipientEmailChunk.getValue(); + int cne = email.indexOf("/CN="); + if(cne == -1) { + // Normal smtp address + return email; + } else { + // /O=..../CN=em@ail + return email.substring(cne+4); + } + } + + // Might be in the name field, check there if(recipientNameChunk != null) { String name = recipientNameChunk.getValue(); if(name.indexOf('@') > -1) { @@ -73,13 +139,16 @@ public final class RecipientChunks implements ChunkGroup { return name; } } - // Check the search chunk + + // Check the search chunk, see if it's + // encoded as a SMTP destination in there. 
if(recipientSearchChunk != null) { String search = recipientSearchChunk.getAs7bitString(); if(search.indexOf("SMTP:") != -1) { return search.substring(search.indexOf("SMTP:") + 5); } } + // Can't find it return null; } @@ -104,11 +173,17 @@ public final class RecipientChunks implements ChunkGroup { recipientSearchChunk = (ByteChunk)chunk; break; case RECIPIENT_NAME: + recipientDisplayNameChunk = (StringChunk)chunk; + break; + case RECIPIENT_DISPLAY_NAME: recipientNameChunk = (StringChunk)chunk; break; - case RECIPIENT_EMAIL: + case RECIPIENT_EMAIL_ADDRESS: recipientEmailChunk = (StringChunk)chunk; break; + case RECIPIENT_SMTP_ADDRESS: + recipientSMTPChunk = (StringChunk)chunk; + break; case DELIVERY_TYPE: deliveryTypeChunk = (StringChunk)chunk; break; @@ -117,4 +192,18 @@ public final class RecipientChunks implements ChunkGroup { // And add to the main list allChunks.add(chunk); } + + /** + * Orders by the recipient number. + */ + public static class RecipientChunksSorter implements Comparator<RecipientChunks> { + @Override + public int compare(RecipientChunks a, RecipientChunks b) { + if(a.recipientNumber < b.recipientNumber) + return -1; + if(a.recipientNumber > b.recipientNumber) + return +1; + return 0; + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java index a6ada5bb95..8bbea40893 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java @@ -25,6 +25,7 @@ import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.StringUtil.StringsIterator; /** * A text extractor for HSMF (Outlook) .msg files. 
@@ -50,7 +51,7 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { public MAPIMessage getMAPIMessage() { return (MAPIMessage)document; } - + /** * Outputs something a little like a RFC822 email */ @@ -58,20 +59,33 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { MAPIMessage msg = (MAPIMessage)document; StringBuffer s = new StringBuffer(); + StringsIterator emails; + try { + emails = new StringsIterator( + msg.getRecipientEmailAddressList() + ); + } catch(ChunkNotFoundException e) { + emails = new StringsIterator(new String[0]); + } + try { s.append("From: " + msg.getDisplayFrom() + "\n"); } catch(ChunkNotFoundException e) {} + + // For To, CC and BCC, try to match the names + // up with their email addresses. Relies on the + // Recipient Chunks being in the same order as + // people in To + CC + BCC. try { - s.append("To: " + msg.getDisplayTo() + "\n"); + handleEmails(s, "To", msg.getDisplayTo(), emails); } catch(ChunkNotFoundException e) {} try { - if(msg.getDisplayCC().length() > 0) - s.append("CC: " + msg.getDisplayCC() + "\n"); + handleEmails(s, "CC", msg.getDisplayCC(), emails); } catch(ChunkNotFoundException e) {} try { - if(msg.getDisplayBCC().length() > 0) - s.append("BCC: " + msg.getDisplayBCC() + "\n"); + handleEmails(s, "BCC", msg.getDisplayBCC(), emails); } catch(ChunkNotFoundException e) {} + try { SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss"); s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n"); @@ -85,4 +99,38 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor { return s.toString(); } + + /** + * Takes a Display focused string, eg "Nick; Jim" and an iterator + * of emails, and does its best to return something like + * "Nick ; Jim " + */ + protected void handleEmails(StringBuffer s, String type, String displayText, StringsIterator emails) { + if(displayText == null || displayText.length() == 0) { + return; + } + + String[] names = displayText.split(";\\s*"); + 
boolean first = true; + + s.append(type + ": "); + for(String name : names) { + if(first) { + first = false; + } else { + s.append("; "); + } + + s.append(name); + if(emails.hasNext()) { + String email = emails.next(); + // Append the email address in <>, assuming + // the name wasn't already the email address + if(! email.equals(name)) { + s.append( " <" + email + ">"); + } + } + } + s.append("\n"); + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index 3a38d1b3d0..2f2899f345 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -67,7 +67,7 @@ public final class POIFSChunkParser { group = new NameIdChunks(); } if(dir.getName().startsWith(RecipientChunks.PREFIX)) { - group = new RecipientChunks(); + group = new RecipientChunks(dir.getName()); } if(group != null) { diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/AllHSMFTests.java b/src/scratchpad/testcases/org/apache/poi/hsmf/AllHSMFTests.java index 80660aa05a..710d991d96 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/AllHSMFTests.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/AllHSMFTests.java @@ -21,6 +21,7 @@ import junit.framework.Test; import junit.framework.TestSuite; import org.apache.poi.hsmf.datatypes.*; +import org.apache.poi.hsmf.extractor.TestOutlookTextExtractor; import org.apache.poi.hsmf.parsers.*; public final class AllHSMFTests { @@ -34,7 +35,10 @@ public final class AllHSMFTests { suite.addTestSuite(TestChunkData.class); suite.addTestSuite(TestTypes.class); + suite.addTestSuite(TestSorters.class); + suite.addTestSuite(TestOutlookTextExtractor.class); + suite.addTestSuite(TestPOIFSChunkParser.class); return suite; diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java b/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java index 
008a4edba4..25c793339f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/TestBasics.java @@ -52,8 +52,17 @@ public final class TestBasics extends TestCase { public void testRecipientEmail() throws Exception { assertEquals("travis@overwrittenstack.com", simple.getRecipientEmailAddress()); assertEquals("kevin.roast@alfresco.org", quick.getRecipientEmailAddress()); - assertEquals("randall.scarberry@pnl.gov", outlook30.getRecipientEmailAddress()); assertEquals("nicolas1.23456@free.fr", attachments.getRecipientEmailAddress()); + + // This one has lots... + assertEquals(18, outlook30.getRecipientEmailAddressList().length); + assertEquals("shawn.bohn@pnl.gov; gus.calapristi@pnl.gov; Richard.Carter@pnl.gov; " + + "barb.cheney@pnl.gov; nick.cramer@pnl.gov; vern.crow@pnl.gov; Laura.Curtis@pnl.gov; " + + "julie.dunkle@pnl.gov; david.gillen@pnl.gov; michelle@pnl.gov; Jereme.Haack@pnl.gov; " + + "Michelle.Hart@pnl.gov; ranata.johnson@pnl.gov; grant.nakamura@pnl.gov; " + + "debbie.payne@pnl.gov; stuart.rose@pnl.gov; randall.scarberry@pnl.gov; Leigh.Williams@pnl.gov", + outlook30.getRecipientEmailAddress() + ); } /** diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/datatypes/TestSorters.java b/src/scratchpad/testcases/org/apache/poi/hsmf/datatypes/TestSorters.java new file mode 100644 index 0000000000..815fb2d661 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/datatypes/TestSorters.java @@ -0,0 +1,97 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hsmf.datatypes; + +import java.util.Arrays; + +import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter; +import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter; + +import junit.framework.TestCase; + +/** + * Checks that the sorters on the chunk groups order + * chunks properly. + */ +public final class TestSorters extends TestCase { + public void testAttachmentChunksSorter() { + AttachmentChunks[] chunks; + + // Simple + chunks = new AttachmentChunks[] { + new AttachmentChunks("__attach_version1.0_#00000001"), + new AttachmentChunks("__attach_version1.0_#00000000"), + }; + Arrays.sort(chunks, new AttachmentChunksSorter()); + assertEquals("__attach_version1.0_#00000000", chunks[0].getPOIFSName()); + assertEquals("__attach_version1.0_#00000001", chunks[1].getPOIFSName()); + + // Lots, with gaps + chunks = new AttachmentChunks[] { + new AttachmentChunks("__attach_version1.0_#00000101"), + new AttachmentChunks("__attach_version1.0_#00000001"), + new AttachmentChunks("__attach_version1.0_#00000002"), + new AttachmentChunks("__attach_version1.0_#00000005"), + new AttachmentChunks("__attach_version1.0_#00000026"), + new AttachmentChunks("__attach_version1.0_#00000000"), + new AttachmentChunks("__attach_version1.0_#000000AB"), + }; + Arrays.sort(chunks, new AttachmentChunksSorter()); + assertEquals("__attach_version1.0_#00000000", chunks[0].getPOIFSName()); + assertEquals("__attach_version1.0_#00000001", chunks[1].getPOIFSName()); + 
assertEquals("__attach_version1.0_#00000002", chunks[2].getPOIFSName()); + assertEquals("__attach_version1.0_#00000005", chunks[3].getPOIFSName()); + assertEquals("__attach_version1.0_#00000026", chunks[4].getPOIFSName()); + assertEquals("__attach_version1.0_#000000AB", chunks[5].getPOIFSName()); + assertEquals("__attach_version1.0_#00000101", chunks[6].getPOIFSName()); + } + + public void testRecipientChunksSorter() { + RecipientChunks[] chunks; + + // Simple + chunks = new RecipientChunks[] { + new RecipientChunks("__recip_version1.0_#00000001"), + new RecipientChunks("__recip_version1.0_#00000000"), + }; + Arrays.sort(chunks, new RecipientChunksSorter()); + assertEquals(0, chunks[0].recipientNumber); + assertEquals(1, chunks[1].recipientNumber); + + // Lots, with gaps + chunks = new RecipientChunks[] { + new RecipientChunks("__recip_version1.0_#00020001"), + new RecipientChunks("__recip_version1.0_#000000FF"), + new RecipientChunks("__recip_version1.0_#00000205"), + new RecipientChunks("__recip_version1.0_#00000001"), + new RecipientChunks("__recip_version1.0_#00000005"), + new RecipientChunks("__recip_version1.0_#00000009"), + new RecipientChunks("__recip_version1.0_#00000404"), + new RecipientChunks("__recip_version1.0_#00000000"), + }; + Arrays.sort(chunks, new RecipientChunksSorter()); + assertEquals(0, chunks[0].recipientNumber); + assertEquals(1, chunks[1].recipientNumber); + assertEquals(5, chunks[2].recipientNumber); + assertEquals(9, chunks[3].recipientNumber); + assertEquals(0xFF, chunks[4].recipientNumber); + assertEquals(0x205, chunks[5].recipientNumber); + assertEquals(0x404, chunks[6].recipientNumber); + assertEquals(0x20001, chunks[7].recipientNumber); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index b15bbc7249..e8c9dfdc63 100644 --- 
a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -53,7 +53,7 @@ public final class TestOutlookTextExtractor extends TestCase { String text = ext.getText(); assertContains(text, "From: Kevin Roast\n"); - assertContains(text, "To: Kevin Roast\n"); + assertContains(text, "To: Kevin Roast \n"); assertEquals(-1, text.indexOf("CC:")); assertEquals(-1, text.indexOf("BCC:")); assertContains(text, "Subject: Test the content transformer\n"); @@ -92,4 +92,77 @@ public final class TestOutlookTextExtractor extends TestCase { assertEquals(inp, poifs); assertEquals(inp, mapi); } + + /** + * Test that we correctly handle multiple To+CC+BCC + * recipients in an email we sent. + */ + public void testSentWithMulipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' , + // 'Paul Holmes-Higgin' , + // 'Mike Farman' + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' + // Bcc: 'David Caruana' , + // 'Vonka Jan' + + String[] files = new String[] { + "example_sent_regular.msg", "example_sent_unicode.msg" + }; + for(String file : files) { + MAPIMessage msg = new MAPIMessage(new POIFSFileSystem( + new FileInputStream(samples.getFile(file)) + )); + + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' ; " + + "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); + assertContains(text, "CC: 'nickb@alfresco.com' ; " + + "'nick.burch@alfresco.com' ; 'Roy Wetherall' \n"); + assertContains(text, "BCC: 'David Caruana' ; " + + "'Vonka Jan' \n"); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertEquals(-1, text.indexOf("Date:")); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + } + } + + /** + * Test that we correctly handle multiple To+CC + * recipients in an 
email we received. + */ + public void testReceivedWithMultipleRecipients() throws Exception { + // To: 'Ashutosh Dandavate' , + // 'Paul Holmes-Higgin' , + // 'Mike Farman' + // Cc: nickb@alfresco.com, nick.burch@alfresco.com, + // 'Roy Wetherall' + // (No BCC shown) + + + String[] files = new String[] { + "example_received_regular.msg", "example_received_unicode.msg" + }; + for(String file : files) { + MAPIMessage msg = new MAPIMessage(new POIFSFileSystem( + new FileInputStream(samples.getFile(file)) + )); + + OutlookTextExtactor ext = new OutlookTextExtactor(msg); + String text = ext.getText(); + + assertContains(text, "From: Mike Farman\n"); + assertContains(text, "To: 'Ashutosh Dandavate' ; " + + "'Paul Holmes-Higgin' ; 'Mike Farman' \n"); + assertContains(text, "CC: nickb@alfresco.com; " + + "nick.burch@alfresco.com; 'Roy Wetherall' \n"); + assertEquals(-1, text.indexOf("BCC:")); + assertContains(text, "Subject: This is a test message please ignore\n"); + assertEquals(-1, text.indexOf("Date:")); + assertContains(text, "The quick brown fox jumps over the lazy dog"); + } + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java b/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java index 5df734f1f2..68094e2ee8 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/parsers/TestPOIFSChunkParser.java @@ -21,6 +21,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.Calendar; import org.apache.poi.hsmf.MAPIMessage; @@ -29,6 +30,7 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.Chunks; import org.apache.poi.hsmf.datatypes.NameIdChunks; import org.apache.poi.hsmf.datatypes.RecipientChunks; +import 
org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter; import org.apache.poi.hsmf.datatypes.StringChunk; import org.apache.poi.hsmf.datatypes.Types; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; @@ -81,7 +83,7 @@ public final class TestPOIFSChunkParser extends TestCase { } } - public void testFindsRecips() throws IOException { + public void testFindsRecips() throws IOException, ChunkNotFoundException { POIFSFileSystem simple = new POIFSFileSystem( new FileInputStream(samples.getFile("quick.msg")) ); @@ -95,7 +97,9 @@ public final class TestPOIFSChunkParser extends TestCase { assertTrue(groups[2] instanceof NameIdChunks); RecipientChunks recips = (RecipientChunks)groups[1]; - assertEquals("kevin.roast@alfresco.org", recips.recipientEmailChunk.getValue()); + assertEquals("kevin.roast@alfresco.org", recips.recipientSMTPChunk.getValue()); + assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben", + recips.recipientEmailChunk.getValue()); String search = new String(recips.recipientSearchChunk.getValue(), "ASCII"); assertEquals("CN=KEVIN.ROAST@BEN\0", search.substring(search.length()-19)); @@ -103,20 +107,123 @@ public final class TestPOIFSChunkParser extends TestCase { // Now via MAPIMessage MAPIMessage msg = new MAPIMessage(simple); assertNotNull(msg.getRecipientDetailsChunks()); + assertEquals(1, msg.getRecipientDetailsChunks().length); - assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue()); + assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue()); + assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].getRecipientEmailAddress()); + assertEquals("Kevin Roast", msg.getRecipientDetailsChunks()[0].getRecipientName()); + assertEquals("kevin.roast@alfresco.org", msg.getRecipientEmailAddress()); // Try both SMTP and EX files for recipient - assertEquals("EX", 
msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue()); - assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue()); + assertEquals("EX", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue()); + assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue()); + assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben", + msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue()); + // Now look at another message msg = new MAPIMessage(new POIFSFileSystem( new FileInputStream(samples.getFile("simple_test_msg.msg")) )); - assertEquals("SMTP", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue()); - assertEquals(null, msg.getRecipientDetailsChunks().recipientEmailChunk); - assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks().recipientNameChunk.getValue()); + assertNotNull(msg.getRecipientDetailsChunks()); + assertEquals(1, msg.getRecipientDetailsChunks().length); + + assertEquals("SMTP", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue()); + assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientSMTPChunk); + assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientNameChunk); + assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue()); + assertEquals("travis@overwrittenstack.com", msg.getRecipientEmailAddress()); + } + + public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException { + POIFSFileSystem multiple = new POIFSFileSystem( + new FileInputStream(samples.getFile("example_received_unicode.msg")) + ); + + multiple.getRoot().getEntry("__recip_version1.0_#00000000"); + multiple.getRoot().getEntry("__recip_version1.0_#00000001"); + multiple.getRoot().getEntry("__recip_version1.0_#00000002"); + multiple.getRoot().getEntry("__recip_version1.0_#00000003"); + 
multiple.getRoot().getEntry("__recip_version1.0_#00000004"); + multiple.getRoot().getEntry("__recip_version1.0_#00000005"); + + ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot()); + assertEquals(9, groups.length); + assertTrue(groups[0] instanceof Chunks); + assertTrue(groups[1] instanceof RecipientChunks); + assertTrue(groups[2] instanceof AttachmentChunks); + assertTrue(groups[3] instanceof RecipientChunks); + assertTrue(groups[4] instanceof RecipientChunks); + assertTrue(groups[5] instanceof RecipientChunks); + assertTrue(groups[6] instanceof RecipientChunks); + assertTrue(groups[7] instanceof RecipientChunks); + assertTrue(groups[8] instanceof NameIdChunks); + + // In FS order initially + RecipientChunks[] chunks = new RecipientChunks[] { + (RecipientChunks)groups[1], + (RecipientChunks)groups[3], + (RecipientChunks)groups[4], + (RecipientChunks)groups[5], + (RecipientChunks)groups[6], + (RecipientChunks)groups[7], + }; + assertEquals(6, chunks.length); + assertEquals(0, chunks[0].recipientNumber); + assertEquals(4, chunks[1].recipientNumber); + assertEquals(3, chunks[2].recipientNumber); + assertEquals(2, chunks[3].recipientNumber); + assertEquals(1, chunks[4].recipientNumber); + assertEquals(5, chunks[5].recipientNumber); + + // Check + assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName()); + assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress()); + assertEquals("nick.burch@alfresco.com", chunks[1].getRecipientName()); + assertEquals("nick.burch@alfresco.com", chunks[1].getRecipientEmailAddress()); + assertEquals("nickb@alfresco.com", chunks[2].getRecipientName()); + assertEquals("nickb@alfresco.com", chunks[2].getRecipientEmailAddress()); + assertEquals("'Mike Farman'", chunks[3].getRecipientName()); + assertEquals("mikef@alfresco.com", chunks[3].getRecipientEmailAddress()); + assertEquals("'Paul Holmes-Higgin'", chunks[4].getRecipientName()); + assertEquals("paul.hh@alfresco.com", 
chunks[4].getRecipientEmailAddress()); + assertEquals("'Roy Wetherall'", chunks[5].getRecipientName()); + assertEquals("roy.wetherall@alfresco.com", chunks[5].getRecipientEmailAddress()); + + // Now sort, and re-check + Arrays.sort(chunks, new RecipientChunksSorter()); + + assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName()); + assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress()); + assertEquals("'Paul Holmes-Higgin'", chunks[1].getRecipientName()); + assertEquals("paul.hh@alfresco.com", chunks[1].getRecipientEmailAddress()); + assertEquals("'Mike Farman'", chunks[2].getRecipientName()); + assertEquals("mikef@alfresco.com", chunks[2].getRecipientEmailAddress()); + assertEquals("nickb@alfresco.com", chunks[3].getRecipientName()); + assertEquals("nickb@alfresco.com", chunks[3].getRecipientEmailAddress()); + assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientName()); + assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientEmailAddress()); + assertEquals("'Roy Wetherall'", chunks[5].getRecipientName()); + assertEquals("roy.wetherall@alfresco.com", chunks[5].getRecipientEmailAddress()); + + // Finally check on message + MAPIMessage msg = new MAPIMessage(multiple); + assertEquals(6, msg.getRecipientEmailAddressList().length); + assertEquals(6, msg.getRecipientNamesList().length); + + assertEquals("'Ashutosh Dandavate'", msg.getRecipientNamesList()[0]); + assertEquals("'Paul Holmes-Higgin'", msg.getRecipientNamesList()[1]); + assertEquals("'Mike Farman'", msg.getRecipientNamesList()[2]); + assertEquals("nickb@alfresco.com", msg.getRecipientNamesList()[3]); + assertEquals("nick.burch@alfresco.com", msg.getRecipientNamesList()[4]); + assertEquals("'Roy Wetherall'", msg.getRecipientNamesList()[5]); + + assertEquals("ashutosh.dandavate@alfresco.com", msg.getRecipientEmailAddressList()[0]); + assertEquals("paul.hh@alfresco.com", msg.getRecipientEmailAddressList()[1]); + assertEquals("mikef@alfresco.com", 
msg.getRecipientEmailAddressList()[2]); + assertEquals("nickb@alfresco.com", msg.getRecipientEmailAddressList()[3]); + assertEquals("nick.burch@alfresco.com", msg.getRecipientEmailAddressList()[4]); + assertEquals("roy.wetherall@alfresco.com", msg.getRecipientEmailAddressList()[5]); } public void testFindsNameId() throws IOException { diff --git a/src/testcases/org/apache/poi/util/TestStringUtil.java b/src/testcases/org/apache/poi/util/TestStringUtil.java index e05fe0611a..4f85929bd2 100644 --- a/src/testcases/org/apache/poi/util/TestStringUtil.java +++ b/src/testcases/org/apache/poi/util/TestStringUtil.java @@ -20,6 +20,8 @@ package org.apache.poi.util; import java.io.UnsupportedEncodingException; import java.text.NumberFormat; +import org.apache.poi.util.StringUtil.StringsIterator; + import junit.framework.TestCase; /** @@ -158,5 +160,43 @@ public final class TestStringUtil extends TestCase { return nf.format( num ); } + + public void testStringsIterator() { + StringsIterator i; + + + i = new StringsIterator(new String[0]); + assertFalse(i.hasNext()); + try { + i.next(); + fail(); + } catch(ArrayIndexOutOfBoundsException e) {} + + + i = new StringsIterator(new String[] {"1"}); + assertTrue(i.hasNext()); + assertEquals("1", i.next()); + + assertFalse(i.hasNext()); + try { + i.next(); + fail(); + } catch(ArrayIndexOutOfBoundsException e) {} + + + i = new StringsIterator(new String[] {"1","2","3"}); + assertTrue(i.hasNext()); + assertEquals("1", i.next()); + assertTrue(i.hasNext()); + assertEquals("2", i.next()); + assertTrue(i.hasNext()); + assertEquals("3", i.next()); + + assertFalse(i.hasNext()); + try { + i.next(); + fail(); + } catch(ArrayIndexOutOfBoundsException e) {} + } } diff --git a/test-data/hsmf/example_received_regular.msg b/test-data/hsmf/example_received_regular.msg new file mode 100644 index 0000000000..57c66b084e Binary files /dev/null and b/test-data/hsmf/example_received_regular.msg differ diff --git a/test-data/hsmf/example_received_unicode.msg 
b/test-data/hsmf/example_received_unicode.msg new file mode 100644 index 0000000000..08256f922f Binary files /dev/null and b/test-data/hsmf/example_received_unicode.msg differ diff --git a/test-data/hsmf/example_sent_regular.msg b/test-data/hsmf/example_sent_regular.msg new file mode 100644 index 0000000000..0c1c3f6dac Binary files /dev/null and b/test-data/hsmf/example_sent_regular.msg differ diff --git a/test-data/hsmf/example_sent_unicode.msg b/test-data/hsmf/example_sent_unicode.msg new file mode 100644 index 0000000000..76aa32434d Binary files /dev/null and b/test-data/hsmf/example_sent_unicode.msg differ