diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index cdb6bdfe85..34b1327028 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 47183 - Attachment support for HSMF 47154 - Handle the cell format @ as the same as General 47048 - Fixed evaluation of defined names with the 'complex' flag set 46953 - More tweaks to PageSettingsBlock parsing logic in Sheet constructor diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 2008d00b36..bfbefcfab0 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 47183 - Attachment support for HSMF 47154 - Handle the cell format @ as the same as General 47048 - Fixed evaluation of defined names with the 'complex' flag set 46953 - More tweaks to PageSettingsBlock parsing logic in Sheet constructor diff --git a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java index 46bebadcc6..760745e904 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/MAPIMessage.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Map; import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.Chunks; @@ -159,4 +160,13 @@ public class MAPIMessage { public String getMessageClass() throws ChunkNotFoundException { return getStringFromChunk(chunks.messageClass); } + + /** + * Gets the message attachments. 
+ * + * @return a map containing attachment name (String) and data (ByteArrayInputStream) + */ + public Map getAttachmentFiles() { + return this.chunkParser.getAttachmentList(); + } } diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java new file mode 100644 index 0000000000..549e2d1944 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/AttachmentChunks.java @@ -0,0 +1,45 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hsmf.datatypes; + +/** + * Collection of convenience chunks for standard parts of the MSG file attachment. 
+ */ +public class AttachmentChunks { + + public static final String namePrefix = "__attach_version1.0_#"; + + /* String parts of Outlook Messages Attachments that are currently known */ + + public ByteChunk attachData; + public StringChunk attachExtension; + public StringChunk attachFileName; + public StringChunk attachLongFileName; + public StringChunk attachMimeTag; + + private AttachmentChunks(boolean newStringType) { + attachData = new ByteChunk(0x3701, 0x0102); + attachExtension = new StringChunk(0x3703, newStringType); + attachFileName = new StringChunk(0x3704, newStringType); + attachLongFileName = new StringChunk(0x3707, newStringType); + attachMimeTag = new StringChunk(0x370E, newStringType); + } + + public static AttachmentChunks getInstance(boolean newStringType) { + return new AttachmentChunks(newStringType); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunk.java b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunk.java new file mode 100644 index 0000000000..2ecb3f08bf --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hsmf/datatypes/ByteChunk.java @@ -0,0 +1,60 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hsmf.datatypes; + +import java.io.ByteArrayOutputStream; + +/** + * A Chunk made up of a ByteArrayOutputStream. + */ + +public class ByteChunk extends Chunk { + + private ByteArrayOutputStream value; + + /** + * Creates a Byte Chunk, for either the old + * or new style of string chunk types. + */ + public ByteChunk(int chunkId, boolean newStyleString) { + this(chunkId, getStringType(newStyleString)); + } + private static int getStringType(boolean newStyleString) { + if(newStyleString) + return Types.NEW_STRING; + return Types.OLD_STRING; + } + + /** + * Create a Byte Chunk, with the specified + * type. + */ + public ByteChunk(int chunkId, int type) { + this.chunkId = chunkId; + this.type = type; + } + + public ByteArrayOutputStream getValueByteArray() { + return this.value; + } + + public void setValue(ByteArrayOutputStream value) { + this.value = value; + } + + +} diff --git a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java index 108a28b4c0..5004285050 100644 --- a/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java +++ b/src/scratchpad/src/org/apache/poi/hsmf/parsers/POIFSChunkParser.java @@ -17,12 +17,16 @@ package org.apache.poi.hsmf.parsers; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.Chunks; import org.apache.poi.hsmf.datatypes.Types; @@ -89,14 +93,30 @@ public class POIFSChunkParser { * appropriate for the chunks we find in the file. 
*/ public Chunks identifyChunks() { + return Chunks.getInstance(this.isNewChunkVersion(this.directoryMap)); + } + + /** + * Returns a list of the standard chunk types, as + * appropriate for the chunks we find in the file attachment. + */ + private AttachmentChunks identifyAttachmentChunks(Map attachmentMap) { + return AttachmentChunks.getInstance(this.isNewChunkVersion(attachmentMap)); + } + + /** + * Return chunk version of the map in parameter + */ + private boolean isNewChunkVersion(Map map) { // Are they of the old or new type of strings? boolean hasOldStrings = false; boolean hasNewStrings = false; String oldStringEnd = Types.asFileEnding(Types.OLD_STRING); String newStringEnd = Types.asFileEnding(Types.NEW_STRING); - for(Iterator i = directoryMap.keySet().iterator(); i.hasNext();) { + for(Iterator i = map.keySet().iterator(); i.hasNext();) { String entry = (String)i.next(); + if(entry.endsWith( oldStringEnd )) { hasOldStrings = true; } @@ -108,9 +128,9 @@ public class POIFSChunkParser { if(hasOldStrings && hasNewStrings) { throw new IllegalStateException("Your file contains string chunks of both the old and new types. 
Giving up"); } else if(hasNewStrings) { - return Chunks.getInstance(true); + return true; } - return Chunks.getInstance(false); + return false; } /** @@ -165,6 +185,39 @@ public class POIFSChunkParser { return getDocumentNode(this.directoryMap, chunk); } + /** + * + * @return a map containing attachment name (String) and data (ByteArrayInputStream) + */ + public Map getAttachmentList() { + Map attachments = new HashMap(); + List attachmentList = new ArrayList(); + for(Iterator i = directoryMap.keySet().iterator(); i.hasNext();) { + String entry = (String)i.next(); + + if(entry.startsWith(AttachmentChunks.namePrefix)) { + String attachmentIdString = entry.replace(AttachmentChunks.namePrefix, ""); + try { + int attachmentId = Integer.parseInt(attachmentIdString); + attachmentList.add((HashMap)directoryMap.get(entry)); + } catch (NumberFormatException nfe) { + System.err.println("Invalid attachment id"); + } + } + } + for (Iterator iterator = attachmentList.iterator(); iterator.hasNext();) { + HashMap AttachmentChunkMap = (HashMap) iterator.next(); + AttachmentChunks attachmentChunks = this.identifyAttachmentChunks(AttachmentChunkMap); + try { + Chunk fileName = this.getDocumentNode(AttachmentChunkMap, attachmentChunks.attachLongFileName); + Chunk content = this.getDocumentNode(AttachmentChunkMap, attachmentChunks.attachData); + attachments.put(fileName.toString(), new ByteArrayInputStream(content.getValueByteArray().toByteArray())); + } catch (ChunkNotFoundException e) { + System.err.println("Invalid attachment chunk"); + } + } + return attachments; + } /** * Processes an iterator returned by a POIFS call to getRoot().getEntries() diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java b/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java index e117ab89ee..d6f072dfed 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/AllTests.java @@ -35,6 +35,7 @@ public class AllTests 
suite.addTestSuite(org.apache.poi.hsmf.model.TestSimpleFileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestOutlook30FileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestChunkData.class); + suite.addTestSuite(org.apache.poi.hsmf.model.TestFileWithAttachmentsRead.class); return suite; } diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/data/attachment_test_msg.msg b/src/scratchpad/testcases/org/apache/poi/hsmf/data/attachment_test_msg.msg new file mode 100644 index 0000000000..ea82d915ed Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hsmf/data/attachment_test_msg.msg differ diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestFileWithAttachmentsRead.java b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestFileWithAttachmentsRead.java new file mode 100644 index 0000000000..df105dc0c8 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/model/TestFileWithAttachmentsRead.java @@ -0,0 +1,86 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hsmf.model; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; + +/** + * Tests to verify that we can read attachments from msg file + * + * @author Nicolas Bureau + */ +public class TestFileWithAttachmentsRead extends TestCase { + private MAPIMessage mapiMessage; + + /** + * Initialize this test, load up the attachment_test_msg.msg mapi message. + * + * @throws Exception + */ + public TestFileWithAttachmentsRead() throws IOException { + String dirname = System.getProperty("HSMF.testdata.path"); + this.mapiMessage = new MAPIMessage(dirname + "/attachment_test_msg.msg"); + } + + /** + * Test to see if we can retrieve attachments. + * + * @throws ChunkNotFoundException + * + */ + // public void testReadDisplayCC() throws ChunkNotFoundException { + public void testRetrieveAttachments() { + Map attachmentsMap = mapiMessage.getAttachmentFiles(); + int obtained = attachmentsMap.size(); + int expected = 2; + + TestCase.assertEquals(obtained, expected); + } + + /** + * Test to see if attachments are not empty. 
+ * + * @throws ChunkNotFoundException + * + */ + public void testReadAttachments() throws IOException { + Map attachmentsMap = mapiMessage.getAttachmentFiles(); + + for (Iterator iterator = attachmentsMap.keySet().iterator(); iterator.hasNext();) { + String fileName = (String) iterator.next(); + ByteArrayInputStream fileStream = (ByteArrayInputStream) attachmentsMap.get(fileName); + ByteArrayOutputStream fileContent = new ByteArrayOutputStream(); + + while (fileStream.available() > 0) { + fileContent.write(fileStream.read()); + } + String obtained = new String(fileContent.toByteArray(), "UTF-8"); + assertTrue(obtained.trim().length() > 0); + } + } + +}