Fix inspired by bug #45804 - Update HSMF to handle Outlook 3.0 msg files, which have a different string chunk type

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@695649 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-09-15 21:51:14 +00:00
parent 45c9804d3a
commit 2ee2c75d25
11 changed files with 269 additions and 38 deletions

View File

@ -36,7 +36,8 @@
</devs> </devs>
<!-- Don't forget to update status.xml too! --> <!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45804 - Update HSMF to handle Outlook 3.0 msg files, which have a different string chunk type</action>
<action dev="POI-DEVELOPERS" type="add">Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though)</action> <action dev="POI-DEVELOPERS" type="add">Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though)</action>
<action dev="POI-DEVELOPERS" type="fix">45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due to eager initialisation of SheetReferences</action> <action dev="POI-DEVELOPERS" type="fix">45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due to eager initialisation of SheetReferences</action>
<action dev="POI-DEVELOPERS" type="add">Made HSSFFormulaEvaluator no longer require initialisation with sheet or row</action> <action dev="POI-DEVELOPERS" type="add">Made HSSFFormulaEvaluator no longer require initialisation with sheet or row</action>

View File

@ -33,7 +33,8 @@
<!-- Don't forget to update changes.xml too! --> <!-- Don't forget to update changes.xml too! -->
<changes> <changes>
<release version="3.1.1-alpha1" date="2008-??-??"> <release version="3.2-alpha1" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">45804 - Update HSMF to handle Outlook 3.0 msg files, which have a different string chunk type</action>
<action dev="POI-DEVELOPERS" type="add">Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though)</action> <action dev="POI-DEVELOPERS" type="add">Expose the name of Named Cell Styles via HSSFCellStyle (normally held on the parent style though)</action>
<action dev="POI-DEVELOPERS" type="fix">45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due to eager initialisation of SheetReferences</action> <action dev="POI-DEVELOPERS" type="fix">45978 - Fixed IOOBE in Ref3DPtg.toFormulaString() due to eager initialisation of SheetReferences</action>
<action dev="POI-DEVELOPERS" type="add">Made HSSFFormulaEvaluator no longer require initialisation with sheet or row</action> <action dev="POI-DEVELOPERS" type="add">Made HSSFFormulaEvaluator no longer require initialisation with sheet or row</action>

View File

@ -37,6 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class MAPIMessage { public class MAPIMessage {
private POIFSChunkParser chunkParser; private POIFSChunkParser chunkParser;
private POIFSFileSystem fs; private POIFSFileSystem fs;
private Chunks chunks;
/** /**
* Constructor for creating new files. * Constructor for creating new files.
@ -64,6 +65,10 @@ public class MAPIMessage {
public MAPIMessage(InputStream in) throws IOException { public MAPIMessage(InputStream in) throws IOException {
this.fs = new POIFSFileSystem(in); this.fs = new POIFSFileSystem(in);
chunkParser = new POIFSChunkParser(this.fs); chunkParser = new POIFSChunkParser(this.fs);
// Figure out the right string type, based on
// the chunks present
chunks = chunkParser.identifyChunks();
} }
@ -87,7 +92,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getTextBody() throws IOException, ChunkNotFoundException { public String getTextBody() throws IOException, ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().textBodyChunk); return getStringFromChunk(chunks.textBodyChunk);
} }
/** /**
@ -96,7 +101,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getSubject() throws ChunkNotFoundException { public String getSubject() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().subjectChunk); return getStringFromChunk(chunks.subjectChunk);
} }
@ -107,7 +112,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getDisplayTo() throws ChunkNotFoundException { public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().displayToChunk); return getStringFromChunk(chunks.displayToChunk);
} }
/** /**
@ -117,7 +122,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getDisplayFrom() throws ChunkNotFoundException { public String getDisplayFrom() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().displayFromChunk); return getStringFromChunk(chunks.displayFromChunk);
} }
/** /**
@ -127,7 +132,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getDisplayCC() throws ChunkNotFoundException { public String getDisplayCC() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().displayCCChunk); return getStringFromChunk(chunks.displayCCChunk);
} }
/** /**
@ -137,7 +142,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getDisplayBCC() throws ChunkNotFoundException { public String getDisplayBCC() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().displayBCCChunk); return getStringFromChunk(chunks.displayBCCChunk);
} }
@ -148,7 +153,7 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getConversationTopic() throws ChunkNotFoundException { public String getConversationTopic() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().conversationTopic); return getStringFromChunk(chunks.conversationTopic);
} }
/** /**
@ -160,6 +165,6 @@ public class MAPIMessage {
* @throws ChunkNotFoundException * @throws ChunkNotFoundException
*/ */
public String getMessageClass() throws ChunkNotFoundException { public String getMessageClass() throws ChunkNotFoundException {
return getStringFromChunk(Chunks.getInstance().messageClass); return getStringFromChunk(chunks.messageClass);
} }
} }

View File

@ -25,17 +25,39 @@ package org.apache.poi.hsmf.datatypes;
*/ */
public class Chunks { public class Chunks {
/* String parts of Outlook Messages that are currently known */ /* String parts of Outlook Messages that are currently known */
public StringChunk messageClass = new StringChunk(0x001A); //Type of message that the MSG represents (ie. IPM.Note)
public StringChunk textBodyChunk = new StringChunk(0x1000); //BODY Chunk, for plain/text messages
public StringChunk subjectChunk = new StringChunk(0x0037); //Subject link chunk, in plain/text
public StringChunk displayToChunk = new StringChunk(0x0E04); //Value that is in the TO field (not actually the addresses as they are stored in recip directory nodes
public StringChunk displayFromChunk = new StringChunk(0x0C1A); //Value that is in the FROM field
public StringChunk displayCCChunk = new StringChunk(0x0E03); //value that shows in the CC field
public StringChunk displayBCCChunk = new StringChunk(0x0E02); //Value that shows in the BCC field
public StringChunk conversationTopic = new StringChunk(0x0070); //Sort of like the subject line, but without the RE: and FWD: parts.
public StringChunk sentByServerType = new StringChunk(0x0075); //Type of server that the message originated from (SMTP, etc).
public static Chunks getInstance() { /** Type of message that the MSG represents (ie. IPM.Note) */
return new Chunks(); public StringChunk messageClass;
/** BODY Chunk, for plain/text messages */
public StringChunk textBodyChunk;
/** Subject link chunk, in plain/text */
public StringChunk subjectChunk;
/** Value that is in the TO field (not actually the addresses, as they are stored in recip directory nodes) */
public StringChunk displayToChunk;
/** Value that is in the FROM field */
public StringChunk displayFromChunk;
/** Value that shows in the CC field */
public StringChunk displayCCChunk;
/** Value that shows in the BCC field */
public StringChunk displayBCCChunk;
/** Sort of like the subject line, but without the RE: and FWD: parts. */
public StringChunk conversationTopic;
/** Type of server that the message originated from (SMTP, etc). */
public StringChunk sentByServerType;
/**
 * Creates every known string chunk definition, handing the same
 * string-type flag to each {@link StringChunk}.
 *
 * @param newStringType passed unchanged to each StringChunk constructor
 */
private Chunks(boolean newStringType) {
    // Listed in ascending chunk id order
    this.messageClass      = new StringChunk(0x001A, newStringType);
    this.subjectChunk      = new StringChunk(0x0037, newStringType);
    this.conversationTopic = new StringChunk(0x0070, newStringType);
    this.sentByServerType  = new StringChunk(0x0075, newStringType);
    this.displayFromChunk  = new StringChunk(0x0C1A, newStringType);
    this.displayBCCChunk   = new StringChunk(0x0E02, newStringType);
    this.displayCCChunk    = new StringChunk(0x0E03, newStringType);
    this.displayToChunk    = new StringChunk(0x0E04, newStringType);
    this.textBodyChunk     = new StringChunk(0x1000, newStringType);
}
public static Chunks getInstance(boolean newStringType) {
return new Chunks(newStringType);
} }
} }

View File

@ -27,9 +27,26 @@ public class StringChunk extends Chunk {
private String value; private String value;
public StringChunk(int chunkId) { /**
* Creates a String Chunk, for either the old
* or new style of string chunk types.
*/
public StringChunk(int chunkId, boolean newStyleString) {
// Translate the flag into Types.NEW_STRING (true) or Types.OLD_STRING
// (false), then delegate to the (chunkId, type) constructor.
this(chunkId, getStringType(newStyleString));
}
/**
 * Picks the string type constant matching the requested style.
 *
 * @param newStyleString true for {@link Types#NEW_STRING}, false for {@link Types#OLD_STRING}
 * @return the chosen type constant
 */
private static int getStringType(boolean newStyleString) {
    return newStyleString ? Types.NEW_STRING : Types.OLD_STRING;
}
/**
* Create a String Chunk, with the specified
* type.
*/
public StringChunk(int chunkId, int type) {
this.chunkId = chunkId; this.chunkId = chunkId;
this.type = Types.STRING; this.type = type;
} }
/* (non-Javadoc) /* (non-Javadoc)

View File

@ -19,8 +19,21 @@ package org.apache.poi.hsmf.datatypes;
public class Types { public class Types {
public static int BINARY = 0x0102; public static int BINARY = 0x0102;
public static int STRING = 0x001E;
/** A string, until Outlook 3.0 */
public static int OLD_STRING = 0x001E;
/** A string, from Outlook 3.0 onwards */
public static int NEW_STRING = 0x001F;
public static int LONG = 0x0003; public static int LONG = 0x0003;
public static int TIME = 0x0040; public static int TIME = 0x0040;
public static int BOOLEAN = 0x000B; public static int BOOLEAN = 0x000B;
/**
 * Renders a type id as upper-case hexadecimal, left-padded with
 * zeros to at least four characters (e.g. 0x001E gives "001E").
 * Values needing more than four hex digits are returned unpadded.
 *
 * @param type the type id to render
 * @return the hexadecimal text for the type
 */
public static String asFileEnding(int type) {
    StringBuffer padded = new StringBuffer(Integer.toHexString(type).toUpperCase());
    while (padded.length() < 4) {
        padded.insert(0, '0');
    }
    return padded.toString();
}
} }

View File

@ -24,6 +24,8 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException; import org.apache.poi.hsmf.exceptions.DirectoryChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
@ -83,6 +85,35 @@ public class POIFSChunkParser {
this.directoryMap = this.processPOIIterator(iter); this.directoryMap = this.processPOIIterator(iter);
} }
/**
 * Builds the set of standard chunk definitions that matches the
 * string chunk type used by the entries found in the file.
 *
 * @return chunk definitions for new-style strings if any entry name
 *  ends with the new string type, otherwise for old-style strings
 * @throws IllegalStateException if entries of both string types are present
 */
public Chunks identifyChunks() {
    final String legacySuffix = Types.asFileEnding(Types.OLD_STRING);
    final String modernSuffix = Types.asFileEnding(Types.NEW_STRING);

    boolean sawLegacy = false;
    boolean sawModern = false;

    // Inspect every entry name; the string type is the name's suffix
    Iterator names = directoryMap.keySet().iterator();
    while (names.hasNext()) {
        String name = (String) names.next();
        sawLegacy |= name.endsWith(legacySuffix);
        sawModern |= name.endsWith(modernSuffix);
    }

    if (sawLegacy && sawModern) {
        throw new IllegalStateException("Your file contains string chunks of both the old and new types. Giving up");
    }
    // No string chunks at all falls back to the old type
    return Chunks.getInstance(sawModern);
}
/** /**
* Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap. * Pull the chunk data that's stored in this object's hashmap out and return it as a HashMap.
* @param entryName * @param entryName

View File

@ -33,6 +33,7 @@ public class AllTests
TestSuite suite = new TestSuite(); TestSuite suite = new TestSuite();
suite.addTestSuite(org.apache.poi.hsmf.model.TestBlankFileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestBlankFileRead.class);
suite.addTestSuite(org.apache.poi.hsmf.model.TestSimpleFileRead.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestSimpleFileRead.class);
suite.addTestSuite(org.apache.poi.hsmf.model.TestOutlook30FileRead.class);
suite.addTestSuite(org.apache.poi.hsmf.model.TestChunkData.class); suite.addTestSuite(org.apache.poi.hsmf.model.TestChunkData.class);
return suite; return suite;

View File

@ -31,42 +31,47 @@ import junit.framework.TestCase;
* *
*/ */
public class TestChunkData extends TestCase { public class TestChunkData extends TestCase {
private Chunks chunks = Chunks.getInstance(false);
public void testChunkCreate() { public void testChunkCreate() {
StringChunk chunk = new StringChunk(0x0200); StringChunk chunk = new StringChunk(0x0200, false);
TestCase.assertEquals("__substg1.0_0200001E", chunk.getEntryName()); TestCase.assertEquals("__substg1.0_0200001E", chunk.getEntryName());
/* test the lower and upper limits of the chunk ids */ /* test the lower and upper limits of the chunk ids */
chunk = new StringChunk(0x0000); chunk = new StringChunk(0x0000, false);
TestCase.assertEquals("__substg1.0_0000001E", chunk.getEntryName()); TestCase.assertEquals("__substg1.0_0000001E", chunk.getEntryName());
chunk = new StringChunk(0xFFFF); chunk = new StringChunk(0xFFFF, false);
TestCase.assertEquals("__substg1.0_FFFF001E", chunk.getEntryName()); TestCase.assertEquals("__substg1.0_FFFF001E", chunk.getEntryName());
chunk = new StringChunk(0xFFFF, true);
TestCase.assertEquals("__substg1.0_FFFF001F", chunk.getEntryName());
} }
public void testTextBodyChunk() { public void testTextBodyChunk() {
StringChunk chunk = new StringChunk(0x1000); StringChunk chunk = new StringChunk(0x1000, false);
TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().textBodyChunk.getEntryName()); TestCase.assertEquals(chunk.getEntryName(), chunks.textBodyChunk.getEntryName());
} }
public void testDisplayToChunk() { public void testDisplayToChunk() {
StringChunk chunk = new StringChunk(0x0E04); StringChunk chunk = new StringChunk(0x0E04, false);
TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayToChunk.getEntryName()); TestCase.assertEquals(chunk.getEntryName(), chunks.displayToChunk.getEntryName());
} }
public void testDisplayCCChunk() { public void testDisplayCCChunk() {
StringChunk chunk = new StringChunk(0x0E03); StringChunk chunk = new StringChunk(0x0E03, false);
TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayCCChunk.getEntryName()); TestCase.assertEquals(chunk.getEntryName(), chunks.displayCCChunk.getEntryName());
} }
public void testDisplayBCCChunk() { public void testDisplayBCCChunk() {
StringChunk chunk = new StringChunk(0x0E02); StringChunk chunk = new StringChunk(0x0E02, false);
TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().displayBCCChunk.getEntryName()); TestCase.assertEquals(chunk.getEntryName(), chunks.displayBCCChunk.getEntryName());
} }
public void testSubjectChunk() { public void testSubjectChunk() {
Chunk chunk = new StringChunk(0x0037); Chunk chunk = new StringChunk(0x0037, false);
TestCase.assertEquals(chunk.getEntryName(), Chunks.getInstance().subjectChunk.getEntryName()); TestCase.assertEquals(chunk.getEntryName(), chunks.subjectChunk.getEntryName());
} }
} }

View File

@ -0,0 +1,135 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.model;
import java.io.IOException;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import junit.framework.TestCase;
/**
 * Tests to verify that we can still work on the newer Outlook 3.0 files.
 */
public class TestOutlook30FileRead extends TestCase {
    private MAPIMessage mapiMessage;

    /**
     * Initialize this test, load up the outlook_30_msg.msg mapi message
     * from the directory named by the HSMF.testdata.path system property.
     * @throws IOException if the message file cannot be read
     */
    public TestOutlook30FileRead() throws IOException {
        String dirname = System.getProperty("HSMF.testdata.path");
        this.mapiMessage = new MAPIMessage(dirname + "/outlook_30_msg.msg");
    }

    /**
     * Test to see if we can read the CC Chunk, which is empty in this message.
     * @throws ChunkNotFoundException
     */
    public void testReadDisplayCC() throws ChunkNotFoundException {
        String obtained = mapiMessage.getDisplayCC();
        String expected = "";

        // JUnit's contract is assertEquals(expected, actual)
        TestCase.assertEquals(expected, obtained);
    }

    /**
     * Test to see if we can read the To Chunk.
     * @throws ChunkNotFoundException
     */
    public void testReadDisplayTo() throws ChunkNotFoundException {
        String obtained = mapiMessage.getDisplayTo();

        assertTrue(obtained.startsWith("Bohn, Shawn"));
    }

    /**
     * Test to see if we can read the From Chunk.
     * @throws ChunkNotFoundException
     */
    public void testReadDisplayFrom() throws ChunkNotFoundException {
        String obtained = mapiMessage.getDisplayFrom();
        String expected = "Cramer, Nick";

        TestCase.assertEquals(expected, obtained);
    }

    /**
     * Test to see if we can read the BCC Chunk, which is empty in this message.
     * @throws ChunkNotFoundException
     */
    public void testReadDisplayBCC() throws ChunkNotFoundException {
        String obtained = mapiMessage.getDisplayBCC();
        String expected = "";

        TestCase.assertEquals(expected, obtained);
    }

    /**
     * Check if we can read the plain text body of the message.
     *
     * @throws Exception
     */
    public void testReadBody() throws Exception {
        String obtained = mapiMessage.getTextBody();

        assertTrue(obtained.startsWith("I am shutting down"));
    }

    /**
     * Check if we can read the subject line of the message.
     *
     * @throws Exception
     */
    public void testReadSubject() throws Exception {
        String obtained = mapiMessage.getSubject();
        String expected = "IN-SPIRE servers going down for a bit, back up around 8am";

        TestCase.assertEquals(expected, obtained);
    }

    /**
     * Check if we can read the conversation topic of the message,
     * which for this file matches the subject line.
     *
     * @throws Exception
     */
    public void testReadConversationTopic() throws Exception {
        String obtained = mapiMessage.getConversationTopic();
        TestCase.assertEquals("IN-SPIRE servers going down for a bit, back up around 8am", obtained);
    }

    /**
     * Check if we can read the message class of the message.
     *
     * @throws Exception
     */
    public void testReadMessageClass() throws Exception {
        String obtained = mapiMessage.getMessageClass();
        TestCase.assertEquals("IPM.Note", obtained);
    }
}