[github-103] hsmf: support writing properties. Thanks to Dominik Hölzl. This closes #103

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1827173 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2018-03-19 09:10:38 +00:00
parent 61bd8bf911
commit 76c3f72e5d
7 changed files with 410 additions and 30 deletions

View File

@ -25,6 +25,9 @@ public class ChunkBasedPropertyValue extends PropertyValue {
/**
 * Creates a chunk based property value by forwarding all arguments,
 * unchanged, to the three-argument {@link PropertyValue} constructor.
 *
 * @param property the property this value belongs to
 * @param flags the property flags
 * @param offsetData the raw bytes handed to the superclass as its data
 */
public ChunkBasedPropertyValue(MAPIProperty property, long flags, byte[] offsetData) {
super(property, flags, offsetData);
}
/**
 * Creates a chunk based property value with an explicit type by forwarding
 * all arguments, unchanged, to the four-argument {@link PropertyValue}
 * constructor.
 *
 * @param property the property this value belongs to
 * @param flags the property flags
 * @param offsetData the raw bytes handed to the superclass as its data
 * @param actualType the type to record for this value
 */
public ChunkBasedPropertyValue(MAPIProperty property, long flags, byte[] offsetData, Types.MAPIType actualType) {
super(property, flags, offsetData, actualType);
}
@Override
public Chunk getValue() {

View File

@ -20,6 +20,7 @@ package org.apache.poi.hsmf.datatypes;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import org.apache.poi.util.LittleEndian;
@ -28,6 +29,7 @@ import org.apache.poi.util.LittleEndian;
* byte header
*/
public class MessagePropertiesChunk extends PropertiesChunk {
private boolean isEmbedded;
private long nextRecipientId;
private long nextAttachmentId;
private long recipientCount;
@ -37,6 +39,11 @@ public class MessagePropertiesChunk extends PropertiesChunk {
super(parentGroup);
}
/**
 * Creates a message properties chunk, recording whether it belongs to an
 * embedded message.
 *
 * @param parentGroup the group passed on to the superclass constructor
 * @param isEmbedded when {@code false}, the header is read and written with
 *            an additional 8 reserved bytes after the counters ("top level
 *            properties stream only"); when {@code true} those bytes are
 *            omitted
 */
public MessagePropertiesChunk(ChunkGroup parentGroup, boolean isEmbedded) {
super(parentGroup);
this.isEmbedded = isEmbedded;
}
/**
 * @return the "next recipient id" value currently held by this chunk
 */
public long getNextRecipientId() {
return nextRecipientId;
}
@ -53,8 +60,24 @@ public class MessagePropertiesChunk extends PropertiesChunk {
return attachmentCount;
}
/**
 * Sets the "next recipient id" header value. It is emitted with
 * {@code LittleEndian.putUInt} when the properties header is written.
 *
 * @param nextRecipientId the value to store
 */
public void setNextRecipientId(long nextRecipientId) {
this.nextRecipientId = nextRecipientId;
}
/**
 * Sets the "next attachment id" header value. It is emitted with
 * {@code LittleEndian.putUInt} when the properties header is written.
 *
 * @param nextAttachmentId the value to store
 */
public void setNextAttachmentId(long nextAttachmentId) {
this.nextAttachmentId = nextAttachmentId;
}
/**
 * Sets the recipient count header value. It is emitted with
 * {@code LittleEndian.putUInt} when the properties header is written.
 *
 * @param recipientCount the value to store
 */
public void setRecipientCount(long recipientCount) {
this.recipientCount = recipientCount;
}
/**
 * Sets the attachment count header value. It is emitted with
 * {@code LittleEndian.putUInt} when the properties header is written.
 *
 * @param attachmentCount the value to store
 */
public void setAttachmentCount(long attachmentCount) {
this.attachmentCount = attachmentCount;
}
@Override
public void readValue(InputStream stream) throws IOException {
protected void readProperties(InputStream stream) throws IOException {
// 8 bytes of reserved zeros
LittleEndian.readLong(stream);
@ -64,28 +87,44 @@ public class MessagePropertiesChunk extends PropertiesChunk {
recipientCount = LittleEndian.readUInt(stream);
attachmentCount = LittleEndian.readUInt(stream);
// 8 bytes of reserved zeros
LittleEndian.readLong(stream);
if (!isEmbedded) {
// 8 bytes of reserved zeros (top level properties stream only)
LittleEndian.readLong(stream);
}
// Now properties
readProperties(stream);
super.readProperties(stream);
}
@Override
public void writeValue(OutputStream out) throws IOException {
public void readValue(InputStream value) throws IOException {
readProperties(value);
}
@Override
protected List<PropertyValue> writeProperties(OutputStream stream) throws IOException
{
// 8 bytes of reserved zeros
out.write(new byte[8]);
LittleEndian.putLong(0, stream);
// Nexts and counts
LittleEndian.putUInt(nextRecipientId, out);
LittleEndian.putUInt(nextAttachmentId, out);
LittleEndian.putUInt(recipientCount, out);
LittleEndian.putUInt(attachmentCount, out);
LittleEndian.putUInt(nextRecipientId, stream);
LittleEndian.putUInt(nextAttachmentId, stream);
LittleEndian.putUInt(recipientCount, stream);
LittleEndian.putUInt(attachmentCount, stream);
// 8 bytes of reserved zeros
out.write(new byte[8]);
if (!isEmbedded) {
// 8 bytes of reserved zeros (top level properties stream only)
LittleEndian.putLong(0, stream);
}
// Now properties
writeProperties(out);
// Now properties.
return super.writeProperties(stream);
}
/**
 * Writes the properties header to the given stream by delegating to
 * {@code writeProperties(OutputStream)}. The list of variable-length
 * properties returned by that call is discarded, so their data is not
 * written by this method.
 *
 * @param stream the stream to write the header to
 * @throws IOException if writing fails
 */
@Override
public void writeValue(OutputStream stream) throws IOException {
// write properties without variable length properties
writeProperties(stream);
}
}

View File

@ -17,13 +17,18 @@
package org.apache.poi.hsmf.datatypes;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.poi.hsmf.datatypes.PropertyValue.BooleanPropertyValue;
import org.apache.poi.hsmf.datatypes.PropertyValue.CurrencyPropertyValue;
@ -35,6 +40,7 @@ import org.apache.poi.hsmf.datatypes.PropertyValue.NullPropertyValue;
import org.apache.poi.hsmf.datatypes.PropertyValue.ShortPropertyValue;
import org.apache.poi.hsmf.datatypes.PropertyValue.TimePropertyValue;
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndian.BufferUnderrunException;
@ -51,9 +57,16 @@ import org.apache.poi.util.POILogger;
public abstract class PropertiesChunk extends Chunk {
public static final String NAME = "__properties_version1.0";
//arbitrarily selected; may need to increase
// arbitrarily selected; may need to increase
private static final int MAX_RECORD_LENGTH = 1_000_000;
// standard prefix, defined in the spec
public static final String VARIABLE_LENGTH_PROPERTY_PREFIX = "__substg1.0_";
// standard property flags, defined in the spec
public static final int PROPERTIES_FLAG_READABLE = 2;
public static final int PROPERTIES_FLAG_WRITEABLE = 4;
/** For logging problems we spot with the file */
private POILogger logger = POILogFactory.getLogger(PropertiesChunk.class);
@ -105,6 +118,13 @@ public abstract class PropertiesChunk extends Chunk {
return props;
}
/**
 * Defines a property. Multi-valued properties are not yet supported.
 * <p>
 * The value is stored under the key returned by
 * {@code value.getProperty()}, so a value previously stored for the same
 * property is replaced.
 *
 * @param value the value to store; it is dereferenced, so it must not be
 *            {@code null}
 */
public void setProperty(PropertyValue value) {
properties.put(value.getProperty(), value);
}
/**
* Returns all values for the given property, looking up chunk based ones as
* required, of null if none exist
@ -239,7 +259,7 @@ public abstract class PropertiesChunk extends Chunk {
PropertyValue propVal = null;
if (isPointer) {
// We'll match up the chunk later
propVal = new ChunkBasedPropertyValue(prop, flags, data);
propVal = new ChunkBasedPropertyValue(prop, flags, data, type);
} else if (type == Types.NULL) {
propVal = new NullPropertyValue(prop, flags, data);
} else if (type == Types.BOOLEAN) {
@ -261,7 +281,7 @@ public abstract class PropertiesChunk extends Chunk {
}
// TODO Add in the rest of the types
else {
propVal = new PropertyValue(prop, flags, data);
propVal = new PropertyValue(prop, flags, data, type);
}
if (properties.get(prop) != null) {
@ -276,7 +296,130 @@ public abstract class PropertiesChunk extends Chunk {
}
}
protected void writeProperties(OutputStream out) throws IOException {
// TODO
/**
 * Stores this chunk inside the given {@code DirectoryEntry}.
 * <p>
 * The properties header is first rendered into memory and saved as the
 * document named {@link #NAME}; afterwards one document is created for each
 * variable-length property reported while rendering the header.
 *
 * @param directory
 *            The directory that receives the documents.
 * @throws IOException
 *             If an I/O error occurs.
 */
public void writeProperties(DirectoryEntry directory) throws IOException {
    final byte[] headerBytes;
    final List<PropertyValue> variableLength;
    try (ByteArrayOutputStream header = new ByteArrayOutputStream()) {
        variableLength = writeProperties(header);
        headerBytes = header.toByteArray();
    }

    // the header stream carrying the property declarations
    directory.createDocument(NAME, new ByteArrayInputStream(headerBytes));

    // one extra node per variable-length value
    writeNodeData(directory, variableLength);
}
/**
 * Write the nodes for variable-length data. Those properties are returned by
 * {@link #writeProperties(java.io.OutputStream)}.
 * <p>
 * Each value is stored in its own document whose name is
 * {@link #VARIABLE_LENGTH_PROPERTY_PREFIX} followed by the property's file
 * name (hex id plus type ending). A value without raw data is stored as an
 * empty document, matching the header writer, which treats a {@code null}
 * raw value as zero bytes of data.
 *
 * @param directory
 *            The directory.
 * @param values
 *            The values.
 * @throws IOException
 *             If an I/O error occurs.
 */
protected void writeNodeData(DirectoryEntry directory, List<PropertyValue> values) throws IOException {
    for (PropertyValue value : values) {
        byte[] bytes = value.getRawValue();
        if (bytes == null) {
            // ByteArrayInputStream rejects null; an absent value is written as an empty node
            bytes = new byte[0];
        }
        String nodeName = VARIABLE_LENGTH_PROPERTY_PREFIX + getFileName(value.getProperty(), value.getActualType());
        directory.createDocument(nodeName, new ByteArrayInputStream(bytes));
    }
}
/**
 * Emits the property entries that make up the properties header.
 * <p>
 * Entries whose value is {@code null} or whose property id is negative are
 * skipped. Every remaining entry is written as a 4-byte tag (property id
 * and type), a 4-byte flags field and an 8-byte value field. For
 * fixed-length types the value field holds the data itself; for all other
 * types it holds a length declaration and the value is collected so the
 * caller can store its data in a separate node.
 *
 * @param out
 *            The {@code OutputStream}.
 * @return The variable-length properties that need to be written in another
 *         node.
 * @throws IOException
 *             If an I/O error occurs.
 */
protected List<PropertyValue> writeProperties(OutputStream out) throws IOException {
    List<PropertyValue> deferred = new ArrayList<>();

    for (Entry<MAPIProperty, PropertyValue> item : properties.entrySet()) {
        PropertyValue propValue = item.getValue();
        if (propValue == null) {
            continue;
        }
        MAPIProperty prop = item.getKey();
        if (prop.id < 0) {
            continue;
        }

        // generic header (page 23, point 2.4.2): the tag is the property
        // id combined with its type, which is exactly the hex file name
        String tagHex = getFileName(prop, propValue.getActualType());
        LittleEndian.putUInt(Long.parseLong(tagHex, 16), out);
        LittleEndian.putUInt(propValue.getFlags(), out); // readable + writable

        MAPIType mapped = getTypeMapping(propValue.getActualType());
        if (!mapped.isFixedLength()) {
            // page 12, point 2.1.3
            writeVariableLengthValueHeader(out, prop, mapped, propValue);
            deferred.add(propValue);
        } else {
            // page 11, point 2.1.2
            writeFixedLengthValueHeader(out, prop, mapped, propValue);
        }
    }

    return deferred;
}
/**
 * Writes the 8-byte value field of a fixed-length property entry (fixed
 * type header, page 24, point 2.4.2.1.1).
 * <p>
 * The raw value bytes are written in reverse order, followed by zero bytes
 * up to a total of 8 bytes. A {@code null} raw value yields eight zero
 * bytes.
 *
 * @param out the stream receiving the value field
 * @param property the property being written; only used to build the error
 *            message
 * @param type the mapped type of the value (not used here)
 * @param value the value whose raw bytes are written
 * @throws IOException if writing to {@code out} fails
 * @throws IllegalStateException if the raw value is longer than the 8-byte
 *             field; nothing is written in that case
 */
private void writeFixedLengthValueHeader(OutputStream out, MAPIProperty property, MAPIType type, PropertyValue value) throws IOException {
    final int fieldLength = 8;

    byte[] bytes = value.getRawValue();
    int length = bytes != null ? bytes.length : 0;
    if (length > fieldLength) {
        // fail before emitting anything instead of a NegativeArraySizeException after a partial write
        throw new IllegalStateException("Fixed-length property 0x" + Integer.toHexString(property.id)
                + " carries " + length + " bytes, but the value field only holds " + fieldLength);
    }

    // Little endian: reversed value first, the rest of the field stays zero.
    byte[] field = new byte[fieldLength];
    for (int i = 0; i < length; ++i) {
        field[length - i - 1] = bytes[i];
    }
    out.write(field);
}
/**
 * Writes the 8-byte value field of a variable-length property entry
 * (variable length header, page 24, point 2.4.2.2): a 4-byte length
 * followed by a 4-byte zero.
 * <p>
 * The declared length is the raw value's byte count (zero when there is no
 * raw value), increased by 2 for {@code Types.UNICODE_STRING} and by 1 for
 * {@code Types.ASCII_STRING}, as specified on page 25.
 *
 * @param out the stream receiving the value field
 * @param propertyEx the property being written (not used here)
 * @param type the type that decides the length adjustment
 * @param value the value whose raw byte count is declared
 * @throws IOException if writing to {@code out} fails
 */
private void writeVariableLengthValueHeader(OutputStream out, MAPIProperty propertyEx, MAPIType type,
        PropertyValue value) throws IOException {
    byte[] raw = value.getRawValue();

    int declaredLength = 0;
    if (raw != null) {
        declaredLength = raw.length;
    }

    // length adjustment per string flavour, page 25
    if (type == Types.UNICODE_STRING) {
        declaredLength = declaredLength + 2;
    } else if (type == Types.ASCII_STRING) {
        declaredLength = declaredLength + 1;
    }

    LittleEndian.putUInt(declaredLength, out);
    // second half of the field is written as zero, page 25
    LittleEndian.putUInt(0, out);
}
/**
 * Builds the hexadecimal name of a property: the upper-case hex property
 * id, left-padded with zeros to at least four characters, followed by the
 * file ending of the mapped type.
 *
 * @param property the property whose id is used
 * @param actualType the value's type; it is passed through
 *            {@code getTypeMapping} before its file ending is taken
 * @return the id and type ending as one string
 */
private String getFileName(MAPIProperty property, MAPIType actualType) {
    StringBuilder name = new StringBuilder(Integer.toHexString(property.id).toUpperCase(Locale.ROOT));
    while (name.length() < 4) {
        name.insert(0, '0');
    }
    MAPIType mapped = getTypeMapping(actualType);
    name.append(mapped.asFileEnding());
    return name.toString();
}
/**
 * Maps a value type to the type used when writing: {@code Types.ASCII_STRING}
 * becomes {@code Types.UNICODE_STRING}, every other type (including
 * {@code null}) is returned as is.
 *
 * @param type the type to map
 * @return the type to use for writing
 */
private MAPIType getTypeMapping(MAPIType type) {
    if (type == Types.ASCII_STRING) {
        return Types.UNICODE_STRING;
    }
    return type;
}
}

View File

@ -22,6 +22,7 @@ import java.util.Calendar;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
/**
* An instance of a {@link MAPIProperty} inside a {@link PropertiesChunk}. Where
@ -32,13 +33,18 @@ import org.apache.poi.util.LocaleUtil;
*/
public class PropertyValue {
private MAPIProperty property;
private MAPIType actualType;
private long flags;
protected byte[] data;
/**
 * Creates a value whose actual type is the property's usual type
 * ({@code property.usualType}).
 *
 * @param property the property this value belongs to; it is dereferenced,
 *            so it must not be {@code null}
 * @param flags the property flags
 * @param data the raw bytes of the value
 */
public PropertyValue(MAPIProperty property, long flags, byte[] data) {
this(property, flags, data, property.usualType);
}
/**
 * Creates a value with an explicitly given type. All arguments are stored
 * as passed; the {@code data} array is kept by reference, not copied.
 *
 * @param property the property this value belongs to
 * @param flags the property flags
 * @param data the raw bytes of the value
 * @param actualType the type recorded for this value
 */
public PropertyValue(MAPIProperty property, long flags, byte[] data, MAPIType actualType) {
this.property = property;
this.flags = flags;
this.data = data;
this.actualType = actualType;
}
public MAPIProperty getProperty() {
@ -56,6 +62,14 @@ public class PropertyValue {
return data;
}
/**
 * @return the internal raw byte array itself (no copy is made), so changes
 *         made to the returned array change this value
 */
public byte[] getRawValue() {
return data;
}
/**
 * @return the type recorded for this value at construction time
 */
public MAPIType getActualType() {
return actualType;
}
/**
 * Replaces the raw bytes of this value. The array is stored by reference,
 * not copied.
 *
 * @param value the new raw bytes
 */
public void setRawValue(byte[] value) {
this.data = value;
}
@ -78,7 +92,7 @@ public class PropertyValue {
public static class NullPropertyValue extends PropertyValue {
public NullPropertyValue(MAPIProperty property, long flags,
byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.NULL);
}
@Override
@ -90,7 +104,7 @@ public class PropertyValue {
public static class BooleanPropertyValue extends PropertyValue {
public BooleanPropertyValue(MAPIProperty property, long flags,
byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.BOOLEAN);
}
@Override
@ -112,7 +126,7 @@ public class PropertyValue {
public static class ShortPropertyValue extends PropertyValue {
public ShortPropertyValue(MAPIProperty property, long flags,
byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.SHORT);
}
@Override
@ -130,7 +144,7 @@ public class PropertyValue {
public static class LongPropertyValue extends PropertyValue {
public LongPropertyValue(MAPIProperty property, long flags, byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.LONG);
}
@Override
@ -149,7 +163,7 @@ public class PropertyValue {
public static class LongLongPropertyValue extends PropertyValue {
public LongLongPropertyValue(MAPIProperty property, long flags,
byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.LONG_LONG);
}
@Override
@ -168,7 +182,7 @@ public class PropertyValue {
public static class FloatPropertyValue extends PropertyValue {
public FloatPropertyValue(MAPIProperty property, long flags,
byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.FLOAT);
}
@Override
@ -186,7 +200,7 @@ public class PropertyValue {
public static class DoublePropertyValue extends PropertyValue {
public DoublePropertyValue(MAPIProperty property, long flags, byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.DOUBLE);
}
@Override
@ -210,7 +224,7 @@ public class PropertyValue {
private static final BigInteger SHIFT = BigInteger.valueOf(10000);
public CurrencyPropertyValue(MAPIProperty property, long flags, byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.CURRENCY);
}
@Override
@ -236,7 +250,7 @@ public class PropertyValue {
* (365L * 369L + 89L);
public TimePropertyValue(MAPIProperty property, long flags, byte[] data) {
super(property, flags, data);
super(property, flags, data, org.apache.poi.hsmf.datatypes.Types.TIME);
}
@Override

View File

@ -129,7 +129,8 @@ public final class POIFSChunkParser {
if (entryName.equals(PropertiesChunk.NAME)) {
if (grouping instanceof Chunks) {
// These should be the properties for the message itself
chunk = new MessagePropertiesChunk(grouping);
chunk = new MessagePropertiesChunk(grouping,
entry.getParent() != null && entry.getParent().getParent() != null);
} else {
// Will be properties on an attachment or recipient
chunk = new StoragePropertiesChunk(grouping);

View File

@ -38,7 +38,8 @@ import org.junit.runners.Suite;
TestOutlookTextExtractor.class,
TestPOIFSChunkParser.class,
TestMessageSubmissionChunkY2KRead.class,
TestMessageSubmissionChunk.class
TestMessageSubmissionChunk.class,
TestExtractEmbeddedMSG.class
})
public class AllHSMFTests {
}

View File

@ -0,0 +1,179 @@
package org.apache.poi.hsmf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Calendar;
import java.util.Map;
import java.util.TimeZone;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.ChunkBasedPropertyValue;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
import org.apache.poi.hsmf.datatypes.MessagePropertiesChunk;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.PropertiesChunk;
import org.apache.poi.hsmf.datatypes.PropertyValue;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestExtractEmbeddedMSG {
private static MAPIMessage pdfMsgAttachments;
/**
 * Initialize this test, load up the attachment_msg_pdf.msg mapi message.
 *
 * @throws IOException if the sample message cannot be opened or parsed
 */
@BeforeClass
public static void setUp() throws IOException {
POIDataSamples samples = POIDataSamples.getHSMFInstance();
pdfMsgAttachments = new MAPIMessage(samples.openResourceAsStream("attachment_msg_pdf.msg"));
}
/**
 * Closes the message that was opened in {@code setUp()}.
 *
 * @throws IOException if closing the message fails
 */
@AfterClass
public static void tearDown() throws IOException {
pdfMsgAttachments.close();
}
/**
 * Test to see if embedded message properties can be read, extracted, and
 * re-parsed.
 * <p>
 * The first attachment of the sample message is checked directly, then
 * rebuilt as a stand-alone message, serialized, parsed again and checked a
 * second time.
 *
 * @throws IOException if reading, rebuilding or re-parsing the message fails
 * @throws ChunkNotFoundException if an expected chunk is missing
 */
@Test
public void testEmbeddedMSGProperties() throws IOException, ChunkNotFoundException {
    AttachmentChunks[] attachments = pdfMsgAttachments.getAttachmentFiles();
    // a failing assertion aborts the test, so no further length check is needed
    assertEquals(2, attachments.length);

    MAPIMessage attachedMsg = attachments[0].getEmbeddedMessage();
    assertNotNull(attachedMsg);
    // test properties of embedded message
    testFixedAndVariableLengthPropertiesOfAttachedMSG(attachedMsg);

    // rebuild top level message from embedded message
    byte[] extractedAttachedMsgRaw;
    try (POIFSFileSystem extractedAttachedMsg = rebuildFromAttached(attachedMsg);
            ByteArrayOutputStream extractedAttachedMsgOut = new ByteArrayOutputStream()) {
        extractedAttachedMsg.writeFilesystem(extractedAttachedMsgOut);
        extractedAttachedMsgRaw = extractedAttachedMsgOut.toByteArray();
    }

    MAPIMessage extractedMsgTopLevel = new MAPIMessage(new ByteArrayInputStream(extractedAttachedMsgRaw));
    try {
        // test properties of rebuilt embedded message
        testFixedAndVariableLengthPropertiesOfAttachedMSG(extractedMsgTopLevel);
    } finally {
        // release the re-parsed message even when an assertion fails
        extractedMsgTopLevel.close();
    }
}
/**
 * Checks one fixed-length property (the message date) and one
 * variable-length property (the subject) of the given message against the
 * values expected for the attached sample message.
 * <p>
 * Note that this switches the message to return {@code null} for missing
 * chunks.
 *
 * @param msg the message to check
 * @throws ChunkNotFoundException if an expected chunk is missing
 */
private void testFixedAndVariableLengthPropertiesOfAttachedMSG(MAPIMessage msg) throws ChunkNotFoundException {
    // test fixed length property
    msg.setReturnNullOnMissingChunk(true);
    Calendar messageDate = msg.getMessageDate();
    assertNotNull(messageDate);

    // 2010/06/17 23:52:19 GMT; the zone is fixed before the fields are set
    // so the result never depends on the machine's default time zone
    Calendar expectedMessageDate = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
    expectedMessageDate.set(2010, Calendar.JUNE, 17, 23, 52, 19);
    expectedMessageDate.set(Calendar.MILLISECOND, 0);
    assertEquals(expectedMessageDate.getTimeInMillis(), messageDate.getTimeInMillis());

    // test variable length property (expected value first, then actual)
    assertEquals("Test Attachment", msg.getSubject());
}
/**
 * Builds a new, stand-alone POIFS document from an embedded message.
 * <p>
 * Steps, in order: recipient and attachment directories are copied into the
 * new document; the non chunk based values of the embedded message's
 * properties stream are transferred to a new top level properties chunk; a
 * name-id directory with three empty streams is created; the counters,
 * {@code STORE_SUPPORT_MASK} and {@code HASATTACH} are set; every other
 * main chunk is wrapped into a custom property; finally the properties are
 * written into the root of the new document.
 * <p>
 * NOTE(review): the byte reversal below works directly on the array
 * returned by {@code getRawValue()}, which is the value's internal array,
 * so the raw values of {@code attachedMsg} are modified by this call.
 *
 * @param attachedMsg the embedded message to rebuild
 * @return the new document; the caller is responsible for closing it
 * @throws IOException if copying or writing fails
 */
private POIFSFileSystem rebuildFromAttached(MAPIMessage attachedMsg) throws IOException {
// Create new MSG and copy properties.
POIFSFileSystem newDoc = new POIFSFileSystem();
// Single-argument constructor: isEmbedded stays false, i.e. top level layout.
MessagePropertiesChunk topLevelChunk = new MessagePropertiesChunk(null);
// Copy attachments and recipients.
int recipientscount = 0;
int attachmentscount = 0;
for (Entry entry : attachedMsg.getDirectory()) {
if (entry.getName().startsWith(RecipientChunks.PREFIX)) {
recipientscount++;
DirectoryEntry newDir = newDoc.createDirectory(entry.getName());
for (Entry e : ((DirectoryEntry) entry)) {
EntryUtils.copyNodeRecursively(e, newDir);
}
} else if (entry.getName().startsWith(AttachmentChunks.PREFIX)) {
attachmentscount++;
DirectoryEntry newDir = newDoc.createDirectory(entry.getName());
for (Entry e : ((DirectoryEntry) entry)) {
EntryUtils.copyNodeRecursively(e, newDir);
}
}
}
// Copy properties from properties stream.
MessagePropertiesChunk mpc = attachedMsg.getMainChunks().getMessageProperties();
for (Map.Entry<MAPIProperty, PropertyValue> p : mpc.getRawProperties().entrySet()) {
PropertyValue val = p.getValue();
if (!(val instanceof ChunkBasedPropertyValue)) {
// Reverse data.
// In-place swap of the value's own array; the fixed-length writer
// reverses the bytes once more when emitting them.
byte[] bytes = val.getRawValue();
for (int idx = 0; idx < bytes.length / 2; idx++) {
byte xchg = bytes[bytes.length - 1 - idx];
bytes[bytes.length - 1 - idx] = bytes[idx];
bytes[idx] = xchg;
}
MAPIType type = val.getActualType();
if (type != null && type != Types.UNKNOWN) {
topLevelChunk.setProperty(val);
}
}
}
// Create nameid entries.
DirectoryEntry nameid = newDoc.getRoot().createDirectory(NameIdChunks.NAME);
// GUID stream
nameid.createDocument(PropertiesChunk.DEFAULT_NAME_PREFIX + "00020102", new ByteArrayInputStream(new byte[0]));
// Entry stream
nameid.createDocument(PropertiesChunk.DEFAULT_NAME_PREFIX + "00030102", new ByteArrayInputStream(new byte[0]));
// String stream
nameid.createDocument(PropertiesChunk.DEFAULT_NAME_PREFIX + "00040102", new ByteArrayInputStream(new byte[0]));
// Base properties.
// Attachment/Recipient counter.
topLevelChunk.setAttachmentCount(attachmentscount);
topLevelChunk.setRecipientCount(recipientscount);
topLevelChunk.setNextAttachmentId(attachmentscount);
topLevelChunk.setNextRecipientId(recipientscount);
// Unicode string format.
topLevelChunk.setProperty(new PropertyValue(MAPIProperty.STORE_SUPPORT_MASK,
MessagePropertiesChunk.PROPERTIES_FLAG_READABLE | MessagePropertiesChunk.PROPERTIES_FLAG_WRITEABLE,
ByteBuffer.allocate(4).putInt(0x00040000).array()));
topLevelChunk.setProperty(new PropertyValue(MAPIProperty.HASATTACH,
MessagePropertiesChunk.PROPERTIES_FLAG_READABLE | MessagePropertiesChunk.PROPERTIES_FLAG_WRITEABLE,
attachmentscount == 0 ? new byte[] { 0 } : new byte[] { 1 }));
// Copy properties from MSG file system.
for (Chunk chunk : attachedMsg.getMainChunks().getChunks()) {
if (!(chunk instanceof MessagePropertiesChunk)) {
// The last four characters of the entry name are parsed as the hex type id.
String entryName = chunk.getEntryName();
String entryType = entryName.substring(entryName.length() - 4);
int iType = Integer.parseInt(entryType, 16);
MAPIType type = Types.getById(iType);
if (type != null && type != Types.UNKNOWN) {
MAPIProperty mprop = MAPIProperty.createCustom(chunk.getChunkId(), type, chunk.getEntryName());
ByteArrayOutputStream data = new ByteArrayOutputStream();
chunk.writeValue(data);
PropertyValue pval = new PropertyValue(mprop, MessagePropertiesChunk.PROPERTIES_FLAG_READABLE
| MessagePropertiesChunk.PROPERTIES_FLAG_WRITEABLE, data.toByteArray(), type);
topLevelChunk.setProperty(pval);
}
}
}
topLevelChunk.writeProperties(newDoc.getRoot());
return newDoc;
}
}