github-167 - HSMF enhancements

introduce NameIdChunks.getPropertyTag:
which resolves the property tag of a named property, identified by its name or numerical id within a property set (a simple version of IMAPIProp::GetIDsFromNames)

AttachmentChunks.getAttachData:
use the new ByteChunkDeferred instead of ByteChunk. It defers reading of the attachment data, so attachments are no longer read completely into memory while parsing, which could cause OutOfMemoryErrors on e-mails with big attachments.

POIFSChunkParser:
support reading multi valued chunks (e.g. required when reading the Keywords ("categories") property)

add MAPIProperty.RECEIVED_BY_SMTP_ADDRESS

add unit tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1874990 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2020-03-08 23:26:53 +00:00
parent ef90a5f2c8
commit 40f320bcf9
8 changed files with 690 additions and 180 deletions

View File

@ -0,0 +1,100 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.util.IOUtils;
/**
* A Chunk that either acts as {@link ByteChunk} (if not initialized with a node) or
* lazy loads its binary data from the document (if linked with a node via {@link #readValue(DocumentNode)}).
*/
public class ByteChunkDeferred extends ByteChunk {
    /** The linked document node, or {@code null} while this chunk acts as a plain {@link ByteChunk}. */
    private DocumentNode node;

    /**
     * Creates a Byte Stream Chunk, with the specified type.
     */
    public ByteChunkDeferred(String namePrefix, int chunkId, MAPIType type) {
        super(namePrefix, chunkId, type);
    }

    /**
     * Links the chunk to a document. No data is read here; the bytes are
     * fetched from the node each time they are requested.
     *
     * @param node the document node
     */
    public void readValue(DocumentNode node) {
        this.node = node;
    }

    /**
     * Reads the value from the given stream, unless the chunk is linked to a
     * document node - in that case the stream is ignored and left untouched.
     *
     * @param value the stream to read the chunk data from
     * @throws IOException if reading via the superclass fails
     */
    @Override
    public void readValue(InputStream value) throws IOException {
        if (node == null) {
            super.readValue(value);
        }
    }

    /**
     * Writes the chunk data to the given stream. If linked to a document node,
     * the data is copied from the document without being kept by this chunk.
     *
     * @param out the stream to write to
     * @throws IOException if the document can't be read or the stream can't be written
     */
    @Override
    public void writeValue(OutputStream out) throws IOException {
        if (node == null) {
            super.writeValue(out);
            return;
        }

        try (DocumentInputStream dis = createDocumentInputStream()) {
            IOUtils.copy(dis, out);
        }
    }

    /**
     * Get bytes directly.
     *
     * @return the chunk data; if the chunk is linked to a document node the data
     *      is read from the document on every call, and {@code null} is returned
     *      when that read fails with an {@link IOException}
     */
    @Override
    public byte[] getValue() {
        if (node == null) {
            return super.getValue();
        }

        try (DocumentInputStream dis = createDocumentInputStream()) {
            return IOUtils.toByteArray(dis, node.getSize());
        } catch (IOException e) {
            // callers of getValue() can't handle a checked exception, so a failed
            // read is reported as "no data"
            return null;
        }
    }

    /**
     * Set bytes directly.
     * <p>
     * updating the linked document node/msg file directly would be unexpected,
     * so we remove the link and act as a ByteChunk from then
     */
    @Override
    public void setValue(byte[] value) {
        node = null;
        super.setValue(value);
    }

    /** Opens a new stream on the linked node via its parent directory. */
    private DocumentInputStream createDocumentInputStream() throws IOException {
        return ((DirectoryNode) node.getParent()).createDocumentInputStream(node);
    }
}

View File

@ -44,7 +44,13 @@ public final class Chunks implements ChunkGroupWithProperties {
* Normally a property will have zero chunks (fixed sized) or one chunk * Normally a property will have zero chunks (fixed sized) or one chunk
* (variable size), but in some cases (eg Unknown) you may get more. * (variable size), but in some cases (eg Unknown) you may get more.
*/ */
private Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>(); private final Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>();
/**
* Holds all the unknown properties that were found, indexed by their property id and property type.
* All unknown properties have a custom properties instance.
*/
private final Map<Long, MAPIProperty> unknownProperties = new HashMap<>();
/** Type of message that the MSG represents (ie. IPM.Note) */ /** Type of message that the MSG represents (ie. IPM.Note) */
private StringChunk messageClass; private StringChunk messageClass;
@ -188,6 +194,14 @@ public final class Chunks implements ChunkGroupWithProperties {
public void record(Chunk chunk) { public void record(Chunk chunk) {
// Work out what MAPIProperty this corresponds to // Work out what MAPIProperty this corresponds to
MAPIProperty prop = MAPIProperty.get(chunk.getChunkId()); MAPIProperty prop = MAPIProperty.get(chunk.getChunkId());
if (prop == MAPIProperty.UNKNOWN) {
long id = (chunk.getChunkId() << 16) + chunk.getType().getId();
prop = unknownProperties.get(id);
if (prop == null) {
prop = MAPIProperty.createCustom(chunk.getChunkId(), chunk.getType(), chunk.getEntryName());
unknownProperties.put(id, prop);
}
}
// Assign it for easy lookup, as best we can // Assign it for easy lookup, as best we can
if (prop == MAPIProperty.MESSAGE_CLASS) { if (prop == MAPIProperty.MESSAGE_CLASS) {

View File

@ -43,6 +43,7 @@ import org.apache.poi.hsmf.datatypes.Types.MAPIType;
* https://msdn.microsoft.com/en-us/library/microsoft.exchange.data.contenttypes.tnef.tnefpropertyid(v=exchg.150).aspx * https://msdn.microsoft.com/en-us/library/microsoft.exchange.data.contenttypes.tnef.tnefpropertyid(v=exchg.150).aspx
* http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx * http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx
*/ */
@SuppressWarnings("unused")
public class MAPIProperty { public class MAPIProperty {
private static Map<Integer, MAPIProperty> attributes = new HashMap<>(); private static Map<Integer, MAPIProperty> attributes = new HashMap<>();
@ -790,6 +791,8 @@ public class MAPIProperty {
new MAPIProperty(0x3f, BINARY, "ReceivedByEntryId", "PR_RECEIVED_BY_ENTRYID"); new MAPIProperty(0x3f, BINARY, "ReceivedByEntryId", "PR_RECEIVED_BY_ENTRYID");
public static final MAPIProperty RECEIVED_BY_NAME = public static final MAPIProperty RECEIVED_BY_NAME =
new MAPIProperty(0x40, ASCII_STRING, "ReceivedByName", "PR_RECEIVED_BY_NAME"); new MAPIProperty(0x40, ASCII_STRING, "ReceivedByName", "PR_RECEIVED_BY_NAME");
public static final MAPIProperty RECEIVED_BY_SMTP_ADDRESS =
new MAPIProperty(0x5D07, ASCII_STRING, "ReceivedBySmtpAddress", "PR_RECEIVED_BY_SMTP_ADDRESS");
public static final MAPIProperty RECIPIENT_DISPLAY_NAME = public static final MAPIProperty RECIPIENT_DISPLAY_NAME =
new MAPIProperty(0x5ff6, Types.UNICODE_STRING, "RecipientDisplayName", null); new MAPIProperty(0x5ff6, Types.UNICODE_STRING, "RecipientDisplayName", null);
public static final MAPIProperty RECIPIENT_ENTRY_ID = public static final MAPIProperty RECIPIENT_ENTRY_ID =
@ -1095,7 +1098,7 @@ public class MAPIProperty {
return new CustomMAPIProperty(id, type, name, null); return new CustomMAPIProperty(id, type, name, null);
} }
private static class CustomMAPIProperty extends MAPIProperty { private static final class CustomMAPIProperty extends MAPIProperty {
private CustomMAPIProperty(int id, MAPIType usualType, String name, String mapiProperty) { private CustomMAPIProperty(int id, MAPIType usualType, String name, String mapiProperty) {
super(id, usualType, name, mapiProperty); super(id, usualType, name, mapiProperty);
} }

View File

@ -19,6 +19,14 @@ package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.function.Consumer;
import org.apache.commons.codec.digest.PureJavaCrc32;
import org.apache.poi.hpsf.ClassID;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
import org.apache.poi.util.StringUtil;
/** /**
* Collection of convenience chunks for the NameID part of an outlook file * Collection of convenience chunks for the NameID part of an outlook file
@ -26,6 +34,43 @@ import java.util.List;
public final class NameIdChunks implements ChunkGroup { public final class NameIdChunks implements ChunkGroup {
public static final String NAME = "__nameid_version1.0"; public static final String NAME = "__nameid_version1.0";
/**
 * Property set GUIDs identified by the PS_* names.
 */
public enum PropertySetType {
    PS_MAPI("00020328-0000-0000-C000-000000000046"),
    PS_PUBLIC_STRINGS("00020329-0000-0000-C000-000000000046"),
    PS_INTERNET_HEADERS("00020386-0000-0000-C000-000000000046");

    /** The GUID of the property set; assigned once, so enum constants can't be re-pointed by callers. */
    public final ClassID classID;

    PropertySetType(String uuid) {
        classID = new ClassID(uuid);
    }
}
/**
 * Property set GUIDs identified by the PSETID_* names.
 */
public enum PredefinedPropertySet {
    PSETID_COMMON("00062008-0000-0000-C000-000000000046"),
    PSETID_ADDRESS("00062004-0000-0000-C000-000000000046"),
    PSETID_APPOINTMENT("00062002-0000-0000-C000-000000000046"),
    PSETID_MEETING("6ED8DA90-450B-101B-98DA-00AA003F1305"),
    PSETID_LOG("0006200A-0000-0000-C000-000000000046"),
    PSETID_MESSAGING("41F28F13-83F4-4114-A584-EEDB5A6B0BFF"),
    PSETID_NOTE("0006200E-0000-0000-C000-000000000046"),
    PSETID_POST_RSS("00062041-0000-0000-C000-000000000046"),
    PSETID_TASK("00062003-0000-0000-C000-000000000046"),
    PSETID_UNIFIED_MESSAGING("4442858E-A9E3-4E80-B900-317A210CC15B"),
    PSETID_AIR_SYNC("71035549-0739-4DCB-9163-00F0580DBBDF"),
    PSETID_SHARING("00062040-0000-0000-C000-000000000046"),
    PSETID_XML_EXTRACTED_ENTITIES("23239608-685D-4732-9C55-4C95CB4E8E33"),
    PSETID_ATTACHMENT("96357F7F-59E1-47D0-99A7-46515C183B54");

    /** The GUID of the property set; assigned once, so enum constants can't be re-pointed by callers. */
    public final ClassID classID;

    PredefinedPropertySet(String uuid) {
        classID = new ClassID(uuid);
    }
}
private ByteChunk guidStream;
private ByteChunk entryStream;
private ByteChunk stringStream;
/** Holds all the chunks that were found. */ /** Holds all the chunks that were found. */
private List<Chunk> allChunks = new ArrayList<>(); private List<Chunk> allChunks = new ArrayList<>();
@ -43,6 +88,19 @@ public final class NameIdChunks implements ChunkGroup {
*/ */
@Override @Override
public void record(Chunk chunk) { public void record(Chunk chunk) {
if (chunk.getType() == Types.BINARY) {
switch (chunk.getChunkId()) {
case 2:
guidStream = (ByteChunk)chunk;
break;
case 3:
entryStream = (ByteChunk)chunk;
break;
case 4:
stringStream = (ByteChunk)chunk;
break;
}
}
allChunks.add(chunk); allChunks.add(chunk);
} }
@ -54,4 +112,165 @@ public final class NameIdChunks implements ChunkGroup {
// Currently, we don't need to do anything special once // Currently, we don't need to do anything special once
// all the chunks have been located // all the chunks have been located
} }
/**
 * Looks up the property tag of a named property in the named properties mapping.
 * The property is identified by its property set GUID plus either a string name
 * or a numerical id.
 *
 * @param guid Property set GUID, e.g. the {@code classID} of one of the PS_* or PSETID_* constants
 * @param name Property name in case of string named property
 * @param id Property id in case of numerical named property
 * @return Property tag which can be matched with {@link org.apache.poi.hsmf.datatypes.MAPIProperty#id}
 *      or 0 if the property could not be found (or the guid/entry/string streams are missing).
 */
public long getPropertyTag(ClassID guid, String name, long id) {
    final byte[] entries = (entryStream != null) ? entryStream.getValue() : null;
    final boolean lookupImpossible =
        guidStream == null || entryStream == null || stringStream == null || guid == null || entries == null;
    if (lookupImpossible) {
        return 0;
    }

    final LittleEndianByteArrayInputStream in = new LittleEndianByteArrayInputStream(entries);
    final int entryCount = entries.length / 8;
    for (int entry = 0; entry < entryCount; entry++) {
        // each entry takes 8 bytes: uint name id/offset, ushort (guid index << 1 | kind), ushort property index
        final long nameOffset = in.readUInt();
        final int indexAndKind = in.readUShort();
        final int propertyIndex = in.readUShort();
        final int propertyKind = indexAndKind & 0x01;
        final int guidIndex = indexAndKind >>> 1;

        // only entries of the requested property set are of interest
        if (guid.equals(getPropertyGUID(guidIndex))) {
            // the holders receive the name / CRC32 of a string named property, if it could be read
            final String[] nameHolder = { null };
            final long[] crcHolder = { -1L };
            final long streamID = getStreamID(propertyKind, (int) nameOffset, guid, guidIndex,
                n -> nameHolder[0] = n, c -> crcHolder[0] = c);

            if (matchesProperty(propertyKind, nameOffset, name, nameHolder[0], id)) {
                if (propertyKind == 1 && crcHolder[0] < 0) {
                    // skip stream entry matching and return tag from property index from entry stream
                    // this code should not be reached
                    return 0x8000 + propertyIndex;
                }
                // find property index in matching stream entry
                return getPropertyTag(streamID, nameOffset, crcHolder[0]);
            }
        }
    }
    return 0;
}
/**
 * Searches the binary chunks whose chunk id equals the given stream id for an
 * 8-byte entry whose first field equals the numerical name id (kind 0) or the
 * name CRC32 (kind 1).
 *
 * @param streamID the chunk id of the stream(s) to search
 * @param nameOffset the value to match for numerical named entries
 * @param propertyNameCRC32 the value to match for string named entries
 * @return 0x8000 + the property index of the first matching entry, or 0 if nothing matched
 */
private long getPropertyTag(long streamID, long nameOffset, long propertyNameCRC32) {
    for (Chunk candidate : allChunks) {
        final boolean isTargetStream = candidate.getType() == Types.BINARY && candidate.getChunkId() == streamID;
        if (!isTargetStream) {
            continue;
        }

        final byte[] data = ((ByteChunk) candidate).getValue();
        if (data != null) {
            final LittleEndianByteArrayInputStream in = new LittleEndianByteArrayInputStream(data);
            final int entryCount = data.length / 8;
            for (int entry = 0; entry < entryCount; entry++) {
                final long identifier = in.readUInt();
                // lowest bit of the second field is the property kind
                final int kind = in.readUShort() & 0x01;
                final int propertyIndex = in.readUShort();
                final long expected = (kind == 0) ? nameOffset : propertyNameCRC32;
                if (identifier == expected) {
                    return 0x8000 + propertyIndex;
                }
            }
        }
    }
    return 0;
}
/**
 * Resolves a GUID index taken from the entry stream to a property set GUID.
 * Index 1 and 2 stand for the predefined PS_MAPI and PS_PUBLIC_STRINGS sets,
 * an index of 3 and above addresses the 16-byte record at
 * {@code (guidIndex - 3) * 16} in the guid stream.
 *
 * @param guidIndex the GUID index
 * @return the property set GUID, or {@code null} if the index is below 1 or
 *      the guid stream holds no (or not enough) data for that index
 */
private ClassID getPropertyGUID(int guidIndex) {
    if (guidIndex == 1) {
        // predefined GUID
        return PropertySetType.PS_MAPI.classID;
    } else if (guidIndex == 2) {
        // predefined GUID
        return PropertySetType.PS_PUBLIC_STRINGS.classID;
    } else if (guidIndex >= 3) {
        // GUID from guid stream
        final byte[] guidStreamBytes = guidStream.getValue();
        final int guidIndexOffset = (guidIndex - 3) * 0x10;
        // getValue() may yield null (e.g. the stream could not be read), treat that like "not found"
        if (guidStreamBytes != null && guidStreamBytes.length >= guidIndexOffset + 0x10) {
            return new ClassID(guidStreamBytes, guidIndexOffset);
        }
    }
    return null;
}
/**
 * Checks whether an entry (whose property set GUID already matched) is the requested property.
 *
 * @param propertyKind 0 for a numerical named property, 1 for a string named property
 * @param nameOffset the numerical id of the entry (only compared for kind 0)
 * @param name the requested property name
 * @param propertyName the name of the entry, may be {@code null}
 * @param id the requested numerical id, negative values never match
 * @return {@code true} if the entry matches by id (kind 0) or by name (kind 1)
 */
private static boolean matchesProperty(int propertyKind, long nameOffset, String name, String propertyName, long id) {
    if (propertyKind == 0) {
        // match property by id
        return id >= 0 && id == nameOffset;
    }
    if (propertyKind == 1) {
        // match property by name
        return name != null && name.equals(propertyName);
    }
    return false;
}
/**
 * Calculates the chunk id of the stream which holds the mapping entry of a named property,
 * i.e. {@code 0x1000 + (key % 0x1F)}, where the key is derived from the numerical id
 * (kind 0) or from the CRC32 of the property name (kind 1) and the GUID index.
 * For string named properties the name is read from the string stream and, if it could
 * be read, the name and its CRC32 are handed to the given setters.
 *
 * @param propertyKind 0 for a numerical named property, otherwise a string named property
 * @param nameOffset the numerical id (kind 0) or the offset of the name in the string stream,
 *      i.e. an unsigned 32-bit value which was narrowed to an int
 * @param guid the property set GUID, used to detect PS_INTERNET_HEADERS whose names are lower-cased
 *      before the CRC32 is calculated
 * @param guidIndex the GUID index of the entry
 * @param propertyNameSetter receives the property name, only called if the name could be read
 * @param propertyNameCRC32Setter receives the name CRC32, only called if the name could be read
 * @return the stream id; for a string named property whose name could not be read, the value
 *      is meaningless (the setters are not called in that case)
 */
private long getStreamID(int propertyKind, int nameOffset, ClassID guid, int guidIndex,
        Consumer<String> propertyNameSetter, Consumer<Long> propertyNameCRC32Setter) {
    if (propertyKind == 0) {
        // numerical named property
        // restore the unsigned 32-bit id, otherwise ids >= 0x80000000 give a negative remainder
        return 0x1000 + ((nameOffset & 0xFFFFFFFFL) ^ (guidIndex << 1)) % 0x1F;
    }

    // string named property
    final byte[] stringBytes = stringStream.getValue();
    long propertyNameCRC32 = -1;
    // the 4-byte length field must fit completely into the stream; a negative offset
    // (unsigned value beyond Integer.MAX_VALUE) can't address the array at all
    if (stringBytes != null && nameOffset >= 0 && stringBytes.length - 4 >= nameOffset) {
        final long nameLength = LittleEndian.getUInt(stringBytes, nameOffset);
        if (stringBytes.length >= nameOffset + 4 + nameLength) {
            final int nameStart = nameOffset + 4;
            final String propertyName = new String(stringBytes, nameStart, (int) nameLength, StringUtil.UTF16LE);
            if (PropertySetType.PS_INTERNET_HEADERS.classID.equals(guid)) {
                final byte[] n = propertyName.toLowerCase(Locale.ROOT).getBytes(StringUtil.UTF16LE);
                propertyNameCRC32 = calculateCRC32(n, 0, n.length);
            } else {
                propertyNameCRC32 = calculateCRC32(stringBytes, nameStart, (int) nameLength);
            }
            propertyNameSetter.accept(propertyName);
            propertyNameCRC32Setter.accept(propertyNameCRC32);
        }
    }
    return 0x1000 + (propertyNameCRC32 ^ ((guidIndex << 1) | 1)) % 0x1F;
}
/**
* Calculates the CRC32 of the given bytes (conforms to RFC 1510, SSH-1).
* The CRC32 calculation is similar to the standard one as demonstrated in RFC 1952,
* but with the inversion (before and after the calculation) omitted.
* <ul>
* <li>poly: 0x04C11DB7</li>
* <li>init: 0x00000000</li>
* <li>xor: 0x00000000</li>
* <li>revin: true</li>
* <li>revout: true</li>
* <li>check: 0x2DFD2D88 (CRC32 of "123456789")</li>
* </ul>
*
* @param buf the byte array to calculate CRC32 on
* @param off the offset within buf at which the CRC32 calculation will start
* @param len the number of bytes on which to calculate the CRC32
* @return the CRC32 value (unsigned 32-bit integer stored in a long).
*
* @see <a href="http://www.zorc.breitbandkatze.de/crc.html">CRC parameter check</a>
*/
private static long calculateCRC32(byte[] buf, int off, int len) {
    final byte[] allOnes = { -1, -1, -1, -1 };
    final PureJavaCrc32 checksum = new PureJavaCrc32();
    // set initial crc value to 0 (by feeding four 0xFF bytes before the actual data)
    checksum.update(allOnes, 0, allOnes.length);
    checksum.update(buf, off, len);
    // invert the reported value and keep it as unsigned 32-bit
    final long reported = checksum.getValue();
    return ~reported & 0xFFFFFFFFL;
}
} }

View File

@ -18,10 +18,15 @@
package org.apache.poi.hsmf.parsers; package org.apache.poi.hsmf.parsers;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ByteChunk; import org.apache.poi.hsmf.datatypes.ByteChunk;
import org.apache.poi.hsmf.datatypes.ByteChunkDeferred;
import org.apache.poi.hsmf.datatypes.Chunk; import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.ChunkGroup; import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks; import org.apache.poi.hsmf.datatypes.Chunks;
@ -50,171 +55,248 @@ import org.apache.poi.util.POILogger;
* data and so on. * data and so on.
*/ */
public final class POIFSChunkParser { public final class POIFSChunkParser {
private final static POILogger logger = POILogFactory.getLogger(POIFSChunkParser.class); private static final POILogger LOG = POILogFactory.getLogger(POIFSChunkParser.class);
public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException { private POIFSChunkParser() {}
return parse(fs.getRoot());
}
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
Chunks mainChunks = new Chunks();
ArrayList<ChunkGroup> groups = new ArrayList<>(); public static ChunkGroup[] parse(POIFSFileSystem fs) {
groups.add(mainChunks); return parse(fs.getRoot());
}
// Find our top level children public static ChunkGroup[] parse(DirectoryNode node) {
// Note - we don't handle children of children yet, as Chunks mainChunks = new Chunks();
// there doesn't seem to be any use of that in Outlook
for(Entry entry : node) {
if(entry instanceof DirectoryNode) {
DirectoryNode dir = (DirectoryNode)entry;
ChunkGroup group = null;
// Do we know what to do with it? ArrayList<ChunkGroup> groups = new ArrayList<>();
if(dir.getName().startsWith(AttachmentChunks.PREFIX)) { groups.add(mainChunks);
group = new AttachmentChunks(dir.getName());
// Find our top level children
// Note - we don't handle children of children yet, as
// there doesn't seem to be any use of that in Outlook
for (Entry entry : node) {
if (entry instanceof DirectoryNode) {
DirectoryNode dir = (DirectoryNode) entry;
ChunkGroup group = null;
// Do we know what to do with it?
if (dir.getName().startsWith(AttachmentChunks.PREFIX)) {
group = new AttachmentChunks(dir.getName());
}
if (dir.getName().startsWith(NameIdChunks.NAME)) {
group = new NameIdChunks();
}
if (dir.getName().startsWith(RecipientChunks.PREFIX)) {
group = new RecipientChunks(dir.getName());
}
if (group != null) {
processChunks(dir, group);
groups.add(group);
}
} }
if(dir.getName().startsWith(NameIdChunks.NAME)) { }
group = new NameIdChunks();
// Now do the top level chunks
processChunks(node, mainChunks);
// All chunks are now processed, have the ChunkGroup
// match up variable-length properties and their chunks
for (ChunkGroup group : groups) {
group.chunksComplete();
}
// Finish
return groups.toArray(new ChunkGroup[0]);
}
/**
* Creates all the chunks for a given Directory, but
* doesn't recurse or descend
*/
private static void processChunks(DirectoryNode node, ChunkGroup grouping) {
final Map<Integer, MultiChunk> multiChunks = new TreeMap<>();
for (Entry entry : node) {
if (entry instanceof DocumentNode ||
(entry instanceof DirectoryNode && entry.getName().endsWith(Types.DIRECTORY.asFileEnding()))) {
process(entry, grouping, multiChunks);
} }
if(dir.getName().startsWith(RecipientChunks.PREFIX)) { }
group = new RecipientChunks(dir.getName());
// Finish up variable length multivalued properties
multiChunks.entrySet().stream()
.flatMap(me -> me.getValue().getChunks().values().stream())
.filter(Objects::nonNull)
.forEach(grouping::record);
}
/**
* Creates a chunk, and gives it to its parent group
*/
private static void process(Entry entry, ChunkGroup grouping, Map<Integer, MultiChunk> multiChunks) {
final String entryName = entry.getName();
boolean[] isMultiValued = { false };
// Is it a properties chunk? (They have special names)
Chunk chunk = (PropertiesChunk.NAME.equals(entryName))
? readPropertiesChunk(grouping, entry)
: readPrimitiveChunk(entry, isMultiValued, multiChunks);
if (chunk == null) {
return;
}
if (entry instanceof DocumentNode) {
try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) {
chunk.readValue(inp);
} catch (IOException e) {
LOG.log(POILogger.ERROR, "Error reading from part " + entry.getName(), e);
} }
}
if(group != null) { if (!isMultiValued[0]) {
processChunks(dir, group); // multi value chunks will be grouped later, in the correct order
groups.add(group); grouping.record(chunk);
} else { }
// Unknown directory, skip silently }
}
}
}
// Now do the top level chunks private static Chunk readPropertiesChunk(ChunkGroup grouping, Entry entry) {
processChunks(node, mainChunks); if (grouping instanceof Chunks) {
// All chunks are now processed, have the ChunkGroup
// match up variable-length properties and their chunks
for (ChunkGroup group : groups) {
group.chunksComplete();
}
// Finish
return groups.toArray(new ChunkGroup[0]);
}
/**
* Creates all the chunks for a given Directory, but
* doesn't recurse or descend
*/
protected static void processChunks(DirectoryNode node, ChunkGroup grouping) {
for(Entry entry : node) {
if(entry instanceof DocumentNode) {
process(entry, grouping);
} else if(entry instanceof DirectoryNode) {
if(entry.getName().endsWith(Types.DIRECTORY.asFileEnding())) {
process(entry, grouping);
}
}
}
}
/**
* Creates a chunk, and gives it to its parent group
*/
protected static void process(Entry entry, ChunkGroup grouping) {
String entryName = entry.getName();
Chunk chunk = null;
// Is it a properties chunk? (They have special names)
if (entryName.equals(PropertiesChunk.NAME)) {
if (grouping instanceof Chunks) {
// These should be the properties for the message itself // These should be the properties for the message itself
chunk = new MessagePropertiesChunk(grouping, boolean isEmbedded = entry.getParent() != null && entry.getParent().getParent() != null;
entry.getParent() != null && entry.getParent().getParent() != null); return new MessagePropertiesChunk(grouping, isEmbedded);
} else { } else {
// Will be properties on an attachment or recipient // Will be properties on an attachment or recipient
chunk = new StoragePropertiesChunk(grouping); return new StoragePropertiesChunk(grouping);
} }
} else { }
// Check it's a regular chunk
if(entryName.length() < 9) {
// Name in the wrong format
return;
}
if(! entryName.contains("_")) {
// Name in the wrong format
return;
}
// Split it into its parts private static Chunk readPrimitiveChunk(Entry entry, boolean[] isMultiValue, Map<Integer, MultiChunk> multiChunks) {
int splitAt = entryName.lastIndexOf('_'); final String entryName = entry.getName();
String namePrefix = entryName.substring(0, splitAt+1); final int splitAt = entryName.lastIndexOf('_');
String ids = entryName.substring(splitAt+1);
// Make sure we got what we expected, should be of // Check it's a regular chunk
// the form __<name>_<id><type> if (entryName.length() < 9 || splitAt == -1) {
if(namePrefix.equals("Olk10SideProps") || // Name in the wrong format
namePrefix.equals("Olk10SideProps_")) { return null;
}
// Split it into its parts
final String namePrefix = entryName.substring(0, splitAt + 1);
final String ids = entryName.substring(splitAt + 1);
// Make sure we got what we expected, should be of
// the form __<name>_<id><type>
if (namePrefix.equals("Olk10SideProps") || namePrefix.equals("Olk10SideProps_")) {
// This is some odd Outlook 2002 thing, skip // This is some odd Outlook 2002 thing, skip
return; return null;
} else if(splitAt <= entryName.length()-8) { } else if (splitAt > entryName.length() - 8) {
// In the right form for a normal chunk
// We'll process this further in a little bit
} else {
// Underscores not the right place, something's wrong // Underscores not the right place, something's wrong
throw new IllegalArgumentException("Invalid chunk name " + entryName); throw new IllegalArgumentException("Invalid chunk name " + entryName);
} }
// Now try to turn it into id + type // Now try to turn it into id + type
try { final int chunkId, typeId;
int chunkId = Integer.parseInt(ids.substring(0, 4), 16); try {
int typeId = Integer.parseInt(ids.substring(4, 8), 16); chunkId = Integer.parseInt(ids.substring(0, 4), 16);
int tid = Integer.parseInt(ids.substring(4, 8), 16);
MAPIType type = Types.getById(typeId); isMultiValue[0] = (tid & Types.MULTIVALUED_FLAG) != 0;
if (type == null) { typeId = tid & ~Types.MULTIVALUED_FLAG;
type = Types.createCustom(typeId); } catch (NumberFormatException e) {
}
// Special cases based on the ID
if(chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
}
else {
// Nothing special about this ID
// So, do the usual thing which is by type
if (type == Types.BINARY) {
chunk = new ByteChunk(namePrefix, chunkId, type);
}
else if (type == Types.DIRECTORY) {
if(entry instanceof DirectoryNode) {
chunk = new DirectoryChunk((DirectoryNode)entry, namePrefix, chunkId, type);
}
}
else if (type == Types.ASCII_STRING ||
type == Types.UNICODE_STRING) {
chunk = new StringChunk(namePrefix, chunkId, type);
}
else {
// Type of an unsupported type! Skipping...
}
}
} catch(NumberFormatException e) {
// Name in the wrong format // Name in the wrong format
return; return null;
} }
}
if(chunk != null) { MAPIType type = Types.getById(typeId);
if(entry instanceof DocumentNode) { if (type == null) {
try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) { type = Types.createCustom(typeId);
chunk.readValue(inp); }
grouping.record(chunk);
} catch (IOException e) { // Special cases based on the ID
logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e); if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
} return new MessageSubmissionChunk(namePrefix, chunkId, type);
} else { } else if (type == Types.BINARY && chunkId == MAPIProperty.ATTACH_DATA.id) {
grouping.record(chunk); ByteChunkDeferred bcd = new ByteChunkDeferred(namePrefix, chunkId, type);
} if (entry instanceof DocumentNode) {
} bcd.readValue((DocumentNode) entry);
} }
return bcd;
} else {
// Nothing special about this ID
// So, do the usual thing which is by type
if (isMultiValue[0]) {
return readMultiValue(namePrefix, ids, chunkId, entry, type, multiChunks);
} else {
if (type == Types.DIRECTORY && entry instanceof DirectoryNode) {
return new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
} else if (type == Types.BINARY) {
return new ByteChunk(namePrefix, chunkId, type);
} else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
return new StringChunk(namePrefix, chunkId, type);
}
// Type of an unsupported type! Skipping...
LOG.log(POILogger.WARN, "UNSUPPORTED PROP TYPE " + entryName);
return null;
}
}
}
/**
 * Creates the chunk for one stream of a variable length multi-valued property.
 * A stream without an index suffix ({@code -<index>}) is the length stream, whose
 * content is only used to determine the number of values. A stream with an index
 * suffix holds a single value, which is registered under its index in the
 * {@link MultiChunk} of the property.
 *
 * @param namePrefix the name prefix of the entry
 * @param ids the part of the entry name after the last underscore
 * @param chunkId the property id
 * @param entry the entry, used for log messages
 * @param type the value type, with the multi-valued flag already removed
 * @param multiChunks the multi-valued properties found so far, indexed by property id
 * @return the chunk to read the entry into, or {@code null} if the index suffix is
 *      malformed or the value type isn't supported
 */
private static Chunk readMultiValue(String namePrefix, String ids, int chunkId, Entry entry, MAPIType type,
        Map<Integer, MultiChunk> multiChunks) {
    long multiValueIdx = -1;
    if (ids.contains("-")) {
        final String mvidxstr = ids.substring(ids.lastIndexOf('-') + 1);
        try {
            // the index suffix is hexadecimal - parsing it as decimal fails from index 0xA onwards
            multiValueIdx = Long.parseLong(mvidxstr, 16) & 0xFFFFFFFFL;
        } catch (NumberFormatException ignore) {
            LOG.log(POILogger.WARN, "Can't read multi value idx from entry " + entry.getName());
            // an entry with a broken index is neither a value nor the length stream
            return null;
        }
    }

    final MultiChunk mc = multiChunks.computeIfAbsent(chunkId, k -> new MultiChunk());
    if (multiValueIdx == -1) {
        // length stream: each value occupies 4 bytes here
        return new ByteChunk(chunkId, Types.BINARY) {
            @Override
            public void readValue(InputStream value) throws IOException {
                super.readValue(value);
                mc.setLength(getValue().length / 4);
            }
        };
    }

    final Chunk chunk;
    if (type == Types.BINARY) {
        chunk = new ByteChunk(namePrefix, chunkId, type);
    } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
        chunk = new StringChunk(namePrefix, chunkId, type);
    } else {
        // Type of an unsupported multivalued type! Skipping...
        LOG.log(POILogger.WARN, "Unsupported multivalued prop type for entry " + entry.getName());
        return null;
    }
    mc.addChunk((int) multiValueIdx, chunk);
    return chunk;
}
/**
 * Collects the value chunks of a single multi-valued property, sorted by their value index.
 */
private static class MultiChunk {
    /** The value chunks, keyed (and therefore iterated) by value index. */
    private final Map<Integer,Chunk> chunks = new TreeMap<>();
    /** The number of values as set via {@link #setLength(int)}, -1 if never set. */
    private int length = -1;

    Map<Integer, Chunk> getChunks() {
        return chunks;
    }

    void addChunk(int multiValueIdx, Chunk value) {
        chunks.put(multiValueIdx, value);
    }

    void setLength(int length) {
        this.length = length;
    }

    @SuppressWarnings("unused")
    int getLength() {
        return length;
    }
}
} }

View File

@ -18,19 +18,18 @@
package org.apache.poi.hsmf; package org.apache.poi.hsmf;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.hsmf.datatypes.AttachmentChunks; import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException; import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/** /**
* Tests to verify that we can read attachments from msg file * Tests to verify that we can read attachments from msg file
@ -42,8 +41,6 @@ public class TestFileWithAttachmentsRead {
/** /**
* Initialize this test, load up the attachment_test_msg.msg mapi message. * Initialize this test, load up the attachment_test_msg.msg mapi message.
*
* @throws Exception
*/ */
@BeforeClass @BeforeClass
public static void setUp() throws IOException { public static void setUp() throws IOException {
@ -62,9 +59,6 @@ public class TestFileWithAttachmentsRead {
/** /**
* Test to see if we can retrieve attachments. * Test to see if we can retrieve attachments.
*
* @throws ChunkNotFoundException
*
*/ */
@Test @Test
public void testRetrieveAttachments() { public void testRetrieveAttachments() {
@ -134,14 +128,22 @@ public class TestFileWithAttachmentsRead {
assertEquals("test-unicode.doc", attachment.getAttachLongFileName().getValue()); assertEquals("test-unicode.doc", attachment.getAttachLongFileName().getValue());
assertEquals(".doc", attachment.getAttachExtension().getValue()); assertEquals(".doc", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag()); assertNull(attachment.getAttachMimeTag());
assertEquals(24064, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data ByteArrayOutputStream attachmentstream = new ByteArrayOutputStream();
attachment.getAttachData().writeValue(attachmentstream);
assertEquals(24064, attachmentstream.size());
// or compare the hashes of the attachment data
assertEquals(24064, attachment.getAttachData().getValue().length);
attachment = twoSimpleAttachments.getAttachmentFiles()[1]; attachment = twoSimpleAttachments.getAttachmentFiles()[1];
assertEquals("pj1.txt", attachment.getAttachFileName().getValue()); assertEquals("pj1.txt", attachment.getAttachFileName().getValue());
assertEquals("pj1.txt", attachment.getAttachLongFileName().getValue()); assertEquals("pj1.txt", attachment.getAttachLongFileName().getValue());
assertEquals(".txt", attachment.getAttachExtension().getValue()); assertEquals(".txt", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag()); assertNull(attachment.getAttachMimeTag());
assertEquals(89, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data // or compare the hashes of the attachment data
assertEquals(89, attachment.getAttachData().getValue().length);
attachmentstream = new ByteArrayOutputStream();
attachment.getAttachData().writeValue(attachmentstream);
assertEquals(89, attachmentstream.size());
} }
/** /**
@ -161,7 +163,8 @@ public class TestFileWithAttachmentsRead {
assertEquals(".pdf", attachment.getAttachExtension().getValue()); assertEquals(".pdf", attachment.getAttachExtension().getValue());
assertNull(attachment.getAttachMimeTag()); assertNull(attachment.getAttachMimeTag());
assertNull(attachment.getAttachmentDirectory()); assertNull(attachment.getAttachmentDirectory());
assertEquals(13539, attachment.getAttachData().getValue().length); //or compare the hashes of the attachment data //or compare the hashes of the attachment data
assertEquals(13539, attachment.getAttachData().getValue().length);
// First in a nested message // First in a nested message
attachment = pdfMsgAttachments.getAttachmentFiles()[0]; attachment = pdfMsgAttachments.getAttachmentFiles()[0];

View File

@ -0,0 +1,89 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf;
import static org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet.PSETID_COMMON;
import static org.apache.poi.hsmf.datatypes.NameIdChunks.PropertySetType.PS_PUBLIC_STRINGS;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests to verify that we can read properties identified by name or id in property sets.
 * <p>
 * All tests share a single message that is loaded once from the {@code keywords.msg}
 * sample; property tags are resolved through {@code getNameIdChunks().getPropertyTag(...)}
 * and the resolved tag is then used to look the values up in the main chunks.
 */
public class TestNameIdChunks {
    /** The parsed sample message shared by all tests; set in {@link #setUp()}. */
    private static MAPIMessage keywordsMsg;

    /**
     * Initialize this test, load up the keywords.msg mapi message.
     *
     * @throws IOException if loading the sample message fails with an I/O error
     */
    @BeforeClass
    public static void setUp() throws IOException {
        POIDataSamples samples = POIDataSamples.getHSMFInstance();
        try (InputStream is = samples.openResourceAsStream("keywords.msg")) {
            keywordsMsg = new MAPIMessage(is);
        }
    }

    /**
     * Closes the shared sample message once all tests have run.
     *
     * @throws IOException if closing the message fails
     */
    @AfterClass
    public static void tearDown() throws IOException {
        // JUnit still invokes @AfterClass methods when a @BeforeClass method has thrown.
        // Without this guard a load failure in setUp() would additionally be reported
        // as a NullPointerException from here, obscuring the real cause.
        if (keywordsMsg != null) {
            keywordsMsg.close();
        }
    }

    /**
     * Test to see if we can read the keywords list from the msg.
     * The keywords property is a property identified by the name "Keywords" in the property set PS_PUBLIC_STRINGS.
     */
    @Test
    public void testReadKeywords() {
        // Lookup by name: the name is given and 0 is passed for the numeric id.
        long keywordsPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PS_PUBLIC_STRINGS.classID, "Keywords", 0);
        assertEquals(0x8003, keywordsPropTag);

        // The property is expected to yield several values, in this order.
        String[] exp = { "TODO", "Currently Important", "Currently To Do", "Test" };
        String[] act = getValues(keywordsPropTag);
        assertArrayEquals(exp, act);
    }

    /**
     * Test to see if we can read the current version name from the msg.
     * The current version name property is a property identified by the id 0x8554 in the property set PSETID_Common.
     */
    @Test
    public void testCurrentVersionName() {
        // Lookup by id: no name is given (null) and the numeric id 0x8554 is passed instead.
        long testPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PSETID_COMMON.classID, null, 0x8554);
        assertEquals(0x8006, testPropTag);

        String[] exp = { "16.0" };
        String[] act = getValues(testPropTag);
        assertArrayEquals(exp, act);
    }

    /**
     * Collects the string values of all main chunks whose property id equals the given tag.
     * <p>
     * Every matching chunk is cast to {@link StringChunk}, so a chunk of any other type
     * stored under that tag fails the calling test with a {@link ClassCastException}.
     *
     * @param tag the property tag to look up, as returned by {@code getPropertyTag(...)}
     * @return the values of the matching chunks in the order the chunks are stored;
     *         an empty array if no chunk matches
     */
    private String[] getValues(long tag) {
        return keywordsMsg.getMainChunks().getAll().entrySet().stream()
            .filter(me -> me.getKey().id == tag)
            // one property may map to several chunks, flatten them into one stream
            .flatMap(me -> me.getValue().stream())
            .map(c -> ((StringChunk)c).getValue())
            .toArray(String[]::new);
    }
}

BIN
test-data/hsmf/keywords.msg Normal file

Binary file not shown.