mirror of https://github.com/apache/poi.git
github-167 - HSMF enhancements
Introduce NameIdChunks.getPropertyTag, which enables evaluating property ids from properties identified by name/id in property sets (a simple version of IMAPIProp::GetIDsFromNames). AttachmentChunks.getAttachData: use the new ByteChunkDeferred instead of ByteChunk, which enables delayed reading of attachments, so that attachments are no longer all read completely into memory while parsing (which may cause OutOfMemoryErrors on e-mails with big attachments). POIFSChunkParser: support reading multi-valued chunks (e.g. required when reading the Keywords ("categories") property). Add MAPIProperty.RECEIVED_BY_SMTP_ADDRESS. Add unit tests. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1874990 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ef90a5f2c8
commit
40f320bcf9
|
@ -0,0 +1,100 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hsmf.datatypes;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
import org.apache.poi.poifs.filesystem.DocumentNode;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
||||
/**
|
||||
* A Chunk that either acts as {@link ByteChunk} (if not initialized with a node) or
|
||||
* lazy loads its binary data from the document (if linked with a node via {@link #readValue(DocumentNode)}).
|
||||
*/
|
||||
public class ByteChunkDeferred extends ByteChunk {
|
||||
|
||||
private DocumentNode node;
|
||||
|
||||
/**
|
||||
* Creates a Byte Stream Chunk, with the specified type.
|
||||
*/
|
||||
public ByteChunkDeferred(String namePrefix, int chunkId, MAPIType type) {
|
||||
super(namePrefix, chunkId, type);
|
||||
}
|
||||
|
||||
/**
|
||||
* Links the chunk to a document
|
||||
* @param node the document node
|
||||
*/
|
||||
public void readValue(DocumentNode node) {
|
||||
this.node = node;
|
||||
}
|
||||
|
||||
public void readValue(InputStream value) throws IOException {
|
||||
if (node == null) {
|
||||
super.readValue(value);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeValue(OutputStream out) throws IOException {
|
||||
if (node == null) {
|
||||
super.writeValue(out);
|
||||
return;
|
||||
}
|
||||
|
||||
try (DocumentInputStream dis = createDocumentInputStream()) {
|
||||
IOUtils.copy(dis, out);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get bytes directly.
|
||||
*/
|
||||
public byte[] getValue() {
|
||||
if (node == null) {
|
||||
return super.getValue();
|
||||
}
|
||||
|
||||
try (DocumentInputStream dis = createDocumentInputStream()) {
|
||||
return IOUtils.toByteArray(dis, node.getSize());
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set bytes directly.
|
||||
* <p>
|
||||
* updating the linked document node/msg file directly would be unexpected,
|
||||
* so we remove the link and act as a ByteChunk from then
|
||||
*/
|
||||
public void setValue(byte[] value) {
|
||||
node = null;
|
||||
super.setValue(value);
|
||||
}
|
||||
|
||||
private DocumentInputStream createDocumentInputStream() throws IOException {
|
||||
return ((DirectoryNode) node.getParent()).createDocumentInputStream(node);
|
||||
}
|
||||
}
|
|
@ -44,7 +44,13 @@ public final class Chunks implements ChunkGroupWithProperties {
|
|||
* Normally a property will have zero chunks (fixed sized) or one chunk
|
||||
* (variable size), but in some cases (eg Unknown) you may get more.
|
||||
*/
|
||||
private Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>();
|
||||
private final Map<MAPIProperty, List<Chunk>> allChunks = new HashMap<>();
|
||||
|
||||
/**
|
||||
* Holds all the unknown properties that were found, indexed by their property id and property type.
|
||||
* All unknown properties have a custom properties instance.
|
||||
*/
|
||||
private final Map<Long, MAPIProperty> unknownProperties = new HashMap<>();
|
||||
|
||||
/** Type of message that the MSG represents (ie. IPM.Note) */
|
||||
private StringChunk messageClass;
|
||||
|
@ -188,6 +194,14 @@ public final class Chunks implements ChunkGroupWithProperties {
|
|||
public void record(Chunk chunk) {
|
||||
// Work out what MAPIProperty this corresponds to
|
||||
MAPIProperty prop = MAPIProperty.get(chunk.getChunkId());
|
||||
if (prop == MAPIProperty.UNKNOWN) {
|
||||
long id = (chunk.getChunkId() << 16) + chunk.getType().getId();
|
||||
prop = unknownProperties.get(id);
|
||||
if (prop == null) {
|
||||
prop = MAPIProperty.createCustom(chunk.getChunkId(), chunk.getType(), chunk.getEntryName());
|
||||
unknownProperties.put(id, prop);
|
||||
}
|
||||
}
|
||||
|
||||
// Assign it for easy lookup, as best we can
|
||||
if (prop == MAPIProperty.MESSAGE_CLASS) {
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.poi.hsmf.datatypes.Types.MAPIType;
|
|||
* https://msdn.microsoft.com/en-us/library/microsoft.exchange.data.contenttypes.tnef.tnefpropertyid(v=exchg.150).aspx
|
||||
* http://msdn.microsoft.com/en-us/library/ms526356%28v=exchg.10%29.aspx
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class MAPIProperty {
|
||||
private static Map<Integer, MAPIProperty> attributes = new HashMap<>();
|
||||
|
||||
|
@ -790,6 +791,8 @@ public class MAPIProperty {
|
|||
new MAPIProperty(0x3f, BINARY, "ReceivedByEntryId", "PR_RECEIVED_BY_ENTRYID");
|
||||
public static final MAPIProperty RECEIVED_BY_NAME =
|
||||
new MAPIProperty(0x40, ASCII_STRING, "ReceivedByName", "PR_RECEIVED_BY_NAME");
|
||||
public static final MAPIProperty RECEIVED_BY_SMTP_ADDRESS =
|
||||
new MAPIProperty(0x5D07, ASCII_STRING, "ReceivedBySmtpAddress", "PR_RECEIVED_BY_SMTP_ADDRESS");
|
||||
public static final MAPIProperty RECIPIENT_DISPLAY_NAME =
|
||||
new MAPIProperty(0x5ff6, Types.UNICODE_STRING, "RecipientDisplayName", null);
|
||||
public static final MAPIProperty RECIPIENT_ENTRY_ID =
|
||||
|
@ -1095,7 +1098,7 @@ public class MAPIProperty {
|
|||
return new CustomMAPIProperty(id, type, name, null);
|
||||
}
|
||||
|
||||
private static class CustomMAPIProperty extends MAPIProperty {
|
||||
private static final class CustomMAPIProperty extends MAPIProperty {
|
||||
private CustomMAPIProperty(int id, MAPIType usualType, String name, String mapiProperty) {
|
||||
super(id, usualType, name, mapiProperty);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,14 @@ package org.apache.poi.hsmf.datatypes;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.codec.digest.PureJavaCrc32;
|
||||
import org.apache.poi.hpsf.ClassID;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianByteArrayInputStream;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Collection of convenience chunks for the NameID part of an outlook file
|
||||
|
@ -26,6 +34,43 @@ import java.util.List;
|
|||
public final class NameIdChunks implements ChunkGroup {
|
||||
public static final String NAME = "__nameid_version1.0";
|
||||
|
||||
public enum PropertySetType {
|
||||
PS_MAPI("00020328-0000-0000-C000-000000000046"),
|
||||
PS_PUBLIC_STRINGS("00020329-0000-0000-C000-000000000046"),
|
||||
PS_INTERNET_HEADERS("00020386-0000-0000-C000-000000000046");
|
||||
|
||||
public ClassID classID;
|
||||
PropertySetType(String uuid) {
|
||||
classID = new ClassID(uuid);
|
||||
}
|
||||
}
|
||||
|
||||
public enum PredefinedPropertySet {
|
||||
PSETID_COMMON("00062008-0000-0000-C000-000000000046"),
|
||||
PSETID_ADDRESS("00062004-0000-0000-C000-000000000046"),
|
||||
PSETID_APPOINTMENT("00062002-0000-0000-C000-000000000046"),
|
||||
PSETID_MEETING("6ED8DA90-450B-101B-98DA-00AA003F1305"),
|
||||
PSETID_LOG("0006200A-0000-0000-C000-000000000046"),
|
||||
PSETID_MESSAGING("41F28F13-83F4-4114-A584-EEDB5A6B0BFF"),
|
||||
PSETID_NOTE("0006200E-0000-0000-C000-000000000046"),
|
||||
PSETID_POST_RSS("00062041-0000-0000-C000-000000000046"),
|
||||
PSETID_TASK("00062003-0000-0000-C000-000000000046"),
|
||||
PSETID_UNIFIED_MESSAGING("4442858E-A9E3-4E80-B900-317A210CC15B"),
|
||||
PSETID_AIR_SYNC("71035549-0739-4DCB-9163-00F0580DBBDF"),
|
||||
PSETID_SHARING("00062040-0000-0000-C000-000000000046"),
|
||||
PSETID_XML_EXTRACTED_ENTITIES("23239608-685D-4732-9C55-4C95CB4E8E33"),
|
||||
PSETID_ATTACHMENT("96357F7F-59E1-47D0-99A7-46515C183B54");
|
||||
|
||||
public ClassID classID;
|
||||
PredefinedPropertySet(String uuid) {
|
||||
classID = new ClassID(uuid);
|
||||
}
|
||||
}
|
||||
|
||||
private ByteChunk guidStream;
|
||||
private ByteChunk entryStream;
|
||||
private ByteChunk stringStream;
|
||||
|
||||
/** Holds all the chunks that were found. */
|
||||
private List<Chunk> allChunks = new ArrayList<>();
|
||||
|
||||
|
@ -43,6 +88,19 @@ public final class NameIdChunks implements ChunkGroup {
|
|||
*/
|
||||
@Override
|
||||
public void record(Chunk chunk) {
|
||||
if (chunk.getType() == Types.BINARY) {
|
||||
switch (chunk.getChunkId()) {
|
||||
case 2:
|
||||
guidStream = (ByteChunk)chunk;
|
||||
break;
|
||||
case 3:
|
||||
entryStream = (ByteChunk)chunk;
|
||||
break;
|
||||
case 4:
|
||||
stringStream = (ByteChunk)chunk;
|
||||
break;
|
||||
}
|
||||
}
|
||||
allChunks.add(chunk);
|
||||
}
|
||||
|
||||
|
@ -54,4 +112,165 @@ public final class NameIdChunks implements ChunkGroup {
|
|||
// Currently, we don't need to do anything special once
|
||||
// all the chunks have been located
|
||||
}
|
||||
|
||||
/**
|
||||
* Get property tag id by property set GUID and string name or numerical name from named properties mapping
|
||||
* @param guid Property set GUID in registry format without brackets.
|
||||
* May be one of the PS_* or PSETID_* constants
|
||||
* @param name Property name in case of string named property
|
||||
* @param id Property id in case of numerical named property
|
||||
* @return Property tag which can be matched with {@link org.apache.poi.hsmf.datatypes.MAPIProperty#id}
|
||||
* or 0 if the property could not be found.
|
||||
*
|
||||
*/
|
||||
public long getPropertyTag(ClassID guid, String name, long id) {
|
||||
final byte[] entryStreamBytes = (entryStream == null) ? null : entryStream.getValue();
|
||||
if (guidStream == null || entryStream == null || stringStream == null || guid == null ||
|
||||
entryStreamBytes == null) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(entryStreamBytes);
|
||||
for (int i = 0; i < entryStreamBytes.length / 8; i++) {
|
||||
final long nameOffset = leis.readUInt();
|
||||
int guidIndex = leis.readUShort();
|
||||
final int propertyKind = guidIndex & 0x01;
|
||||
guidIndex = guidIndex >>> 1;
|
||||
final int propertyIndex = leis.readUShort();
|
||||
|
||||
// fetch and match property GUID
|
||||
if (!guid.equals(getPropertyGUID(guidIndex))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// fetch property name / stream ID
|
||||
final String[] propertyName = { null };
|
||||
final long[] propertyNameCRC32 = { -1L };
|
||||
long streamID = getStreamID(propertyKind, (int)nameOffset, guid, guidIndex,
|
||||
n -> propertyName[0] = n, c -> propertyNameCRC32[0] = c);
|
||||
|
||||
if (!matchesProperty(propertyKind, nameOffset, name, propertyName[0], id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// find property index in matching stream entry
|
||||
if (propertyKind == 1 && propertyNameCRC32[0] < 0) {
|
||||
// skip stream entry matching and return tag from property index from entry stream
|
||||
// this code should not be reached
|
||||
return 0x8000 + propertyIndex;
|
||||
}
|
||||
|
||||
return getPropertyTag(streamID, nameOffset, propertyNameCRC32[0]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private long getPropertyTag(long streamID, long nameOffset, long propertyNameCRC32) {
|
||||
for (Chunk chunk : allChunks) {
|
||||
if (chunk.getType() != Types.BINARY || chunk.getChunkId() != streamID) {
|
||||
continue;
|
||||
}
|
||||
byte[] matchChunkBytes = ((ByteChunk) chunk).getValue();
|
||||
if (matchChunkBytes == null) {
|
||||
continue;
|
||||
}
|
||||
LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(matchChunkBytes);
|
||||
for (int m = 0; m < matchChunkBytes.length / 8; m++) {
|
||||
long nameCRC = leis.readUInt();
|
||||
int matchGuidIndex = leis.readUShort();
|
||||
int matchPropertyIndex = leis.readUShort();
|
||||
int matchPropertyKind = matchGuidIndex & 0x01;
|
||||
|
||||
if (nameCRC == (matchPropertyKind == 0 ? nameOffset : propertyNameCRC32)) {
|
||||
return 0x8000 + matchPropertyIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private ClassID getPropertyGUID(int guidIndex) {
|
||||
if (guidIndex == 1) {
|
||||
// predefined GUID
|
||||
return PropertySetType.PS_MAPI.classID;
|
||||
} else if (guidIndex == 2) {
|
||||
// predefined GUID
|
||||
return PropertySetType.PS_PUBLIC_STRINGS.classID;
|
||||
} else if (guidIndex >= 3) {
|
||||
// GUID from guid stream
|
||||
byte[] guidStreamBytes = guidStream.getValue();
|
||||
int guidIndexOffset = (guidIndex - 3) * 0x10;
|
||||
if (guidStreamBytes.length >= guidIndexOffset + 0x10) {
|
||||
return new ClassID(guidStreamBytes, guidIndexOffset);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// property set GUID matches
|
||||
private static boolean matchesProperty(int propertyKind, long nameOffset, String name, String propertyName, long id) {
|
||||
return
|
||||
// match property by id
|
||||
(propertyKind == 0 && id >= 0 && id == nameOffset) ||
|
||||
// match property by name
|
||||
(propertyKind == 1 && name != null && name.equals(propertyName));
|
||||
}
|
||||
|
||||
|
||||
private long getStreamID(int propertyKind, int nameOffset, ClassID guid, int guidIndex,
|
||||
Consumer<String> propertyNameSetter, Consumer<Long> propertyNameCRC32Setter) {
|
||||
if (propertyKind == 0) {
|
||||
// numerical named property
|
||||
return 0x1000 + (nameOffset ^ (guidIndex << 1)) % 0x1F;
|
||||
}
|
||||
|
||||
// string named property
|
||||
byte[] stringBytes = stringStream.getValue();
|
||||
long propertyNameCRC32 = -1;
|
||||
if (stringBytes.length > nameOffset) {
|
||||
long nameLength = LittleEndian.getUInt(stringBytes, nameOffset);
|
||||
if (stringBytes.length >= nameOffset + 4 + nameLength) {
|
||||
int nameStart = nameOffset + 4;
|
||||
String propertyName = new String(stringBytes, nameStart, (int) nameLength, StringUtil.UTF16LE);
|
||||
if (PropertySetType.PS_INTERNET_HEADERS.classID.equals(guid)) {
|
||||
byte[] n = propertyName.toLowerCase(Locale.ROOT).getBytes(StringUtil.UTF16LE);
|
||||
propertyNameCRC32 = calculateCRC32(n, 0, n.length);
|
||||
} else {
|
||||
propertyNameCRC32 = calculateCRC32(stringBytes, nameStart, (int)nameLength);
|
||||
}
|
||||
propertyNameSetter.accept(propertyName);
|
||||
propertyNameCRC32Setter.accept(propertyNameCRC32);
|
||||
}
|
||||
}
|
||||
return 0x1000 + (propertyNameCRC32 ^ ((guidIndex << 1) | 1)) % 0x1F;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the CRC32 of the given bytes (conforms to RFC 1510, SSH-1).
|
||||
* The CRC32 calculation is similar to the standard one as demonstrated in RFC 1952,
|
||||
* but with the inversion (before and after the calculation) omitted.
|
||||
* <ul>
|
||||
* <li>poly: 0x04C11DB7</li>
|
||||
* <li>init: 0x00000000</li>
|
||||
* <li>xor: 0x00000000</li>
|
||||
* <li>revin: true</li>
|
||||
* <li>revout: true</li>
|
||||
* <li>check: 0x2DFD2D88 (CRC32 of "123456789")</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param buf the byte array to calculate CRC32 on
|
||||
* @param off the offset within buf at which the CRC32 calculation will start
|
||||
* @param len the number of bytes on which to calculate the CRC32
|
||||
* @return the CRC32 value (unsigned 32-bit integer stored in a long).
|
||||
*
|
||||
* @see <a href="http://www.zorc.breitbandkatze.de/crc.html">CRC parameter check</a>
|
||||
*/
|
||||
private static long calculateCRC32(byte[] buf, int off, int len) {
|
||||
PureJavaCrc32 crc = new PureJavaCrc32();
|
||||
// set initial crc value to 0
|
||||
crc.update( new byte[] {-1,-1,-1,-1}, 0, 4);
|
||||
crc.update(buf, off, len);
|
||||
return ~crc.getValue() & 0xFFFFFFFFL;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,10 +18,15 @@
|
|||
package org.apache.poi.hsmf.parsers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
|
||||
import org.apache.poi.hsmf.datatypes.ByteChunk;
|
||||
import org.apache.poi.hsmf.datatypes.ByteChunkDeferred;
|
||||
import org.apache.poi.hsmf.datatypes.Chunk;
|
||||
import org.apache.poi.hsmf.datatypes.ChunkGroup;
|
||||
import org.apache.poi.hsmf.datatypes.Chunks;
|
||||
|
@ -50,12 +55,15 @@ import org.apache.poi.util.POILogger;
|
|||
* data and so on.
|
||||
*/
|
||||
public final class POIFSChunkParser {
|
||||
private final static POILogger logger = POILogFactory.getLogger(POIFSChunkParser.class);
|
||||
private static final POILogger LOG = POILogFactory.getLogger(POIFSChunkParser.class);
|
||||
|
||||
public static ChunkGroup[] parse(POIFSFileSystem fs) throws IOException {
|
||||
private POIFSChunkParser() {}
|
||||
|
||||
public static ChunkGroup[] parse(POIFSFileSystem fs) {
|
||||
return parse(fs.getRoot());
|
||||
}
|
||||
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
|
||||
|
||||
public static ChunkGroup[] parse(DirectoryNode node) {
|
||||
Chunks mainChunks = new Chunks();
|
||||
|
||||
ArrayList<ChunkGroup> groups = new ArrayList<>();
|
||||
|
@ -83,8 +91,6 @@ public final class POIFSChunkParser {
|
|||
if (group != null) {
|
||||
processChunks(dir, group);
|
||||
groups.add(group);
|
||||
} else {
|
||||
// Unknown directory, skip silently
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -106,69 +112,99 @@ public final class POIFSChunkParser {
|
|||
* Creates all the chunks for a given Directory, but
|
||||
* doesn't recurse or descend
|
||||
*/
|
||||
protected static void processChunks(DirectoryNode node, ChunkGroup grouping) {
|
||||
private static void processChunks(DirectoryNode node, ChunkGroup grouping) {
|
||||
final Map<Integer, MultiChunk> multiChunks = new TreeMap<>();
|
||||
|
||||
for (Entry entry : node) {
|
||||
if(entry instanceof DocumentNode) {
|
||||
process(entry, grouping);
|
||||
} else if(entry instanceof DirectoryNode) {
|
||||
if(entry.getName().endsWith(Types.DIRECTORY.asFileEnding())) {
|
||||
process(entry, grouping);
|
||||
}
|
||||
if (entry instanceof DocumentNode ||
|
||||
(entry instanceof DirectoryNode && entry.getName().endsWith(Types.DIRECTORY.asFileEnding()))) {
|
||||
process(entry, grouping, multiChunks);
|
||||
}
|
||||
}
|
||||
|
||||
// Finish up variable length multivalued properties
|
||||
multiChunks.entrySet().stream()
|
||||
.flatMap(me -> me.getValue().getChunks().values().stream())
|
||||
.filter(Objects::nonNull)
|
||||
.forEach(grouping::record);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a chunk, and gives it to its parent group
|
||||
*/
|
||||
protected static void process(Entry entry, ChunkGroup grouping) {
|
||||
String entryName = entry.getName();
|
||||
Chunk chunk = null;
|
||||
private static void process(Entry entry, ChunkGroup grouping, Map<Integer, MultiChunk> multiChunks) {
|
||||
final String entryName = entry.getName();
|
||||
boolean[] isMultiValued = { false };
|
||||
|
||||
// Is it a properties chunk? (They have special names)
|
||||
if (entryName.equals(PropertiesChunk.NAME)) {
|
||||
Chunk chunk = (PropertiesChunk.NAME.equals(entryName))
|
||||
? readPropertiesChunk(grouping, entry)
|
||||
: readPrimitiveChunk(entry, isMultiValued, multiChunks);
|
||||
|
||||
if (chunk == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (entry instanceof DocumentNode) {
|
||||
try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) {
|
||||
chunk.readValue(inp);
|
||||
} catch (IOException e) {
|
||||
LOG.log(POILogger.ERROR, "Error reading from part " + entry.getName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isMultiValued[0]) {
|
||||
// multi value chunks will be grouped later, in the correct order
|
||||
grouping.record(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
private static Chunk readPropertiesChunk(ChunkGroup grouping, Entry entry) {
|
||||
if (grouping instanceof Chunks) {
|
||||
// These should be the properties for the message itself
|
||||
chunk = new MessagePropertiesChunk(grouping,
|
||||
entry.getParent() != null && entry.getParent().getParent() != null);
|
||||
boolean isEmbedded = entry.getParent() != null && entry.getParent().getParent() != null;
|
||||
return new MessagePropertiesChunk(grouping, isEmbedded);
|
||||
} else {
|
||||
// Will be properties on an attachment or recipient
|
||||
chunk = new StoragePropertiesChunk(grouping);
|
||||
return new StoragePropertiesChunk(grouping);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
|
||||
private static Chunk readPrimitiveChunk(Entry entry, boolean[] isMultiValue, Map<Integer, MultiChunk> multiChunks) {
|
||||
final String entryName = entry.getName();
|
||||
final int splitAt = entryName.lastIndexOf('_');
|
||||
|
||||
// Check it's a regular chunk
|
||||
if(entryName.length() < 9) {
|
||||
if (entryName.length() < 9 || splitAt == -1) {
|
||||
// Name in the wrong format
|
||||
return;
|
||||
}
|
||||
if(! entryName.contains("_")) {
|
||||
// Name in the wrong format
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
// Split it into its parts
|
||||
int splitAt = entryName.lastIndexOf('_');
|
||||
String namePrefix = entryName.substring(0, splitAt+1);
|
||||
String ids = entryName.substring(splitAt+1);
|
||||
final String namePrefix = entryName.substring(0, splitAt + 1);
|
||||
final String ids = entryName.substring(splitAt + 1);
|
||||
|
||||
// Make sure we got what we expected, should be of
|
||||
// the form __<name>_<id><type>
|
||||
if(namePrefix.equals("Olk10SideProps") ||
|
||||
namePrefix.equals("Olk10SideProps_")) {
|
||||
if (namePrefix.equals("Olk10SideProps") || namePrefix.equals("Olk10SideProps_")) {
|
||||
// This is some odd Outlook 2002 thing, skip
|
||||
return;
|
||||
} else if(splitAt <= entryName.length()-8) {
|
||||
// In the right form for a normal chunk
|
||||
// We'll process this further in a little bit
|
||||
} else {
|
||||
return null;
|
||||
} else if (splitAt > entryName.length() - 8) {
|
||||
// Underscores not the right place, something's wrong
|
||||
throw new IllegalArgumentException("Invalid chunk name " + entryName);
|
||||
}
|
||||
|
||||
// Now try to turn it into id + type
|
||||
final int chunkId, typeId;
|
||||
try {
|
||||
int chunkId = Integer.parseInt(ids.substring(0, 4), 16);
|
||||
int typeId = Integer.parseInt(ids.substring(4, 8), 16);
|
||||
chunkId = Integer.parseInt(ids.substring(0, 4), 16);
|
||||
int tid = Integer.parseInt(ids.substring(4, 8), 16);
|
||||
isMultiValue[0] = (tid & Types.MULTIVALUED_FLAG) != 0;
|
||||
typeId = tid & ~Types.MULTIVALUED_FLAG;
|
||||
} catch (NumberFormatException e) {
|
||||
// Name in the wrong format
|
||||
return null;
|
||||
}
|
||||
|
||||
MAPIType type = Types.getById(typeId);
|
||||
if (type == null) {
|
||||
|
@ -177,44 +213,90 @@ public final class POIFSChunkParser {
|
|||
|
||||
// Special cases based on the ID
|
||||
if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
|
||||
chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
|
||||
return new MessageSubmissionChunk(namePrefix, chunkId, type);
|
||||
} else if (type == Types.BINARY && chunkId == MAPIProperty.ATTACH_DATA.id) {
|
||||
ByteChunkDeferred bcd = new ByteChunkDeferred(namePrefix, chunkId, type);
|
||||
if (entry instanceof DocumentNode) {
|
||||
bcd.readValue((DocumentNode) entry);
|
||||
}
|
||||
else {
|
||||
return bcd;
|
||||
} else {
|
||||
// Nothing special about this ID
|
||||
// So, do the usual thing which is by type
|
||||
if (type == Types.BINARY) {
|
||||
chunk = new ByteChunk(namePrefix, chunkId, type);
|
||||
if (isMultiValue[0]) {
|
||||
return readMultiValue(namePrefix, ids, chunkId, entry, type, multiChunks);
|
||||
} else {
|
||||
if (type == Types.DIRECTORY && entry instanceof DirectoryNode) {
|
||||
return new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
|
||||
} else if (type == Types.BINARY) {
|
||||
return new ByteChunk(namePrefix, chunkId, type);
|
||||
} else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
|
||||
return new StringChunk(namePrefix, chunkId, type);
|
||||
}
|
||||
else if (type == Types.DIRECTORY) {
|
||||
if(entry instanceof DirectoryNode) {
|
||||
chunk = new DirectoryChunk((DirectoryNode)entry, namePrefix, chunkId, type);
|
||||
}
|
||||
}
|
||||
else if (type == Types.ASCII_STRING ||
|
||||
type == Types.UNICODE_STRING) {
|
||||
chunk = new StringChunk(namePrefix, chunkId, type);
|
||||
}
|
||||
else {
|
||||
// Type of an unsupported type! Skipping...
|
||||
LOG.log(POILogger.WARN, "UNSUPPORTED PROP TYPE " + entryName);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
} catch(NumberFormatException e) {
|
||||
// Name in the wrong format
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if(chunk != null) {
|
||||
if(entry instanceof DocumentNode) {
|
||||
try (DocumentInputStream inp = new DocumentInputStream((DocumentNode) entry)) {
|
||||
chunk.readValue(inp);
|
||||
grouping.record(chunk);
|
||||
} catch (IOException e) {
|
||||
logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e);
|
||||
|
||||
private static Chunk readMultiValue(String namePrefix, String ids, int chunkId, Entry entry, MAPIType type,
|
||||
Map<Integer, MultiChunk> multiChunks) {
|
||||
long multiValueIdx = -1;
|
||||
if (ids.contains("-")) {
|
||||
String mvidxstr = ids.substring(ids.lastIndexOf('-') + 1);
|
||||
try {
|
||||
multiValueIdx = Long.parseLong(mvidxstr) & 0xFFFFFFFFL;
|
||||
} catch (NumberFormatException ignore) {
|
||||
LOG.log(POILogger.WARN, "Can't read multi value idx from entry " + entry.getName());
|
||||
}
|
||||
}
|
||||
|
||||
final MultiChunk mc = multiChunks.computeIfAbsent(chunkId, k -> new MultiChunk());
|
||||
if (multiValueIdx == -1) {
|
||||
return new ByteChunk(chunkId, Types.BINARY) {
|
||||
@Override
|
||||
public void readValue(InputStream value) throws IOException {
|
||||
super.readValue(value);
|
||||
mc.setLength(getValue().length / 4);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
grouping.record(chunk);
|
||||
}
|
||||
final Chunk chunk;
|
||||
if (type == Types.BINARY) {
|
||||
chunk = new ByteChunk(namePrefix, chunkId, type);
|
||||
} else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
|
||||
chunk = new StringChunk(namePrefix, chunkId, type);
|
||||
} else {
|
||||
// Type of an unsupported multivalued type! Skipping...
|
||||
LOG.log(POILogger.WARN, "Unsupported multivalued prop type for entry " + entry.getName());
|
||||
return null;
|
||||
}
|
||||
mc.addChunk((int) multiValueIdx, chunk);
|
||||
return chunk;
|
||||
}
|
||||
}
|
||||
|
||||
private static class MultiChunk {
|
||||
private int length = -1;
|
||||
private final Map<Integer,Chunk> chunks = new TreeMap<>();
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
int getLength() {
|
||||
return length;
|
||||
}
|
||||
|
||||
void setLength(int length) {
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
void addChunk(int multiValueIdx, Chunk value) {
|
||||
chunks.put(multiValueIdx, value);
|
||||
}
|
||||
|
||||
Map<Integer, Chunk> getChunks() {
|
||||
return chunks;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,19 +18,18 @@
|
|||
package org.apache.poi.hsmf;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests to verify that we can read attachments from msg file
|
||||
|
@ -42,8 +41,6 @@ public class TestFileWithAttachmentsRead {
|
|||
|
||||
/**
|
||||
* Initialize this test, load up the attachment_test_msg.msg mapi message.
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void setUp() throws IOException {
|
||||
|
@ -62,9 +59,6 @@ public class TestFileWithAttachmentsRead {
|
|||
|
||||
/**
|
||||
* Test to see if we can retrieve attachments.
|
||||
*
|
||||
* @throws ChunkNotFoundException
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public void testRetrieveAttachments() {
|
||||
|
@ -134,14 +128,22 @@ public class TestFileWithAttachmentsRead {
|
|||
assertEquals("test-unicode.doc", attachment.getAttachLongFileName().getValue());
|
||||
assertEquals(".doc", attachment.getAttachExtension().getValue());
|
||||
assertNull(attachment.getAttachMimeTag());
|
||||
assertEquals(24064, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data
|
||||
ByteArrayOutputStream attachmentstream = new ByteArrayOutputStream();
|
||||
attachment.getAttachData().writeValue(attachmentstream);
|
||||
assertEquals(24064, attachmentstream.size());
|
||||
// or compare the hashes of the attachment data
|
||||
assertEquals(24064, attachment.getAttachData().getValue().length);
|
||||
|
||||
attachment = twoSimpleAttachments.getAttachmentFiles()[1];
|
||||
assertEquals("pj1.txt", attachment.getAttachFileName().getValue());
|
||||
assertEquals("pj1.txt", attachment.getAttachLongFileName().getValue());
|
||||
assertEquals(".txt", attachment.getAttachExtension().getValue());
|
||||
assertNull(attachment.getAttachMimeTag());
|
||||
assertEquals(89, attachment.getAttachData().getValue().length); // or compare the hashes of the attachment data
|
||||
// or compare the hashes of the attachment data
|
||||
assertEquals(89, attachment.getAttachData().getValue().length);
|
||||
attachmentstream = new ByteArrayOutputStream();
|
||||
attachment.getAttachData().writeValue(attachmentstream);
|
||||
assertEquals(89, attachmentstream.size());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -161,7 +163,8 @@ public class TestFileWithAttachmentsRead {
|
|||
assertEquals(".pdf", attachment.getAttachExtension().getValue());
|
||||
assertNull(attachment.getAttachMimeTag());
|
||||
assertNull(attachment.getAttachmentDirectory());
|
||||
assertEquals(13539, attachment.getAttachData().getValue().length); //or compare the hashes of the attachment data
|
||||
//or compare the hashes of the attachment data
|
||||
assertEquals(13539, attachment.getAttachData().getValue().length);
|
||||
|
||||
// First in a nested message
|
||||
attachment = pdfMsgAttachments.getAttachmentFiles()[0];
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hsmf;
|
||||
|
||||
import static org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet.PSETID_COMMON;
|
||||
import static org.apache.poi.hsmf.datatypes.NameIdChunks.PropertySetType.PS_PUBLIC_STRINGS;
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hsmf.datatypes.StringChunk;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests to verify that we can read properties identified by name or id in property sets.
|
||||
*/
|
||||
public class TestNameIdChunks {
|
||||
private static MAPIMessage keywordsMsg;
|
||||
|
||||
/**
|
||||
* Initialize this test, load up the keywords.msg mapi message.
|
||||
*/
|
||||
@BeforeClass
|
||||
public static void setUp() throws IOException {
|
||||
POIDataSamples samples = POIDataSamples.getHSMFInstance();
|
||||
try (InputStream is = samples.openResourceAsStream("keywords.msg")) {
|
||||
keywordsMsg = new MAPIMessage(is);
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown() throws IOException {
|
||||
keywordsMsg.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to see if we can read the keywords list from the msg.
|
||||
* The keywords property is a property identified by the name "Keywords" in the property set PS_PUBLIC_STRINGS.
|
||||
*/
|
||||
@Test
|
||||
public void testReadKeywords() {
|
||||
long keywordsPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PS_PUBLIC_STRINGS.classID, "Keywords", 0);
|
||||
assertEquals(0x8003, keywordsPropTag);
|
||||
String[] exp = { "TODO", "Currently Important", "Currently To Do", "Test" };
|
||||
String[] act = getValues(keywordsPropTag);
|
||||
assertArrayEquals(exp, act);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to see if we can read the current version name from the msg.
|
||||
* The current version name property is a property identified by the id 0x8554 in the property set PSETID_Common.
|
||||
*/
|
||||
@Test
|
||||
public void testCurrentVersionName() {
|
||||
long testPropTag = keywordsMsg.getNameIdChunks().getPropertyTag(PSETID_COMMON.classID, null, 0x8554);
|
||||
assertEquals(0x8006, testPropTag);
|
||||
String[] exp = { "16.0" };
|
||||
String[] act = getValues(testPropTag);
|
||||
assertArrayEquals(exp, act);
|
||||
}
|
||||
|
||||
private String[] getValues(long tag) {
|
||||
return keywordsMsg.getMainChunks().getAll().entrySet().stream()
|
||||
.filter(me -> me.getKey().id == tag)
|
||||
.flatMap(me -> me.getValue().stream())
|
||||
.map(c -> ((StringChunk)c).getValue())
|
||||
.toArray(String[]::new);
|
||||
}
|
||||
}
|
Binary file not shown.
Loading…
Reference in New Issue