mirror of https://github.com/apache/poi.git
60279 -- back off to brute-force search for macro content if offset information is not correct
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1808301 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
23ecb9a172
commit
76d3f15560
|
@ -43,7 +43,9 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.util.CodePageUtil;
|
||||||
import org.apache.poi.util.HexDump;
|
import org.apache.poi.util.HexDump;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.RLEDecompressingInputStream;
|
import org.apache.poi.util.RLEDecompressingInputStream;
|
||||||
|
import org.apache.poi.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
||||||
|
@ -61,9 +63,7 @@ import org.apache.poi.util.RLEDecompressingInputStream;
|
||||||
public class VBAMacroReader implements Closeable {
|
public class VBAMacroReader implements Closeable {
|
||||||
protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
|
protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
|
||||||
protected static final String VBA_PROJECT_POIFS = "VBA";
|
protected static final String VBA_PROJECT_POIFS = "VBA";
|
||||||
// FIXME: When minimum supported version is Java 7, replace with java.nio.charset.StandardCharsets.UTF_16LE
|
|
||||||
private static final Charset UTF_16LE = Charset.forName("UTF-16LE");
|
|
||||||
|
|
||||||
private NPOIFSFileSystem fs;
|
private NPOIFSFileSystem fs;
|
||||||
|
|
||||||
public VBAMacroReader(InputStream rstream) throws IOException {
|
public VBAMacroReader(InputStream rstream) throws IOException {
|
||||||
|
@ -145,7 +145,7 @@ public class VBAMacroReader implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected static class ModuleMap extends HashMap<String, Module> {
|
protected static class ModuleMap extends HashMap<String, Module> {
|
||||||
Charset charset = Charset.forName("Cp1252"); // default charset
|
Charset charset = StringUtil.WIN_1252; // default charset
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -172,20 +172,7 @@ public class VBAMacroReader implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
|
|
||||||
*
|
|
||||||
* @param stream the inputstream to read from
|
|
||||||
* @param length number of bytes to read from stream
|
|
||||||
* @param charset the character set encoding of the bytes in the stream
|
|
||||||
* @return a java String in the supplied character set
|
|
||||||
* @throws IOException If reading from the stream fails
|
|
||||||
*/
|
|
||||||
private static String readString(InputStream stream, int length, Charset charset) throws IOException {
|
|
||||||
byte[] buffer = new byte[length];
|
|
||||||
int count = stream.read(buffer);
|
|
||||||
return new String(buffer, 0, count, charset);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* reads module from DIR node in input stream and adds it to the modules map for decompression later
|
* reads module from DIR node in input stream and adds it to the modules map for decompression later
|
||||||
|
@ -199,7 +186,7 @@ public class VBAMacroReader implements Closeable {
|
||||||
* @param modules a map to store the modules
|
* @param modules a map to store the modules
|
||||||
* @throws IOException If reading data from the stream or from modules fails
|
* @throws IOException If reading data from the stream or from modules fails
|
||||||
*/
|
*/
|
||||||
private static void readModule(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
|
private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
|
||||||
int moduleOffset = in.readInt();
|
int moduleOffset = in.readInt();
|
||||||
Module module = modules.get(streamName);
|
Module module = modules.get(streamName);
|
||||||
if (module == null) {
|
if (module == null) {
|
||||||
|
@ -218,27 +205,57 @@ public class VBAMacroReader implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void readModule(DocumentInputStream dis, String name, ModuleMap modules) throws IOException {
|
private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
|
||||||
Module module = modules.get(name);
|
Module module = modules.get(name);
|
||||||
// TODO Refactor this to fetch dir then do the rest
|
// TODO Refactor this to fetch dir then do the rest
|
||||||
if (module == null) {
|
if (module == null) {
|
||||||
// no DIR stream with offsets yet, so store the compressed bytes for later
|
// no DIR stream with offsets yet, so store the compressed bytes for later
|
||||||
module = new Module();
|
module = new Module();
|
||||||
modules.put(name, module);
|
modules.put(name, module);
|
||||||
module.read(dis);
|
InputStream dis = new DocumentInputStream(documentNode);
|
||||||
|
try {
|
||||||
|
module.read(dis);
|
||||||
|
} finally {
|
||||||
|
dis.close();
|
||||||
|
}
|
||||||
} else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
|
} else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
|
||||||
|
|
||||||
if (module.offset == null) {
|
if (module.offset == null) {
|
||||||
//This should not happen. bug 59858
|
//This should not happen. bug 59858
|
||||||
throw new IOException("Module offset for '" + name + "' was never read.");
|
throw new IOException("Module offset for '" + name + "' was never read.");
|
||||||
}
|
}
|
||||||
// we know the offset already, so decompress immediately on-the-fly
|
|
||||||
long skippedBytes = dis.skip(module.offset);
|
//try the general case, where module.offset is accurate
|
||||||
if (skippedBytes != module.offset) {
|
InputStream decompressed = null;
|
||||||
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
|
InputStream compressed = new DocumentInputStream(documentNode);
|
||||||
|
try {
|
||||||
|
// we know the offset already, so decompress immediately on-the-fly
|
||||||
|
long skippedBytes = compressed.skip(module.offset);
|
||||||
|
if (skippedBytes != module.offset) {
|
||||||
|
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
|
||||||
|
}
|
||||||
|
decompressed = new RLEDecompressingInputStream(compressed);
|
||||||
|
module.read(decompressed);
|
||||||
|
return;
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
} catch (IllegalStateException e) {
|
||||||
|
} finally {
|
||||||
|
IOUtils.closeQuietly(compressed);
|
||||||
|
IOUtils.closeQuietly(decompressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
//bad module.offset, try brute force
|
||||||
|
compressed = new DocumentInputStream(documentNode);
|
||||||
|
byte[] decompressedBytes = null;
|
||||||
|
try {
|
||||||
|
decompressedBytes = findCompressedStreamWBruteForce(compressed);
|
||||||
|
} finally {
|
||||||
|
IOUtils.closeQuietly(compressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (decompressedBytes != null) {
|
||||||
|
module.read(new ByteArrayInputStream(decompressedBytes));
|
||||||
}
|
}
|
||||||
InputStream stream = new RLEDecompressingInputStream(dis);
|
|
||||||
module.read(stream);
|
|
||||||
stream.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -249,7 +266,7 @@ public class VBAMacroReader implements Closeable {
|
||||||
* @throws IOException If skipping would exceed the available data or skipping did not work.
|
* @throws IOException If skipping would exceed the available data or skipping did not work.
|
||||||
*/
|
*/
|
||||||
private static void trySkip(InputStream in, long n) throws IOException {
|
private static void trySkip(InputStream in, long n) throws IOException {
|
||||||
long skippedBytes = in.skip(n);
|
long skippedBytes = IOUtils.skipFully(in, n);
|
||||||
if (skippedBytes != n) {
|
if (skippedBytes != n) {
|
||||||
if (skippedBytes < 0) {
|
if (skippedBytes < 0) {
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
|
@ -258,33 +275,18 @@ public class VBAMacroReader implements Closeable {
|
||||||
} else {
|
} else {
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
"Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
|
"Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
|
||||||
+ "This should never happen.");
|
+ "This should never happen with a non-corrupt file.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
||||||
private static final int EOF = -1;
|
|
||||||
private static final int VERSION_INDEPENDENT_TERMINATOR = 0x0010;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int VERSION_DEPENDENT_TERMINATOR = 0x002B;
|
|
||||||
private static final int PROJECTVERSION = 0x0009;
|
|
||||||
private static final int PROJECTCODEPAGE = 0x0003;
|
|
||||||
private static final int STREAMNAME = 0x001A;
|
|
||||||
private static final int MODULEOFFSET = 0x0031;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int MODULETYPE_PROCEDURAL = 0x0021;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int MODULETYPE_DOCUMENT_CLASS_OR_DESIGNER = 0x0022;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int PROJECTLCID = 0x0002;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int MODULE_NAME = 0x0019;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int MODULE_NAME_UNICODE = 0x0047;
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static final int MODULE_DOC_STRING = 0x001c;
|
|
||||||
private static final int STREAMNAME_RESERVED = 0x0032;
|
private static final int STREAMNAME_RESERVED = 0x0032;
|
||||||
|
private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
|
||||||
|
private static final int HELP_FILE_PATH_RESERVED = 0x003D;
|
||||||
|
private static final int REFERENCE_NAME_RESERVED = 0x003E;
|
||||||
|
private static final int DOC_STRING_RESERVED = 0x0040;
|
||||||
|
private static final int MODULE_DOCSTRING_RESERVED = 0x0048;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads VBA Project modules from a VBA Project directory located at
|
* Reads VBA Project modules from a VBA Project directory located at
|
||||||
|
@ -293,76 +295,330 @@ public class VBAMacroReader implements Closeable {
|
||||||
* @since 3.15-beta2
|
* @since 3.15-beta2
|
||||||
*/
|
*/
|
||||||
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
|
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
|
||||||
|
//bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
||||||
|
//but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
||||||
|
//process the dirstream first -- "dir" is case insensitive
|
||||||
|
for (String entryName : macroDir.getEntryNames()) {
|
||||||
|
if ("dir".equalsIgnoreCase(entryName)) {
|
||||||
|
processDirStream(macroDir.getEntry(entryName), modules);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (Entry entry : macroDir) {
|
for (Entry entry : macroDir) {
|
||||||
if (! (entry instanceof DocumentNode)) { continue; }
|
if (! (entry instanceof DocumentNode)) { continue; }
|
||||||
|
|
||||||
String name = entry.getName();
|
String name = entry.getName();
|
||||||
DocumentNode document = (DocumentNode)entry;
|
DocumentNode document = (DocumentNode)entry;
|
||||||
DocumentInputStream dis = new DocumentInputStream(document);
|
|
||||||
try {
|
if (! "dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
|
||||||
if ("dir".equalsIgnoreCase(name)) {
|
|
||||||
// process DIR
|
|
||||||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
|
|
||||||
String streamName = null;
|
|
||||||
int recordId = 0;
|
|
||||||
try {
|
|
||||||
while (true) {
|
|
||||||
recordId = in.readShort();
|
|
||||||
if (EOF == recordId
|
|
||||||
|| VERSION_INDEPENDENT_TERMINATOR == recordId) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
int recordLength = in.readInt();
|
|
||||||
switch (recordId) {
|
|
||||||
case PROJECTVERSION:
|
|
||||||
trySkip(in, 6);
|
|
||||||
break;
|
|
||||||
case PROJECTCODEPAGE:
|
|
||||||
int codepage = in.readShort();
|
|
||||||
modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
|
|
||||||
break;
|
|
||||||
case STREAMNAME:
|
|
||||||
streamName = readString(in, recordLength, modules.charset);
|
|
||||||
int reserved = in.readShort();
|
|
||||||
if (reserved != STREAMNAME_RESERVED) {
|
|
||||||
throw new IOException("Expected x0032 after stream name before Unicode stream name, but found: "+
|
|
||||||
Integer.toHexString(reserved));
|
|
||||||
}
|
|
||||||
int unicodeNameRecordLength = in.readInt();
|
|
||||||
readUnicodeString(in, unicodeNameRecordLength);
|
|
||||||
// do something with this at some point
|
|
||||||
break;
|
|
||||||
case MODULEOFFSET:
|
|
||||||
readModule(in, streamName, modules);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
trySkip(in, recordLength);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (final IOException e) {
|
|
||||||
throw new IOException(
|
|
||||||
"Error occurred while reading macros at section id "
|
|
||||||
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
in.close();
|
|
||||||
}
|
|
||||||
} else if (!startsWithIgnoreCase(name, "__SRP")
|
|
||||||
&& !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
|
&& !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
|
||||||
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
||||||
readModule(dis, name, modules);
|
readModuleFromDocumentStream(document, name, modules);
|
||||||
}
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
dis.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private enum RecordType {
|
||||||
|
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
||||||
|
MODULE_OFFSET(0x0031),
|
||||||
|
PROJECT_SYS_KIND(0x01),
|
||||||
|
PROJECT_LCID(0x0002),
|
||||||
|
PROJECT_LCID_INVOKE(0x14),
|
||||||
|
PROJECT_CODEPAGE(0x0003),
|
||||||
|
PROJECT_NAME(0x04),
|
||||||
|
PROJECT_DOC_STRING(0x05),
|
||||||
|
PROJECT_HELP_FILE_PATH(0x06),
|
||||||
|
PROJECT_HELP_CONTEXT(0x07, 8),
|
||||||
|
PROJECT_LIB_FLAGS(0x08),
|
||||||
|
PROJECT_VERSION(0x09, 10),
|
||||||
|
PROJECT_CONSTANTS(0x0C),
|
||||||
|
PROJECT_MODULES(0x0F),
|
||||||
|
DIR_STREAM_TERMINATOR(0x10),
|
||||||
|
PROJECT_COOKIE(0x13),
|
||||||
|
MODULE_NAME(0x19),
|
||||||
|
MODULE_NAME_UNICODE(0x47),
|
||||||
|
MODULE_STREAM_NAME(0x1A),
|
||||||
|
MODULE_DOC_STRING(0x1C),
|
||||||
|
MODULE_HELP_CONTEXT(0x1E),
|
||||||
|
MODULE_COOKIE(0x2c),
|
||||||
|
MODULE_TYPE_PROCEDURAL(0x21, 4),
|
||||||
|
MODULE_TYPE_OTHER(0x22, 4),
|
||||||
|
MODULE_PRIVATE(0x28, 4),
|
||||||
|
REFERENCE_NAME(0x16),
|
||||||
|
REFERENCE_REGISTERED(0x0D),
|
||||||
|
REFERENCE_PROJECT(0x0E),
|
||||||
|
REFERENCE_CONTROL_A(0x2F),
|
||||||
|
|
||||||
|
//according to the spec, REFERENCE_CONTROL_B(0x33) should have the
|
||||||
|
//same structure as REFERENCE_CONTROL_A(0x2F).
|
||||||
|
//However, it seems to have the int(length) record structure that most others do.
|
||||||
|
//See 59830.xls for this record.
|
||||||
|
REFERENCE_CONTROL_B(0x33),
|
||||||
|
//REFERENCE_ORIGINAL(0x33),
|
||||||
|
|
||||||
|
|
||||||
|
MODULE_TERMINATOR(0x002B),
|
||||||
|
EOF(-1),
|
||||||
|
UNKNOWN(-2);
|
||||||
|
|
||||||
|
|
||||||
|
private final int VARIABLE_LENGTH = -1;
|
||||||
|
private final int id;
|
||||||
|
private final int constantLength;
|
||||||
|
|
||||||
|
RecordType(int id) {
|
||||||
|
this.id = id;
|
||||||
|
this.constantLength = VARIABLE_LENGTH;
|
||||||
|
}
|
||||||
|
|
||||||
|
RecordType(int id, int constantLength) {
|
||||||
|
this.id = id;
|
||||||
|
this.constantLength = constantLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getConstantLength() {
|
||||||
|
return constantLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
static RecordType lookup(int id) {
|
||||||
|
for (RecordType type : RecordType.values()) {
|
||||||
|
if (type.id == id) {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return UNKNOWN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private enum DIR_STATE {
|
||||||
|
INFORMATION_RECORD,
|
||||||
|
REFERENCES_RECORD,
|
||||||
|
MODULES_RECORD
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ASCIIUnicodeStringPair {
|
||||||
|
private final String ascii;
|
||||||
|
private final String unicode;
|
||||||
|
|
||||||
|
ASCIIUnicodeStringPair(String ascii, String unicode) {
|
||||||
|
this.ascii = ascii;
|
||||||
|
this.unicode = unicode;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getAscii() {
|
||||||
|
return ascii;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getUnicode() {
|
||||||
|
return unicode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
|
||||||
|
DocumentNode dirDocumentNode = (DocumentNode)dir;
|
||||||
|
DocumentInputStream dis = new DocumentInputStream(dirDocumentNode);
|
||||||
|
DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
|
||||||
|
try {
|
||||||
|
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
|
||||||
|
String streamName = null;
|
||||||
|
int recordId = 0;
|
||||||
|
boolean inReferenceTwiddled = false;
|
||||||
|
try {
|
||||||
|
while (true) {
|
||||||
|
recordId = in.readShort();
|
||||||
|
if (recordId == -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
RecordType type = RecordType.lookup(recordId);
|
||||||
|
|
||||||
|
if (type.equals(RecordType.EOF) || type.equals(RecordType.DIR_STREAM_TERMINATOR)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (type) {
|
||||||
|
case PROJECT_VERSION:
|
||||||
|
trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
|
||||||
|
break;
|
||||||
|
case PROJECT_CODEPAGE:
|
||||||
|
in.readInt();//record size must == 4
|
||||||
|
int codepage = in.readShort();
|
||||||
|
modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
|
||||||
|
break;
|
||||||
|
case MODULE_STREAM_NAME:
|
||||||
|
ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
|
||||||
|
streamName = pair.getAscii();
|
||||||
|
break;
|
||||||
|
case PROJECT_DOC_STRING:
|
||||||
|
readStringPair(in, modules.charset, DOC_STRING_RESERVED);
|
||||||
|
break;
|
||||||
|
case PROJECT_HELP_FILE_PATH:
|
||||||
|
readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
|
||||||
|
break;
|
||||||
|
case PROJECT_CONSTANTS:
|
||||||
|
readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
|
||||||
|
break;
|
||||||
|
case REFERENCE_NAME:
|
||||||
|
if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
|
||||||
|
dirState = DIR_STATE.REFERENCES_RECORD;
|
||||||
|
}
|
||||||
|
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
|
||||||
|
break;
|
||||||
|
case MODULE_DOC_STRING :
|
||||||
|
int modDocStringLength = in.readInt();
|
||||||
|
readString(in, modDocStringLength, modules.charset);
|
||||||
|
int modDocStringReserved = in.readShort();
|
||||||
|
if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
|
||||||
|
throw new IOException("Expected x003C after stream name before Unicode stream name, but found: " +
|
||||||
|
Integer.toHexString(modDocStringReserved));
|
||||||
|
}
|
||||||
|
int unicodeModDocStringLength = in.readInt();
|
||||||
|
readUnicodeString(in, unicodeModDocStringLength);
|
||||||
|
// do something with this at some point
|
||||||
|
break;
|
||||||
|
case MODULE_OFFSET:
|
||||||
|
int modOffsetSz = in.readInt();
|
||||||
|
//should be 4
|
||||||
|
readModuleMetadataFromDirStream(in, streamName, modules);
|
||||||
|
break;
|
||||||
|
case PROJECT_MODULES:
|
||||||
|
dirState = DIR_STATE.MODULES_RECORD;
|
||||||
|
in.readInt();//size must == 2
|
||||||
|
in.readShort();//number of modules
|
||||||
|
break;
|
||||||
|
case REFERENCE_CONTROL_A:
|
||||||
|
int szTwiddled = in.readInt();
|
||||||
|
trySkip(in, szTwiddled);
|
||||||
|
int nextRecord = in.readShort();
|
||||||
|
//reference name is optional!
|
||||||
|
if (nextRecord == RecordType.REFERENCE_NAME.id) {
|
||||||
|
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
|
||||||
|
nextRecord = in.readShort();
|
||||||
|
}
|
||||||
|
if (nextRecord != 0x30) {
|
||||||
|
throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
|
||||||
|
}
|
||||||
|
int szExtended = in.readInt();
|
||||||
|
trySkip(in, szExtended);
|
||||||
|
break;
|
||||||
|
case MODULE_TERMINATOR:
|
||||||
|
int endOfModulesReserved = in.readInt();
|
||||||
|
//must be 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (type.getConstantLength() > -1) {
|
||||||
|
trySkip(in, type.getConstantLength());
|
||||||
|
} else {
|
||||||
|
int recordLength = in.readInt();
|
||||||
|
trySkip(in, recordLength);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new IOException(
|
||||||
|
"Error occurred while reading macros at section id "
|
||||||
|
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
dis.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in, Charset charset, int reservedByte) throws IOException {
|
||||||
|
int nameLength = in.readInt();
|
||||||
|
String ascii = readString(in, nameLength, charset);
|
||||||
|
int reserved = in.readShort();
|
||||||
|
if (reserved != reservedByte) {
|
||||||
|
throw new IOException("Expected "+Integer.toHexString(reservedByte)+ "after name before Unicode name, but found: " +
|
||||||
|
Integer.toHexString(reserved));
|
||||||
|
}
|
||||||
|
int unicodeNameRecordLength = in.readInt();
|
||||||
|
String unicode = readUnicodeString(in, unicodeNameRecordLength);
|
||||||
|
return new ASCIIUnicodeStringPair(ascii, unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
|
||||||
|
*
|
||||||
|
* @param stream the inputstream to read from
|
||||||
|
* @param length number of bytes to read from stream
|
||||||
|
* @param charset the character set encoding of the bytes in the stream
|
||||||
|
* @return a java String in the supplied character set
|
||||||
|
* @throws IOException If reading from the stream fails
|
||||||
|
*/
|
||||||
|
private static String readString(InputStream stream, int length, Charset charset) throws IOException {
|
||||||
|
byte[] buffer = IOUtils.safelyAllocate(length, 20000);
|
||||||
|
int bytesRead = IOUtils.readFully(stream, buffer);
|
||||||
|
if (bytesRead != length) {
|
||||||
|
throw new IOException("Tried to read: "+length +
|
||||||
|
", but could only read: "+bytesRead);
|
||||||
|
}
|
||||||
|
return new String(buffer, 0, length, charset);
|
||||||
|
}
|
||||||
|
|
||||||
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
|
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
|
||||||
byte[] buffer = new byte[unicodeNameRecordLength];
|
byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000);
|
||||||
IOUtils.readFully(in, buffer);
|
int bytesRead = IOUtils.readFully(in, buffer);
|
||||||
return new String(buffer, UTF_16LE);
|
if (bytesRead != unicodeNameRecordLength) {
|
||||||
|
|
||||||
|
}
|
||||||
|
return new String(buffer, StringUtil.UTF16LE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
|
||||||
|
* This will try to find the the first RLEDecompressing stream that starts with "Attribute".
|
||||||
|
* This relies on some, er, heuristics, admittedly.
|
||||||
|
*
|
||||||
|
* @param is full module inputstream to read
|
||||||
|
* @return uncompressed bytes if found, <code>null</code> otherwise
|
||||||
|
* @throws IOException for a true IOException copying the is to a byte array
|
||||||
|
*/
|
||||||
|
private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
|
||||||
|
//buffer to memory for multiple tries
|
||||||
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||||
|
IOUtils.copy(is, bos);
|
||||||
|
byte[] compressed = bos.toByteArray();
|
||||||
|
byte[] decompressed = null;
|
||||||
|
for (int i = 0; i < compressed.length; i++) {
|
||||||
|
if (compressed[i] == 0x01 && i < compressed.length-1) {
|
||||||
|
int w = LittleEndian.getUShort(compressed, i+1);
|
||||||
|
if (w <= 0 || (w & 0x7000) != 0x3000) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
|
||||||
|
if (decompressed != null) {
|
||||||
|
if (decompressed.length > 9) {
|
||||||
|
//this is a complete hack. The challenge is that there
|
||||||
|
//can be many 0 length or junk streams that are uncompressed
|
||||||
|
//look in the first 20 characters for "Attribute"
|
||||||
|
int firstX = Math.min(20, decompressed.length);
|
||||||
|
String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
|
||||||
|
if (start.contains("Attribute")) {
|
||||||
|
return decompressed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return decompressed;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte[] tryToDecompress(InputStream is) {
|
||||||
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||||
|
try {
|
||||||
|
IOUtils.copy(new RLEDecompressingInputStream(is), bos);
|
||||||
|
} catch (IllegalArgumentException e){
|
||||||
|
return null;
|
||||||
|
} catch (IllegalStateException e) {
|
||||||
|
return null;
|
||||||
|
} catch (IOException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return bos.toByteArray();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -156,7 +156,7 @@ public class RLEDecompressingInputStream extends InputStream {
|
||||||
private int readChunk() throws IOException {
|
private int readChunk() throws IOException {
|
||||||
pos = 0;
|
pos = 0;
|
||||||
int w = readShort(in);
|
int w = readShort(in);
|
||||||
if (w == -1) {
|
if (w == -1 || w == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
|
int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
|
||||||
|
|
|
@ -33,6 +33,7 @@ import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.apache.poi.POITestCase.assertContains;
|
import static org.apache.poi.POITestCase.assertContains;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
|
@ -251,6 +252,7 @@ public class TestVBAMacroReader {
|
||||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("59830.xls");
|
File f = POIDataSamples.getSpreadSheetInstance().getFile("59830.xls");
|
||||||
VBAMacroReader r = new VBAMacroReader(f);
|
VBAMacroReader r = new VBAMacroReader(f);
|
||||||
Map<String, String> macros = r.readMacros();
|
Map<String, String> macros = r.readMacros();
|
||||||
|
assertEquals(29, macros.size());
|
||||||
assertNotNull(macros.get("Module20"));
|
assertNotNull(macros.get("Module20"));
|
||||||
assertContains(macros.get("Module20"), "here start of superscripting");
|
assertContains(macros.get("Module20"), "here start of superscripting");
|
||||||
r.close();
|
r.close();
|
||||||
|
@ -261,6 +263,7 @@ public class TestVBAMacroReader {
|
||||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("59858.xls");
|
File f = POIDataSamples.getSpreadSheetInstance().getFile("59858.xls");
|
||||||
VBAMacroReader r = new VBAMacroReader(f);
|
VBAMacroReader r = new VBAMacroReader(f);
|
||||||
Map<String, String> macros = r.readMacros();
|
Map<String, String> macros = r.readMacros();
|
||||||
|
assertEquals(11, macros.size());
|
||||||
assertNotNull(macros.get("Sheet4"));
|
assertNotNull(macros.get("Sheet4"));
|
||||||
assertContains(macros.get("Sheet4"), "intentional constituent");
|
assertContains(macros.get("Sheet4"), "intentional constituent");
|
||||||
r.close();
|
r.close();
|
||||||
|
@ -271,6 +274,7 @@ public class TestVBAMacroReader {
|
||||||
File f = POIDataSamples.getDocumentInstance().getFile("60158.docm");
|
File f = POIDataSamples.getDocumentInstance().getFile("60158.docm");
|
||||||
VBAMacroReader r = new VBAMacroReader(f);
|
VBAMacroReader r = new VBAMacroReader(f);
|
||||||
Map<String, String> macros = r.readMacros();
|
Map<String, String> macros = r.readMacros();
|
||||||
|
assertEquals(2, macros.size());
|
||||||
assertNotNull(macros.get("NewMacros"));
|
assertNotNull(macros.get("NewMacros"));
|
||||||
assertContains(macros.get("NewMacros"), "' dirty");
|
assertContains(macros.get("NewMacros"), "' dirty");
|
||||||
r.close();
|
r.close();
|
||||||
|
@ -282,8 +286,24 @@ public class TestVBAMacroReader {
|
||||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("60273.xls");
|
File f = POIDataSamples.getSpreadSheetInstance().getFile("60273.xls");
|
||||||
VBAMacroReader r = new VBAMacroReader(f);
|
VBAMacroReader r = new VBAMacroReader(f);
|
||||||
Map<String, String> macros = r.readMacros();
|
Map<String, String> macros = r.readMacros();
|
||||||
|
assertEquals(2, macros.size());
|
||||||
assertNotNull(macros.get("Module1"));
|
assertNotNull(macros.get("Module1"));
|
||||||
assertContains(macros.get("Module1"), "9/8/2004");
|
assertContains(macros.get("Module1"), "9/8/2004");
|
||||||
r.close();
|
r.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bug60279() throws IOException {
|
||||||
|
File f = POIDataSamples.getDocumentInstance().getFile("60279.doc");
|
||||||
|
VBAMacroReader r = new VBAMacroReader(f);
|
||||||
|
Map<String, String> macros = r.readMacros();
|
||||||
|
assertEquals(1, macros.size());
|
||||||
|
String content = macros.get("ThisDocument");
|
||||||
|
assertContains(content, "Attribute VB_Base = \"1Normal.ThisDocument\"");
|
||||||
|
assertContains(content, "Attribute VB_Customizable = True");
|
||||||
|
r.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue