Apply patch from bug 61096: Add support for modules in VBAMacroReader

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1811383 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2017-10-07 06:11:12 +00:00
parent fbdf4dca68
commit 2587ed1849
3 changed files with 169 additions and 98 deletions

View File

@ -0,0 +1,46 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.macros;
/**
 * A macro module found in an Office file: its source text plus the kind of
 * module it is.
 */
public interface Module {

    /**
     * The kinds of macro module that can be reported by
     * {@link #geModuleType()}.
     */
    enum ModuleType {
        Document, Module, Class
    }

    /**
     * Returns the content of this module.
     *
     * @return the module content
     */
    String getContent();

    /**
     * Returns the type of this module.
     * <p>
     * Note that the method name really is {@code geModuleType} (without the
     * "t"); implementors and callers must use that exact spelling.
     *
     * @return the module type
     */
    ModuleType geModuleType();
}

View File

@ -20,15 +20,8 @@ package org.apache.poi.poifs.macros;
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
@ -41,6 +34,7 @@ import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.macros.Module.ModuleType;
import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.IOUtils;
@ -115,6 +109,20 @@ public class VBAMacroReader implements Closeable {
fs = null;
}
/**
 * Reads all macro modules of the opened office file.
 * <p>
 * The file system root is scanned twice: once by {@code findMacros} and once
 * by {@code findProjectProperties}. Afterwards every collected module is
 * given the charset recorded on the module map and is returned under the
 * key it was stored with.
 *
 * @return a new map from module key to module; never {@code null}
 * @throws IOException if either scan of the file system fails
 */
public Map<String, Module> readMacroModules() throws IOException {
    final ModuleMap collected = new ModuleMap();
    findMacros(fs.getRoot(), collected);
    findProjectProperties(fs.getRoot(), collected);

    final Map<String, Module> result = new HashMap<>();
    for (String key : collected.keySet()) {
        final ModuleImpl impl = collected.get(key);
        // the charset is only known once the scans above have completed
        impl.charset = collected.charset;
        result.put(key, impl);
    }
    return result;
}
/**
* Reads all macros from all modules of the opened office file.
* @return All the macros and their contents
@ -122,30 +130,33 @@ public class VBAMacroReader implements Closeable {
* @since 3.15-beta2
*/
public Map<String, String> readMacros() throws IOException {
final ModuleMap modules = new ModuleMap();
findMacros(fs.getRoot(), modules);
Map<String, Module> modules = readMacroModules();
Map<String, String> moduleSources = new HashMap<>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
}
moduleSources.put(entry.getKey(), entry.getValue().getContent());
}
return moduleSources;
}
protected static class Module {
protected static class ModuleImpl implements Module {
Integer offset;
byte[] buf;
ModuleType moduleType;
Charset charset;
void read(InputStream in) throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(in, out);
out.close();
buf = out.toByteArray();
}
public String getContent() {
return new String(buf, charset);
}
public ModuleType geModuleType() {
return moduleType;
}
}
protected static class ModuleMap extends HashMap<String, Module> {
/**
 * Collects the {@link ModuleImpl} instances found while parsing, keyed by
 * the stream/module name they were registered under, together with the
 * charset that is used to decode strings and module contents.
 */
protected static class ModuleMap extends HashMap<String, ModuleImpl> {
// Used for decoding until something else is assigned to this field.
Charset charset = StringUtil.WIN_1252; // default charset
}
@ -189,10 +200,10 @@ public class VBAMacroReader implements Closeable {
*/
private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
int moduleOffset = in.readInt();
Module module = modules.get(streamName);
ModuleImpl module = modules.get(streamName);
if (module == null) {
// First time we've seen the module. Add it to the ModuleMap and decompress it later
module = new Module();
module = new ModuleImpl();
module.offset = moduleOffset;
modules.put(streamName, module);
// Would adding module.read(in) here be correct?
@ -207,17 +218,14 @@ public class VBAMacroReader implements Closeable {
}
private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
Module module = modules.get(name);
ModuleImpl module = modules.get(name);
// TODO Refactor this to fetch dir then do the rest
if (module == null) {
// no DIR stream with offsets yet, so store the compressed bytes for later
module = new Module();
module = new ModuleImpl();
modules.put(name, module);
InputStream dis = new DocumentInputStream(documentNode);
try {
try (InputStream dis = new DocumentInputStream(documentNode)) {
module.read(dis);
} finally {
dis.close();
}
} else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
@ -238,8 +246,7 @@ public class VBAMacroReader implements Closeable {
decompressed = new RLEDecompressingInputStream(compressed);
module.read(decompressed);
return;
} catch (IllegalArgumentException e) {
} catch (IllegalStateException e) {
} catch (IllegalArgumentException | IllegalStateException e) {
} finally {
IOUtils.closeQuietly(compressed);
IOUtils.closeQuietly(decompressed);
@ -247,7 +254,7 @@ public class VBAMacroReader implements Closeable {
//bad module.offset, try brute force
compressed = new DocumentInputStream(documentNode);
byte[] decompressedBytes = null;
byte[] decompressedBytes;
try {
decompressedBytes = findCompressedStreamWBruteForce(compressed);
} finally {
@ -320,6 +327,23 @@ public class VBAMacroReader implements Closeable {
}
}
/**
 * Recursively searches the given directory for documents named
 * "project" (compared case-insensitively) and passes each one to
 * {@link #readProjectProperties(DocumentInputStream, ModuleMap)}.
 * <p>
 * Every sub-directory is visited exactly once, and each stream opened here
 * is closed again before the method moves on.
 *
 * @param node    the directory to search, including all of its sub-directories
 * @param modules the modules collected so far; updated by
 *                {@code readProjectProperties}
 * @throws IOException if a project document cannot be opened or read
 */
protected void findProjectProperties(DirectoryNode node, ModuleMap modules) throws IOException {
    for (Entry entry : node) {
        if (entry instanceof DocumentNode && "project".equalsIgnoreCase(entry.getName())) {
            // try-with-resources: the stream was previously leaked
            try (DocumentInputStream dis = new DocumentInputStream((DocumentNode) entry)) {
                readProjectProperties(dis, modules);
            }
        } else if (entry instanceof DirectoryNode) {
            // descend into this directory only; siblings get their own turn
            // in the enclosing loop
            findProjectProperties((DirectoryNode) entry, modules);
        }
    }
}
private enum RecordType {
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
MODULE_OFFSET(0x0031),
@ -419,14 +443,12 @@ public class VBAMacroReader implements Closeable {
private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
DocumentNode dirDocumentNode = (DocumentNode)dir;
DocumentInputStream dis = new DocumentInputStream(dirDocumentNode);
DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
try {
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) {
String streamName = null;
int recordId = 0;
boolean inReferenceTwiddled = false;
try {
try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) {
while (true) {
recordId = in.readShort();
if (recordId == -1) {
@ -465,7 +487,7 @@ public class VBAMacroReader implements Closeable {
}
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
break;
case MODULE_DOC_STRING :
case MODULE_DOC_STRING:
int modDocStringLength = in.readInt();
readString(in, modDocStringLength, modules.charset);
int modDocStringReserved = in.readShort();
@ -520,11 +542,7 @@ public class VBAMacroReader implements Closeable {
throw new IOException(
"Error occurred while reading macros at section id "
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
} finally {
in.close();
}
} finally {
dis.close();
}
}
@ -561,6 +579,37 @@ public class VBAMacroReader implements Closeable {
return new String(buffer, 0, length, charset);
}
/**
 * Reads the textual project properties from the given stream and uses the
 * {@code Document=}, {@code Module=} and {@code Class=} entries to set the
 * {@link ModuleType} of the matching modules.
 * <p>
 * The stream is decoded with {@code modules.charset} and split into lines on
 * CR/LF pairs. Lines starting with {@code [} and lines without a
 * {@code =} are ignored. A value enclosed in double quotes has the quotes
 * removed. For {@code Document} entries, everything from {@code /&H} onwards
 * is dropped before the module is looked up. Entries naming a module that is
 * not present in {@code modules} are skipped.
 * <p>
 * NOTE(review): the input is presumably the PROJECT stream described in
 * MS-OVBA - confirm against the specification.
 *
 * @param dis     the stream to read; it is fully consumed but not closed here
 * @param modules the known modules, whose {@code moduleType} is updated
 * @throws IOException if reading from the stream fails
 */
protected void readProjectProperties(DocumentInputStream dis, ModuleMap modules) throws IOException {
    // Not closed on purpose: closing the reader would also close dis, which
    // belongs to the caller.
    InputStreamReader reader = new InputStreamReader(dis, modules.charset);
    StringBuilder builder = new StringBuilder();
    char[] buffer = new char[512];
    int read;
    while ((read = reader.read(buffer)) >= 0) {
        builder.append(buffer, 0, read);
    }

    for (String line : builder.toString().split("\r\n|\n\r")) {
        if (line.startsWith("[")) {
            continue;
        }
        int separator = line.indexOf('=');
        if (separator < 0) {
            // no key=value pair on this line, nothing to evaluate
            continue;
        }
        String key = line.substring(0, separator);
        String value = line.substring(separator + 1);
        if (value.length() > 1 && value.startsWith("\"") && value.endsWith("\"")) {
            // Remove the enclosing double quotes
            value = value.substring(1, value.length() - 1);
        }

        ModuleType type;
        String moduleName = value;
        if ("Document".equals(key)) {
            type = ModuleType.Document;
            int suffix = value.indexOf("/&H");
            if (suffix >= 0) {
                moduleName = value.substring(0, suffix);
            }
        } else if ("Module".equals(key)) {
            type = ModuleType.Module;
        } else if ("Class".equals(key)) {
            type = ModuleType.Class;
        } else {
            continue;
        }

        ModuleImpl module = modules.get(moduleName);
        if (module != null) {
            // modules listed here but never discovered are silently skipped
            module.moduleType = type;
        }
    }
}
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000);
int bytesRead = IOUtils.readFully(in, buffer);
@ -613,11 +662,7 @@ public class VBAMacroReader implements Closeable {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try {
IOUtils.copy(new RLEDecompressingInputStream(is), bos);
} catch (IllegalArgumentException e){
return null;
} catch (IllegalStateException e) {
return null;
} catch (IOException e) {
} catch (IllegalArgumentException | IOException | IllegalStateException e){
return null;
}
return bos.toByteArray();

View File

@ -24,6 +24,11 @@ import org.apache.poi.util.StringUtil;
import org.junit.Ignore;
import org.junit.Test;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -32,11 +37,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
public class TestVBAMacroReader {
private static final Map<POIDataSamples, String> expectedMacroContents;
@ -44,11 +44,8 @@ public class TestVBAMacroReader {
File macro = poiDataSamples.getFile("SimpleMacro.vba");
final byte[] bytes;
try {
FileInputStream stream = new FileInputStream(macro);
try {
try (FileInputStream stream = new FileInputStream(macro)) {
bytes = IOUtils.toByteArray(stream);
} finally {
stream.close();
}
} catch (IOException e) {
throw new RuntimeException(e);
@ -79,140 +76,125 @@ public class TestVBAMacroReader {
//////////////////////////////// From Stream /////////////////////////////
@Test
public void HSSFfromStream() throws Exception {
public void HSSFFromStream() throws Exception {
fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls");
}
@Test
public void XSSFfromStream() throws Exception {
public void XSSFFromStream() throws Exception {
fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
}
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
@Test
public void HSLFfromStream() throws Exception {
public void HSLFFromStream() throws Exception {
fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
}
@Test
public void XSLFfromStream() throws Exception {
public void XSLFFromStream() throws Exception {
fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.pptm");
}
@Test
public void HWPFfromStream() throws Exception {
public void HWPFFromStream() throws Exception {
fromStream(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc");
}
@Test
public void XWPFfromStream() throws Exception {
public void XWPFFromStream() throws Exception {
fromStream(POIDataSamples.getDocumentInstance(), "SimpleMacro.docm");
}
@Ignore("Found 0 macros")
@Test
public void HDGFfromStream() throws Exception {
public void HDGFFromStream() throws Exception {
fromStream(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd");
}
@Test
public void XDGFfromStream() throws Exception {
public void XDGFFromStream() throws Exception {
fromStream(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsdm");
}
//////////////////////////////// From File /////////////////////////////
@Test
public void HSSFfromFile() throws Exception {
public void HSSFFromFile() throws Exception {
fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls");
}
@Test
public void XSSFfromFile() throws Exception {
public void XSSFFromFile() throws Exception {
fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
}
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
@Test
public void HSLFfromFile() throws Exception {
public void HSLFFromFile() throws Exception {
fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
}
@Test
public void XSLFfromFile() throws Exception {
public void XSLFFromFile() throws Exception {
fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.pptm");
}
@Test
public void HWPFfromFile() throws Exception {
public void HWPFFromFile() throws Exception {
fromFile(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc");
}
@Test
public void XWPFfromFile() throws Exception {
public void XWPFFromFile() throws Exception {
fromFile(POIDataSamples.getDocumentInstance(), "SimpleMacro.docm");
}
@Ignore("Found 0 macros")
@Test
public void HDGFfromFile() throws Exception {
public void HDGFFromFile() throws Exception {
fromFile(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd");
}
@Test
public void XDGFfromFile() throws Exception {
public void XDGFFromFile() throws Exception {
fromFile(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsdm");
}
//////////////////////////////// From NPOIFS /////////////////////////////
@Test
public void HSSFfromNPOIFS() throws Exception {
public void HSSFFromNPOIFS() throws Exception {
fromNPOIFS(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xls");
}
@Ignore("bug 59302: Found 0 macros")
@Test
public void HSLFfromNPOIFS() throws Exception {
public void HSLFFromNPOIFS() throws Exception {
fromNPOIFS(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
}
@Test
public void HWPFfromNPOIFS() throws Exception {
public void HWPFFromNPOIFS() throws Exception {
fromNPOIFS(POIDataSamples.getDocumentInstance(), "SimpleMacro.doc");
}
@Ignore("Found 0 macros")
@Test
public void HDGFfromNPOIFS() throws Exception {
public void HDGFFromNPOIFS() throws Exception {
fromNPOIFS(POIDataSamples.getDiagramInstance(), "SimpleMacro.vsd");
}
protected void fromFile(POIDataSamples dataSamples, String filename) throws IOException {
File f = dataSamples.getFile(filename);
VBAMacroReader r = new VBAMacroReader(f);
try {
try (VBAMacroReader r = new VBAMacroReader(f)) {
assertMacroContents(dataSamples, r);
} finally {
r.close();
}
}
protected void fromStream(POIDataSamples dataSamples, String filename) throws IOException {
InputStream fis = dataSamples.openResourceAsStream(filename);
try {
VBAMacroReader r = new VBAMacroReader(fis);
try {
try (InputStream fis = dataSamples.openResourceAsStream(filename)) {
try (VBAMacroReader r = new VBAMacroReader(fis)) {
assertMacroContents(dataSamples, r);
} finally {
r.close();
}
} finally {
fis.close();
}
}
protected void fromNPOIFS(POIDataSamples dataSamples, String filename) throws IOException {
File f = dataSamples.getFile(filename);
NPOIFSFileSystem fs = new NPOIFSFileSystem(f);
try {
VBAMacroReader r = new VBAMacroReader(fs);
try {
try (NPOIFSFileSystem fs = new NPOIFSFileSystem(f)) {
try (VBAMacroReader r = new VBAMacroReader(fs)) {
assertMacroContents(dataSamples, r);
} finally {
r.close();
}
} finally {
fs.close();
}
}
protected void assertMacroContents(POIDataSamples samples, VBAMacroReader r) throws IOException {
assertNotNull(r);
Map<String,String> contents = r.readMacros();
Map<String,Module> contents = r.readMacroModules();
assertNotNull(contents);
assertFalse("Found 0 macros", contents.isEmpty());
/*
@ -235,16 +217,17 @@ public class TestVBAMacroReader {
// Check the script one
assertContains(contents, "Module1");
String content = contents.get("Module1");
assertNotNull(content);
Module module = contents.get("Module1");
assertNotNull(module);
String content = module.getContent();
assertContains(content, "Attribute VB_Name = \"Module1\"");
//assertContains(content, "Attribute TestMacro.VB_Description = \"This is a test macro\"");
assertEquals(Module.ModuleType.Module, module.geModuleType());
// And the macro itself
String testMacroNoSub = expectedMacroContents.get(samples);
assertContains(content, testMacroNoSub);
}
@Test
public void bug59830() throws IOException {
@ -292,7 +275,6 @@ public class TestVBAMacroReader {
r.close();
}
@Test
public void bug60279() throws IOException {
File f = POIDataSamples.getDocumentInstance().getFile("60279.doc");
@ -304,6 +286,4 @@ public class TestVBAMacroReader {
assertContains(content, "Attribute VB_Customizable = True");
r.close();
}
}