Unit test for VBA macro reading #52949

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-04-10 12:45:53 +00:00
parent 74c03a3f7b
commit f5091846ff
6 changed files with 251 additions and 109 deletions

View File

@ -19,6 +19,7 @@ package org.apache.poi.poifs.macros;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -30,10 +31,10 @@ import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.IOUtils;
@ -43,8 +44,9 @@ import org.apache.poi.util.RLEDecompressingInputStream;
* Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
* and returns them
*/
public class VBAMacroReader {
protected static final String VBA_PROJECT = "xl/vbaProject.bin";
public class VBAMacroReader implements Closeable {
protected static final String VBA_PROJECT_OOXML = "xl/vbaProject.bin";
protected static final String VBA_PROJECT_POIFS = "VBA";
private NPOIFSFileSystem fs;
@ -55,7 +57,6 @@ public class VBAMacroReader {
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
fs = new NPOIFSFileSystem(stream);
} else {
stream.unread(header8);
openOOXML(stream);
}
}
@ -75,125 +76,142 @@ public class VBAMacroReader {
ZipInputStream zis = new ZipInputStream(zipFile);
ZipEntry zipEntry;
while ((zipEntry = zis.getNextEntry()) != null) {
if (VBA_PROJECT.equals(zipEntry.getName())) {
if (VBA_PROJECT_OOXML.equals(zipEntry.getName())) {
try {
// Make a NPOIFS from the contents, and close the stream
this.fs = new NPOIFSFileSystem(zis);
} finally {
zis.closeEntry();
return;
} catch (IOException e) {
// Tidy up
zis.close();
// Pass on
throw e;
}
zis.close();
return;
}
}
zis.close();
throw new IllegalArgumentException("No VBA project found");
}
/**
 * Releases the underlying file system, if one is still open.
 * <p>
 * Calling this more than once is harmless: once the file system has been
 * released, further calls do nothing.
 *
 * @throws IOException if closing the underlying file system fails; the
 *         reader is still treated as closed afterwards
 */
public void close() throws IOException {
    NPOIFSFileSystem toClose = fs;
    if (toClose == null) {
        // Already closed - nothing left to release
        return;
    }
    // Drop our reference first so a failing close() can't leave us half-open
    fs = null;
    toClose.close();
}
/**
* Reads all macros from all modules of the opened office file.
* @return All the macros and their contents
*/
public Map<String, String> readMacros() throws IOException {
class Module {
Integer offset;
byte[] buf;
}
class ModuleMap extends HashMap<String, Module> {
final ModuleMap modules = new ModuleMap();
findMacros(fs.getRoot(), modules);
Charset charset = Charset.forName("Cp1252"); // default charset
Map<String, String> moduleSources = new HashMap<String, String>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
}
}
try {
final ModuleMap modules = new ModuleMap();
POIFSReader dirReader = new POIFSReader();
dirReader.registerListener(new POIFSReaderListener() {
return moduleSources;
}
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
try {
String name = event.getName();
if (event.getPath().toString().endsWith("\\VBA")) {
if ("dir".equals(name)) {
// process DIR
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());
String streamName = null;
while (true) {
int id = in.readShort();
if (id == -1 || id == 0x0010) {
break; // EOF or TERMINATOR
}
int len = in.readInt();
switch (id) {
case 0x0009: // PROJECTVERSION
in.skip(6);
break;
case 0x0003: // PROJECTCODEPAGE
int codepage = in.readShort();
modules.charset = Charset.forName("Cp" + codepage);
break;
case 0x001A: // STREAMNAME
byte[] streamNameBuf = new byte[len];
int count = in.read(streamNameBuf);
streamName = new String(streamNameBuf, 0, count, modules.charset);
break;
case 0x0031: // MODULEOFFSET
int moduleOffset = in.readInt();
Module module = modules.get(streamName);
if (module != null) {
ByteArrayOutputStream out = new ByteArrayOutputStream();
RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
module.buf, moduleOffset, module.buf.length - moduleOffset));
IOUtils.copy(stream, out);
stream.close();
out.close();
module.buf = out.toByteArray();
} else {
module = new Module();
module.offset = moduleOffset;
modules.put(streamName, module);
}
break;
default:
in.skip(len);
break;
}
}
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
Module module = modules.get(name);
final DocumentInputStream stream = event.getStream();
final InputStream in;
if (module == null) {
// no DIR stream with offsets yet, so store the compressed bytes for later
module = new Module();
modules.put(name, module);
in = stream;
} else {
// we know the offset already, so decompress immediately on-the-fly
stream.skip(module.offset);
in = new RLEDecompressingInputStream(stream);
}
final ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(in, out);
in.close();
out.close();
module.buf = out.toByteArray();
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
dirReader.read(null); // TODO
Map<String, String> moduleSources = new HashMap<String, String>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
/**
 * Holder for what has been learned so far about one macro module stream.
 */
protected static class Module {
// Value of the MODULEOFFSET record from the "dir" stream; stays null when the
// module stream was encountered before the "dir" stream described it
Integer offset;
// Bytes of the module stream: the raw stream contents while the offset is
// still unknown, replaced by the decompressed data once it has been applied
byte[] buf;
}
/**
 * Modules found so far, keyed by stream name, together with the charset
 * used to decode stream names and module source.
 */
protected static class ModuleMap extends HashMap<String, Module> {
// Replaced when a PROJECTCODEPAGE record is read from the "dir" stream
Charset charset = Charset.forName("Cp1252"); // default charset
}
protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
if (VBA_PROJECT_POIFS.equals(dir.getName())) {
// VBA project directory, process
readMacros(dir, modules);
} else {
// Check children
for (Entry child : dir) {
if (child instanceof DirectoryNode) {
findMacros((DirectoryNode)child, modules);
}
}
return moduleSources;
} catch (IOException e) {
e.printStackTrace();
throw e;
}
}
/**
 * Reads every relevant stream of a VBA project directory into
 * <code>modules</code>.
 * <p>
 * The "dir" stream supplies the code page and, per module, the offset at
 * which the compressed source starts. All other streams, apart from the
 * <code>__SRP*</code> and <code>_VBA_PROJECT</code> ones which hold no
 * macros, are treated as module streams. The two kinds may be encountered
 * in either order: whichever is seen second completes the decompression.
 *
 * @param macroDir the VBA project directory to scan
 * @param modules  receives the modules found; its charset is updated if the
 *                 "dir" stream declares a code page
 * @throws IOException if any of the streams cannot be read
 */
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
    for (Entry entry : macroDir) {
        if (! (entry instanceof DocumentNode)) { continue; }

        String name = entry.getName();
        boolean isDirStream = "dir".equals(name);
        if (!isDirStream && (name.startsWith("__SRP") || name.startsWith("_VBA_PROJECT"))) {
            // skip __SRP and _VBA_PROJECT since these do not contain macros,
            // and don't open a stream for them that nobody would close
            continue;
        }

        DocumentInputStream dis = new DocumentInputStream((DocumentNode)entry);
        try {
            if (isDirStream) {
                readDirStream(dis, modules);
            } else {
                readModuleStream(name, dis, modules);
            }
        } finally {
            // Always release the document stream, even if parsing failed part way
            dis.close();
        }
    }
}

/**
 * Parses the compressed "dir" stream, recording the project code page and
 * the source offset of each module it names.
 */
private void readDirStream(DocumentInputStream dis, ModuleMap modules) throws IOException {
    RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
    String streamName = null;
    while (true) {
        int id = in.readShort();
        if (id == -1 || id == 0x0010) {
            break; // EOF or TERMINATOR
        }
        int len = in.readInt();
        switch (id) {
            case 0x0009: // PROJECTVERSION
                // NOTE(review): skips a fixed 6 bytes rather than len -
                // presumably the version fields; confirm against MS-OVBA
                in.skip(6);
                break;
            case 0x0003: // PROJECTCODEPAGE
                int codepage = in.readShort();
                modules.charset = Charset.forName("Cp" + codepage);
                break;
            case 0x001A: // STREAMNAME
                byte[] streamNameBuf = new byte[len];
                // Keep reading until the buffer is full or the data runs out;
                // a negative result means nothing at all could be read
                int count = IOUtils.readFully(in, streamNameBuf);
                streamName = new String(streamNameBuf, 0, Math.max(count, 0), modules.charset);
                break;
            case 0x0031: // MODULEOFFSET
                int moduleOffset = in.readInt();
                Module module = modules.get(streamName);
                if (module != null) {
                    // The module stream was stored earlier still compressed,
                    // now that the offset is known it can be decompressed
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
                            module.buf, moduleOffset, module.buf.length - moduleOffset));
                    IOUtils.copy(stream, out);
                    stream.close();
                    out.close();
                    module.buf = out.toByteArray();
                } else {
                    // Module stream not seen yet, remember the offset for later
                    module = new Module();
                    module.offset = moduleOffset;
                    modules.put(streamName, module);
                }
                break;
            default:
                in.skip(len);
                break;
        }
    }
    in.close();
}

/**
 * Reads one module stream, decompressing it straight away if the "dir"
 * stream has already supplied its offset, or keeping the raw bytes if not.
 */
private void readModuleStream(String name, DocumentInputStream dis, ModuleMap modules) throws IOException {
    Module module = modules.get(name);
    final InputStream in;
    // TODO Refactor this to fetch dir then do the rest
    if (module == null) {
        // no DIR stream with offsets yet, so store the compressed bytes for later
        module = new Module();
        modules.put(name, module);
        in = dis;
    } else {
        // we know the offset already, so decompress immediately on-the-fly
        dis.skip(module.offset);
        in = new RLEDecompressingInputStream(dis);
    }
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    IOUtils.copy(in, out);
    in.close();
    out.close();
    module.buf = out.toByteArray();
}
}

View File

@ -19,6 +19,7 @@ package org.apache.poi.poifs;
import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry;
import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests;
import org.apache.poi.poifs.macros.TestVBAMacroReader;
import org.apache.poi.poifs.nio.TestDataSource;
import org.apache.poi.poifs.property.AllPOIFSPropertyTests;
import org.apache.poi.poifs.storage.AllPOIFSStorageTests;
@ -32,6 +33,7 @@ import org.junit.runners.Suite;
@Suite.SuiteClasses({
TestPOIFSReaderRegistry.class
, TestDataSource.class
, TestVBAMacroReader.class
, AllPOIFSFileSystemTests.class
, AllPOIFSPropertyTests.class
, AllPOIFSStorageTests.class

View File

@ -0,0 +1,112 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.macros;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileInputStream;
import java.util.Map;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.StringUtil;
import org.junit.Test;
/**
 * Checks that {@link VBAMacroReader} finds the same macros in the
 * SimpleMacro sample workbooks whether they are opened from a stream,
 * from a file, or from an already opened NPOIFS file system.
 */
public class TestVBAMacroReader {
    /** Full text of the reference macro, as stored in SimpleMacro.vba */
    private final String testMacroContents;
    /** The reference macro with everything up to and including its "Sub ...()" line removed */
    private final String testMacroNoSub;

    /**
     * Loads the reference macro text that the workbooks are compared against.
     *
     * @throws IllegalArgumentException if the sample doesn't start with "Sub "
     */
    public TestVBAMacroReader() throws Exception {
        File macro = HSSFTestDataSamples.getSampleFile("SimpleMacro.vba");
        String contents;
        FileInputStream macroStream = new FileInputStream(macro);
        try {
            contents = new String(IOUtils.toByteArray(macroStream), StringUtil.UTF8);
        } finally {
            // Don't leave the sample file open once it has been read
            macroStream.close();
        }
        testMacroContents = contents;

        if (! testMacroContents.startsWith("Sub ")) {
            throw new IllegalArgumentException("Not a macro");
        }
        // Skip past the "()" of the Sub line plus the one character after it
        // (assumes a single-character line ending - TODO confirm)
        testMacroNoSub = testMacroContents.substring(testMacroContents.indexOf("()")+3);
    }

    @Test
    public void fromStream() throws Exception {
        VBAMacroReader r;

        r = new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xls"));
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }

        r = new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xlsm"));
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }
    }

    @Test
    public void fromFile() throws Exception {
        VBAMacroReader r;

        r = new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xls"));
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }

        r = new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xlsm"));
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }
    }

    @Test
    public void fromNPOIFS() throws Exception {
        NPOIFSFileSystem fs = new NPOIFSFileSystem(
                HSSFTestDataSamples.getSampleFile("SimpleMacro.xls"));
        VBAMacroReader r = new VBAMacroReader(fs);
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }
    }

    /**
     * Verifies that the reader reports the five expected modules, that the
     * script-less ones carry their standard attributes, and that Module1
     * holds the reference macro.
     */
    protected void assertMacroContents(VBAMacroReader r) throws Exception {
        Map<String,String> contents = r.readMacros();

        assertFalse(contents.isEmpty());
        assertEquals(5, contents.size());

        // Check the ones without scripts
        String[] noScripts = new String[] { "ThisWorkbook",
                "Sheet1", "Sheet2", "Sheet3" };
        for (String entry : noScripts) {
            assertTrue(entry, contents.containsKey(entry));

            String content = contents.get(entry);
            assertContains(content, "Attribute VB_Exposed = True");
            assertContains(content, "Attribute VB_Customizable = True");
            assertContains(content, "Attribute VB_TemplateDerived = False");
            assertContains(content, "Attribute VB_GlobalNameSpace = False");
        }

        // Check the script one
        assertTrue("Module1", contents.containsKey("Module1"));
        String content = contents.get("Module1");
        assertContains(content, "Attribute VB_Name = \"Module1\"");
        assertContains(content, "Attribute TestMacro.VB_Description = \"This is a test macro\"");

        // And the macro itself
        assertContains(content, testMacroNoSub);
    }
}

View File

@ -0,0 +1,10 @@
Sub TestMacro()
'
' TestMacro Macro
' This is a test macro
'
'
ActiveCell.FormulaR1C1 = "This is a macro workbook"
Range("A2").Select
End Sub

Binary file not shown.

Binary file not shown.