Unit test for VBA macro reading #52949

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738427 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-04-10 12:45:53 +00:00
parent 74c03a3f7b
commit f5091846ff
6 changed files with 251 additions and 109 deletions

View File

@ -19,6 +19,7 @@ package org.apache.poi.poifs.macros;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -30,10 +31,10 @@ import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.IOUtils;
@ -43,8 +44,9 @@ import org.apache.poi.util.RLEDecompressingInputStream;
* Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
* and returns them
*/
public class VBAMacroReader {
protected static final String VBA_PROJECT = "xl/vbaProject.bin";
public class VBAMacroReader implements Closeable {
protected static final String VBA_PROJECT_OOXML = "xl/vbaProject.bin";
protected static final String VBA_PROJECT_POIFS = "VBA";
private NPOIFSFileSystem fs;
@ -55,7 +57,6 @@ public class VBAMacroReader {
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
fs = new NPOIFSFileSystem(stream);
} else {
stream.unread(header8);
openOOXML(stream);
}
}
@ -75,44 +76,78 @@ public class VBAMacroReader {
ZipInputStream zis = new ZipInputStream(zipFile);
ZipEntry zipEntry;
while ((zipEntry = zis.getNextEntry()) != null) {
if (VBA_PROJECT.equals(zipEntry.getName())) {
if (VBA_PROJECT_OOXML.equals(zipEntry.getName())) {
try {
// Make a NPOIFS from the contents, and close the stream
this.fs = new NPOIFSFileSystem(zis);
} finally {
zis.closeEntry();
}
zis.close();
return;
} catch (IOException e) {
// Tidy up
zis.close();
// Pass on
throw e;
}
}
}
zis.close();
throw new IllegalArgumentException("No VBA project found");
}
/**
 * Closes the underlying file system and drops this reader's reference to it.
 * Calling this method again after the reader has been closed has no effect.
 *
 * @throws IOException if closing the underlying file system fails
 */
public void close() throws IOException {
    if (fs == null) {
        // Already closed: Closeable requires a repeated close() to be a no-op
        return;
    }
    try {
        fs.close();
    } finally {
        // Mark as closed even if the underlying close failed
        fs = null;
    }
}
/**
* Reads all macros from all modules of the opened office file.
* @return All the macros and their contents
*/
public Map<String, String> readMacros() throws IOException {
class Module {
final ModuleMap modules = new ModuleMap();
findMacros(fs.getRoot(), modules);
Map<String, String> moduleSources = new HashMap<String, String>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
}
}
return moduleSources;
}
/**
 * Working state held for a single module stream while the VBA project
 * directory is being read.
 */
protected static class Module {
// Number of bytes skipped at the start of the module's stream before it is
// RLE-decompressed. Presumably filled in from the "dir" stream - confirm
// against the part of readMacros(DirectoryNode, ModuleMap) not shown here.
Integer offset;
// Bytes copied out of the module's stream; null or empty buffers are
// skipped when readMacros() builds its result map.
byte[] buf;
}
class ModuleMap extends HashMap<String, Module> {
/**
 * Modules found so far, keyed by stream name, together with the charset
 * that readMacros() uses to turn each module's bytes into a String.
 */
protected static class ModuleMap extends HashMap<String, Module> {
// Charset applied to every module buffer when decoding; starts as Cp1252.
// NOTE(review): looks like the "dir" stream processing may replace this - confirm.
Charset charset = Charset.forName("Cp1252"); // default charset
}
try {
final ModuleMap modules = new ModuleMap();
POIFSReader dirReader = new POIFSReader();
dirReader.registerListener(new POIFSReaderListener() {
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
try {
String name = event.getName();
if (event.getPath().toString().endsWith("\\VBA")) {
/**
 * Walks the directory tree starting at the given directory, looking for
 * directories named {@link #VBA_PROJECT_POIFS}. Each one found is handed
 * to {@link #readMacros(DirectoryNode, ModuleMap)}; its own children are
 * not searched any further.
 *
 * @param dir the directory to inspect
 * @param modules the map that collects the modules which are found
 * @throws IOException if reading a VBA project directory fails
 */
protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
    final boolean isVbaProject = VBA_PROJECT_POIFS.equals(dir.getName());
    if (isVbaProject) {
        // This is a VBA project directory, so read it and stop descending
        readMacros(dir, modules);
        return;
    }

    // Not a VBA project directory: recurse into every sub-directory
    for (Entry candidate : dir) {
        if (!(candidate instanceof DirectoryNode)) {
            continue;
        }
        findMacros((DirectoryNode) candidate, modules);
    }
}
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
for (Entry entry : macroDir) {
if (! (entry instanceof DocumentNode)) { continue; }
String name = entry.getName();
DocumentNode document = (DocumentNode)entry;
DocumentInputStream dis = new DocumentInputStream(document);
if ("dir".equals(name)) {
// process DIR
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
String streamName = null;
while (true) {
int id = in.readShort();
@ -155,20 +190,21 @@ public class VBAMacroReader {
break;
}
}
in.close();
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
Module module = modules.get(name);
final DocumentInputStream stream = event.getStream();
final InputStream in;
// TODO Refactor this to fetch dir then do the rest
if (module == null) {
// no DIR stream with offsets yet, so store the compressed bytes for later
module = new Module();
modules.put(name, module);
in = stream;
in = dis;
} else {
// we know the offset already, so decompress immediately on-the-fly
stream.skip(module.offset);
in = new RLEDecompressingInputStream(stream);
dis.skip(module.offset);
in = new RLEDecompressingInputStream(dis);
}
final ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(in, out);
@ -177,23 +213,5 @@ public class VBAMacroReader {
module.buf = out.toByteArray();
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
dirReader.read(null); // TODO
Map<String, String> moduleSources = new HashMap<String, String>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
}
}
return moduleSources;
} catch (IOException e) {
e.printStackTrace();
throw e;
}
}
}

View File

@ -19,6 +19,7 @@ package org.apache.poi.poifs;
import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry;
import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests;
import org.apache.poi.poifs.macros.TestVBAMacroReader;
import org.apache.poi.poifs.nio.TestDataSource;
import org.apache.poi.poifs.property.AllPOIFSPropertyTests;
import org.apache.poi.poifs.storage.AllPOIFSStorageTests;
@ -32,6 +33,7 @@ import org.junit.runners.Suite;
@Suite.SuiteClasses({
TestPOIFSReaderRegistry.class
, TestDataSource.class
, TestVBAMacroReader.class
, AllPOIFSFileSystemTests.class
, AllPOIFSPropertyTests.class
, AllPOIFSStorageTests.class

View File

@ -0,0 +1,112 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.macros;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileInputStream;
import java.util.Map;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.StringUtil;
import org.junit.Test;
/**
 * Tests that {@link VBAMacroReader} returns the expected modules and macro
 * source for the SimpleMacro sample workbooks, whether the reader is opened
 * from a stream, a file or an already opened {@link NPOIFSFileSystem}.
 */
public class TestVBAMacroReader {
    /** Full text of the SimpleMacro.vba sample file, decoded as UTF-8. */
    private final String testMacroContents;
    /**
     * The sample text starting three characters after the first "()" begins,
     * i.e. past the parentheses and the one character that follows them.
     */
    private final String testMacroNoSub;

    /**
     * Loads the reference macro text which the extracted macros are compared to.
     *
     * @throws Exception if the sample can't be read
     * @throws IllegalArgumentException if the sample doesn't start with "Sub "
     */
    public TestVBAMacroReader() throws Exception {
        File macro = HSSFTestDataSamples.getSampleFile("SimpleMacro.vba");
        byte[] macroBytes;
        FileInputStream macroStream = new FileInputStream(macro);
        try {
            macroBytes = IOUtils.toByteArray(macroStream);
        } finally {
            // Always release the file handle, even if the read fails
            macroStream.close();
        }
        testMacroContents = new String(macroBytes, StringUtil.UTF8);

        if (! testMacroContents.startsWith("Sub ")) {
            throw new IllegalArgumentException("Not a macro");
        }
        testMacroNoSub = testMacroContents.substring(testMacroContents.indexOf("()")+3);
    }

    /** Reads the macros from both sample workbooks, opened as streams. */
    @Test
    public void fromStream() throws Exception {
        assertMacroContentsAndClose(
                new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xls")));
        assertMacroContentsAndClose(
                new VBAMacroReader(HSSFTestDataSamples.openSampleFileStream("SimpleMacro.xlsm")));
    }

    /** Reads the macros from both sample workbooks, opened as files. */
    @Test
    public void fromFile() throws Exception {
        assertMacroContentsAndClose(
                new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xls")));
        assertMacroContentsAndClose(
                new VBAMacroReader(HSSFTestDataSamples.getSampleFile("SimpleMacro.xlsm")));
    }

    /** Reads the macros from the .xls sample via an existing NPOIFSFileSystem. */
    @Test
    public void fromNPOIFS() throws Exception {
        NPOIFSFileSystem fs = new NPOIFSFileSystem(
                HSSFTestDataSamples.getSampleFile("SimpleMacro.xls"));
        assertMacroContentsAndClose(new VBAMacroReader(fs));
    }

    /**
     * Runs {@link #assertMacroContents(VBAMacroReader)} and closes the reader
     * afterwards, so that a failed assertion doesn't leave it open.
     */
    private void assertMacroContentsAndClose(VBAMacroReader r) throws Exception {
        try {
            assertMacroContents(r);
        } finally {
            r.close();
        }
    }

    /**
     * Checks that the reader returns the five expected modules: four without
     * any script of their own, plus Module1 holding the test macro.
     *
     * @param r the reader to fetch the macros from; it is not closed here
     * @throws Exception if reading the macros fails
     */
    protected void assertMacroContents(VBAMacroReader r) throws Exception {
        Map<String,String> contents = r.readMacros();

        assertFalse(contents.isEmpty());
        assertEquals(5, contents.size());

        // Check the ones without scripts
        String[] noScripts = new String[] { "ThisWorkbook",
                "Sheet1", "Sheet2", "Sheet3" };
        for (String entry : noScripts) {
            assertTrue(entry, contents.containsKey(entry));

            String content = contents.get(entry);
            assertContains(content, "Attribute VB_Exposed = True");
            assertContains(content, "Attribute VB_Customizable = True");
            assertContains(content, "Attribute VB_TemplateDerived = False");
            assertContains(content, "Attribute VB_GlobalNameSpace = False");
        }

        // Check the script one; fail with the module name if it is missing
        assertTrue("Module1", contents.containsKey("Module1"));
        String content = contents.get("Module1");
        assertContains(content, "Attribute VB_Name = \"Module1\"");
        assertContains(content, "Attribute TestMacro.VB_Description = \"This is a test macro\"");

        // And the macro itself
        assertContains(content, testMacroNoSub);
    }
}

View File

@ -0,0 +1,10 @@
Sub TestMacro()
'
' TestMacro Macro
' This is a test macro
'
'
ActiveCell.FormulaR1C1 = "This is a macro workbook"
Range("A2").Select
End Sub

Binary file not shown.

Binary file not shown.