mirror of https://github.com/apache/poi.git
BUG-59302 --add minimal support for VBAMacro extraction to HSLF; credit to Andreas Beeker for this patch. Problems are mine.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1765696 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2df4509dc5
commit
9eef74edd1
|
@ -46,8 +46,15 @@ import org.apache.poi.util.IOUtils;
|
|||
import org.apache.poi.util.RLEDecompressingInputStream;
|
||||
|
||||
/**
|
||||
* Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
||||
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
||||
* and returns them.
|
||||
* </p>
|
||||
* <p>
|
||||
* <b>NOTE:</b> This does not read macros from .ppt files.
|
||||
* See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
|
||||
* module for an example of how to do this. Patches that make macro
|
||||
* extraction from .ppt more elegant are welcomed!
|
||||
* </p>
|
||||
*
|
||||
* @since 3.15-beta2
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hslf.record;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* A container record that specifies information about the document and document display settings.
|
||||
*/
|
||||
public final class DocInfoListContainer extends RecordContainer {
|
||||
private byte[] _header;
|
||||
private static long _type = RecordTypes.List.typeID;
|
||||
|
||||
// Links to our more interesting children
|
||||
|
||||
/**
|
||||
* Set things up, and find our more interesting children
|
||||
*/
|
||||
protected DocInfoListContainer(byte[] source, int start, int len) {
|
||||
// Grab the header
|
||||
_header = new byte[8];
|
||||
System.arraycopy(source,start,_header,0,8);
|
||||
|
||||
// Find our children
|
||||
_children = Record.findChildRecords(source,start+8,len-8);
|
||||
findInterestingChildren();
|
||||
}
|
||||
|
||||
/**
|
||||
* Go through our child records, picking out the ones that are
|
||||
* interesting, and saving those for use by the easy helper
|
||||
* methods.
|
||||
*/
|
||||
private void findInterestingChildren() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new DocInfoListContainer, with blank fields - not yet supported
|
||||
*/
|
||||
private DocInfoListContainer() {
|
||||
_header = new byte[8];
|
||||
_children = new Record[0];
|
||||
|
||||
// Setup our header block
|
||||
_header[0] = 0x0f; // We are a container record
|
||||
LittleEndian.putShort(_header, 2, (short)_type);
|
||||
|
||||
// Setup our child records
|
||||
findInterestingChildren();
|
||||
}
|
||||
|
||||
/**
|
||||
* We are of type 0x7D0
|
||||
*/
|
||||
public long getRecordType() { return _type; }
|
||||
|
||||
/**
|
||||
* Write the contents of the record back, so it can be written
|
||||
* to disk
|
||||
*/
|
||||
public void writeOut(OutputStream out) throws IOException {
|
||||
writeOut(_header[0],_header[1],_type,_children,out);
|
||||
}
|
||||
|
||||
}
|
|
@ -143,6 +143,15 @@ public class ExOleObjStg extends RecordAtom implements PositionDependentRecord,
|
|||
return RecordTypes.ExOleObjStg.typeID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the record instance from the header
|
||||
*
|
||||
* @return record instance
|
||||
*/
|
||||
public int getRecordInstance() {
|
||||
return (LittleEndian.getUShort(_header, 0) >>> 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the contents of the record back, so it can be written
|
||||
* to disk.
|
||||
|
|
|
@ -47,8 +47,8 @@ public enum RecordTypes {
|
|||
ViewInfo(1020,null),
|
||||
ViewInfoAtom(1021,null),
|
||||
SlideViewInfoAtom(1022,null),
|
||||
VBAInfo(1023,null),
|
||||
VBAInfoAtom(1024,null),
|
||||
VBAInfo(1023,VBAInfoContainer.class),
|
||||
VBAInfoAtom(1024,VBAInfoAtom.class),
|
||||
SSDocInfoAtom(1025,null),
|
||||
Summary(1026,null),
|
||||
DocRoutingSlip(1030,null),
|
||||
|
@ -63,7 +63,7 @@ public enum RecordTypes {
|
|||
NamedShowSlides(1042,null),
|
||||
SheetProperties(1044,null),
|
||||
RoundTripCustomTableStyles12Atom(1064,null),
|
||||
List(2000,null),
|
||||
List(2000,DocInfoListContainer.class),
|
||||
FontCollection(2005,FontCollection.class),
|
||||
BookmarkCollection(2019,null),
|
||||
SoundCollection(2020,SoundCollection.class),
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hslf.record;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* An atom record that specifies a reference to the VBA project storage.
|
||||
*/
|
||||
public final class VBAInfoAtom extends RecordAtom {
|
||||
private static final long _type = RecordTypes.VBAInfoAtom.typeID;
|
||||
|
||||
/**
|
||||
* Record header.
|
||||
*/
|
||||
private byte[] _header;
|
||||
|
||||
/**
|
||||
* Record data.
|
||||
*/
|
||||
private long persistIdRef;
|
||||
private boolean hasMacros;
|
||||
private long version;
|
||||
|
||||
/**
|
||||
* Constructs an empty atom - not yet supported
|
||||
*/
|
||||
private VBAInfoAtom() {
|
||||
_header = new byte[8];
|
||||
// TODO: fix me
|
||||
LittleEndian.putUInt(_header, 0, _type);
|
||||
persistIdRef = 0;
|
||||
hasMacros = true;
|
||||
version = 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the vba atom record from its source data.
|
||||
*
|
||||
* @param source the source data as a byte array.
|
||||
* @param start the start offset into the byte array.
|
||||
* @param len the length of the slice in the byte array.
|
||||
*/
|
||||
public VBAInfoAtom(byte[] source, int start, int len) {
|
||||
// Get the header.
|
||||
_header = new byte[8];
|
||||
System.arraycopy(source,start,_header,0,8);
|
||||
|
||||
// Get the record data.
|
||||
persistIdRef = LittleEndian.getUInt(source, start+8);
|
||||
hasMacros = (LittleEndian.getUInt(source, start+12) == 1);
|
||||
version = LittleEndian.getUInt(source, start+16);
|
||||
}
|
||||
/**
|
||||
* Gets the record type.
|
||||
* @return the record type.
|
||||
*/
|
||||
public long getRecordType() { return _type; }
|
||||
|
||||
/**
|
||||
* Write the contents of the record back, so it can be written
|
||||
* to disk
|
||||
*
|
||||
* @param out the output stream to write to.
|
||||
* @throws java.io.IOException if an error occurs.
|
||||
*/
|
||||
public void writeOut(OutputStream out) throws IOException {
|
||||
out.write(_header);
|
||||
LittleEndian.putUInt(persistIdRef, out);
|
||||
LittleEndian.putUInt(hasMacros ? 1 : 0, out);
|
||||
LittleEndian.putUInt(version, out);
|
||||
}
|
||||
|
||||
public long getPersistIdRef() {
|
||||
return persistIdRef;
|
||||
}
|
||||
|
||||
public void setPersistIdRef(long persistIdRef) {
|
||||
this.persistIdRef = persistIdRef;
|
||||
}
|
||||
|
||||
public boolean isHasMacros() {
|
||||
return hasMacros;
|
||||
}
|
||||
|
||||
public void setHasMacros(boolean hasMacros) {
|
||||
this.hasMacros = hasMacros;
|
||||
}
|
||||
|
||||
public long getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(long version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hslf.record;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* A container record that specifies VBA information for the document.
|
||||
*/
|
||||
public final class VBAInfoContainer extends RecordContainer {
|
||||
private byte[] _header;
|
||||
private static long _type = RecordTypes.VBAInfo.typeID;
|
||||
|
||||
// Links to our more interesting children
|
||||
|
||||
/**
|
||||
* Set things up, and find our more interesting children
|
||||
*/
|
||||
protected VBAInfoContainer(byte[] source, int start, int len) {
|
||||
// Grab the header
|
||||
_header = new byte[8];
|
||||
System.arraycopy(source, start, _header, 0, 8);
|
||||
|
||||
// Find our children
|
||||
_children = Record.findChildRecords(source, start + 8, len - 8);
|
||||
|
||||
findInterestingChildren();
|
||||
}
|
||||
|
||||
/**
|
||||
* Go through our child records, picking out the ones that are
|
||||
* interesting, and saving those for use by the easy helper
|
||||
* methods.
|
||||
*/
|
||||
private void findInterestingChildren() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new VBAInfoContainer, with blank fields - not yet supported
|
||||
*/
|
||||
private VBAInfoContainer() {
|
||||
_header = new byte[8];
|
||||
_children = new Record[0];
|
||||
|
||||
// Setup our header block
|
||||
_header[0] = 0x0f; // We are a container record
|
||||
LittleEndian.putShort(_header, 2, (short) _type);
|
||||
|
||||
// Setup our child records
|
||||
findInterestingChildren();
|
||||
}
|
||||
|
||||
/**
|
||||
* We are of type 0x3FF
|
||||
*/
|
||||
public long getRecordType() {
|
||||
return _type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the contents of the record back, so it can be written
|
||||
* to disk
|
||||
*/
|
||||
public void writeOut(OutputStream out) throws IOException {
|
||||
writeOut(_header[0], _header[1], _type, _children, out);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.poi.hslf.usermodel;
|
||||
|
||||
import static org.apache.poi.POITestCase.assertContains;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
@ -30,7 +31,9 @@ import java.awt.geom.Path2D;
|
|||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.text.AttributedCharacterIterator;
|
||||
|
@ -43,6 +46,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.ddf.AbstractEscherOptRecord;
|
||||
import org.apache.poi.ddf.EscherArrayProperty;
|
||||
import org.apache.poi.ddf.EscherColorRef;
|
||||
|
@ -51,12 +55,18 @@ import org.apache.poi.hslf.HSLFTestDataSamples;
|
|||
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
|
||||
import org.apache.poi.hslf.extractor.PowerPointExtractor;
|
||||
import org.apache.poi.hslf.model.HeadersFooters;
|
||||
import org.apache.poi.hslf.record.DocInfoListContainer;
|
||||
import org.apache.poi.hslf.record.Document;
|
||||
import org.apache.poi.hslf.record.Record;
|
||||
import org.apache.poi.hslf.record.RecordTypes;
|
||||
import org.apache.poi.hslf.record.SlideListWithText;
|
||||
import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
|
||||
import org.apache.poi.hslf.record.TextHeaderAtom;
|
||||
import org.apache.poi.hslf.record.VBAInfoAtom;
|
||||
import org.apache.poi.hslf.record.VBAInfoContainer;
|
||||
import org.apache.poi.hssf.usermodel.DummyGraphics2d;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.macros.VBAMacroReader;
|
||||
import org.apache.poi.sl.draw.DrawFactory;
|
||||
import org.apache.poi.sl.draw.DrawPaint;
|
||||
import org.apache.poi.sl.draw.DrawTextParagraph;
|
||||
|
@ -72,6 +82,7 @@ import org.apache.poi.sl.usermodel.TextBox;
|
|||
import org.apache.poi.sl.usermodel.TextParagraph;
|
||||
import org.apache.poi.sl.usermodel.TextParagraph.TextAlign;
|
||||
import org.apache.poi.sl.usermodel.TextRun;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
import org.apache.poi.util.Units;
|
||||
|
@ -948,4 +959,51 @@ public final class TestBugs {
|
|||
|
||||
ppt2.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bug59302() throws IOException {
|
||||
//add extraction from PPT
|
||||
Map<String, String> macros = getMacrosFromHSLF("59302.ppt");
|
||||
assertNotNull("couldn't find macros", macros);
|
||||
assertNotNull("couldn't find second module", macros.get("Module2"));
|
||||
assertContains(macros.get("Module2"), "newMacro in Module2");
|
||||
|
||||
assertNotNull("couldn't find first module", macros.get("Module1"));
|
||||
assertContains(macros.get("Module1"), "Italicize");
|
||||
|
||||
macros = getMacrosFromHSLF("SimpleMacro.ppt");
|
||||
assertNotNull(macros.get("Module1"));
|
||||
assertContains(macros.get("Module1"), "This is a macro slideshow");
|
||||
}
|
||||
|
||||
//It isn't pretty, but it works...
|
||||
private Map<String, String> getMacrosFromHSLF(String fileName) throws IOException {
|
||||
InputStream is = null;
|
||||
NPOIFSFileSystem npoifs = null;
|
||||
try {
|
||||
is = new FileInputStream(POIDataSamples.getSlideShowInstance().getFile(fileName));
|
||||
npoifs = new NPOIFSFileSystem(is);
|
||||
//TODO: should we run the VBAMacroReader on this npoifs?
|
||||
//TBD: We know that ppt typically don't store macros in the regular place,
|
||||
//but _can_ they?
|
||||
|
||||
HSLFSlideShow ppt = new HSLFSlideShow(npoifs);
|
||||
|
||||
//get macro persist id
|
||||
DocInfoListContainer list = (DocInfoListContainer)ppt.getDocumentRecord().findFirstOfType(RecordTypes.List.typeID);
|
||||
VBAInfoContainer vbaInfo = (VBAInfoContainer)list.findFirstOfType(RecordTypes.VBAInfo.typeID);
|
||||
VBAInfoAtom vbaAtom = (VBAInfoAtom)vbaInfo.findFirstOfType(RecordTypes.VBAInfoAtom.typeID);
|
||||
long persistId = vbaAtom.getPersistIdRef();
|
||||
for (HSLFObjectData objData : ppt.getEmbeddedObjects()) {
|
||||
if (objData.getExOleObjStg().getPersistId() == persistId) {
|
||||
return new VBAMacroReader(objData.getData()).readMacros();
|
||||
}
|
||||
}
|
||||
|
||||
} finally {
|
||||
IOUtils.closeQuietly(npoifs);
|
||||
IOUtils.closeQuietly(is);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,7 +87,8 @@ public class TestVBAMacroReader {
|
|||
public void XSSFfromStream() throws Exception {
|
||||
fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
|
||||
}
|
||||
@Ignore("bug 59302: Found 0 macros")
|
||||
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
|
||||
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
|
||||
@Test
|
||||
public void HSLFfromStream() throws Exception {
|
||||
fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
|
||||
|
@ -123,7 +124,8 @@ public class TestVBAMacroReader {
|
|||
public void XSSFfromFile() throws Exception {
|
||||
fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
|
||||
}
|
||||
@Ignore("bug 59302: Found 0 macros")
|
||||
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
|
||||
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
|
||||
@Test
|
||||
public void HSLFfromFile() throws Exception {
|
||||
fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue