mirror of https://github.com/apache/poi.git
Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f9bda3915e
commit
0c24b8f16c
|
@ -0,0 +1,184 @@
|
||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.poi.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A sub-record within the OBJ record which stores a reference to an object
|
||||||
|
* stored in a separate entry within the OLE2 compound file.
|
||||||
|
*
|
||||||
|
* @author Daniel Noll
|
||||||
|
*/
|
||||||
|
public class EmbeddedObjectRefSubRecord
|
||||||
|
extends SubRecord
|
||||||
|
{
|
||||||
|
public static final short sid = 0x9;
|
||||||
|
|
||||||
|
public short field_1_stream_id_offset; // Offset to stream ID from the point after this value.
|
||||||
|
public short[] field_2_unknown; // Unknown stuff at the front. TODO: Confirm that it's a short[]
|
||||||
|
// TODO: Consider making a utility class for these. I've discovered the same field ordering
|
||||||
|
// in FormatRecord and StringRecord, it may be elsewhere too.
|
||||||
|
public short field_3_unicode_len; // Length of Unicode string.
|
||||||
|
public boolean field_4_unicode_flag; // Flags whether the string is Unicode.
|
||||||
|
public String field_5_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
|
||||||
|
public int field_6_stream_id; // ID of the OLE stream containing the actual data.
|
||||||
|
|
||||||
|
public EmbeddedObjectRefSubRecord()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs an EmbeddedObjectRef record and sets its fields appropriately.
|
||||||
|
*
|
||||||
|
* @param in the record input stream.
|
||||||
|
*/
|
||||||
|
public EmbeddedObjectRefSubRecord(RecordInputStream in)
|
||||||
|
{
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the sid matches the expected side for this record
|
||||||
|
*
|
||||||
|
* @param id the expected sid.
|
||||||
|
*/
|
||||||
|
protected void validateSid(short id)
|
||||||
|
{
|
||||||
|
if (id != sid)
|
||||||
|
{
|
||||||
|
throw new RecordFormatException("Not a EmbeddedObjectRef record");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public short getSid()
|
||||||
|
{
|
||||||
|
return sid;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fillFields(RecordInputStream in)
|
||||||
|
{
|
||||||
|
field_1_stream_id_offset = in.readShort();
|
||||||
|
field_2_unknown = in.readShortArray();
|
||||||
|
field_3_unicode_len = in.readShort();
|
||||||
|
field_4_unicode_flag = ( in.readByte() & 0x01 ) != 0;
|
||||||
|
|
||||||
|
if ( field_4_unicode_flag )
|
||||||
|
{
|
||||||
|
field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Padded with NUL bytes. The -2 is because field_1_stream_id_offset
|
||||||
|
// is relative to after the offset field, whereas in.getRecordOffset()
|
||||||
|
// is relative to the start of this record.
|
||||||
|
while (in.getRecordOffset() - 2 < field_1_stream_id_offset)
|
||||||
|
{
|
||||||
|
in.readByte(); // discard
|
||||||
|
}
|
||||||
|
|
||||||
|
field_6_stream_id = in.readInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int serialize(int offset, byte[] data)
|
||||||
|
{
|
||||||
|
int pos = offset;
|
||||||
|
|
||||||
|
LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2;
|
||||||
|
LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
|
||||||
|
LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
|
||||||
|
data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;
|
||||||
|
|
||||||
|
if ( field_4_unicode_flag )
|
||||||
|
{
|
||||||
|
StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Padded with NUL bytes.
|
||||||
|
pos = field_1_stream_id_offset;
|
||||||
|
|
||||||
|
LittleEndian.putInt(data, pos, field_6_stream_id); pos += 4;
|
||||||
|
|
||||||
|
return getRecordSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Size of record (exluding 4 byte header)
|
||||||
|
*/
|
||||||
|
public int getRecordSize()
|
||||||
|
{
|
||||||
|
// Conveniently this stores the length of all the crap before the final int value.
|
||||||
|
return field_1_stream_id_offset + 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the stream ID containing the actual data. The data itself
|
||||||
|
* can be found under a top-level directory entry in the OLE2 filesystem
|
||||||
|
* under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
|
||||||
|
* this ID converted into hex (in big endian order, funnily enough.)
|
||||||
|
*
|
||||||
|
* @return the data stream ID.
|
||||||
|
*/
|
||||||
|
public int getStreamId()
|
||||||
|
{
|
||||||
|
return field_6_stream_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
StringBuffer buffer = new StringBuffer();
|
||||||
|
buffer.append("[ftPictFmla]\n");
|
||||||
|
buffer.append(" .streamIdOffset = ")
|
||||||
|
.append("0x").append(HexDump.toHex( field_1_stream_id_offset ))
|
||||||
|
.append(" (").append( field_1_stream_id_offset ).append(" )")
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append(" .unknown = ")
|
||||||
|
.append("0x").append(HexDump.toHex( field_2_unknown ))
|
||||||
|
.append(" (").append( field_2_unknown ).append(" )")
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append(" .unicodeLen = ")
|
||||||
|
.append("0x").append(HexDump.toHex( field_3_unicode_len ))
|
||||||
|
.append(" (").append( field_3_unicode_len ).append(" )")
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append(" .unicodeFlag = ")
|
||||||
|
.append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
|
||||||
|
.append(" (").append( field_4_unicode_flag ).append(" )")
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append(" .oleClassname = ")
|
||||||
|
.append(field_5_ole_classname)
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append(" .streamId = ")
|
||||||
|
.append("0x").append(HexDump.toHex( field_6_stream_id ))
|
||||||
|
.append(" (").append( field_6_stream_id ).append(" )")
|
||||||
|
.append(System.getProperty("line.separator"));
|
||||||
|
buffer.append("[/ftPictFmla]");
|
||||||
|
return buffer.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -58,6 +58,9 @@ abstract public class SubRecord
|
||||||
case CommonObjectDataSubRecord.sid:
|
case CommonObjectDataSubRecord.sid:
|
||||||
r = new CommonObjectDataSubRecord( in );
|
r = new CommonObjectDataSubRecord( in );
|
||||||
break;
|
break;
|
||||||
|
case EmbeddedObjectRefSubRecord.sid:
|
||||||
|
r = new EmbeddedObjectRefSubRecord( in );
|
||||||
|
break;
|
||||||
case GroupMarkerSubRecord.sid:
|
case GroupMarkerSubRecord.sid:
|
||||||
r = new GroupMarkerSubRecord( in );
|
r = new GroupMarkerSubRecord( in );
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Copyright 2002-2004 Apache Software Foundation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi.hssf.usermodel;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
|
||||||
|
import org.apache.poi.hssf.record.ObjRecord;
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||||
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
import org.apache.poi.util.HexDump;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc...
|
||||||
|
*
|
||||||
|
* @author Daniel Noll
|
||||||
|
*/
|
||||||
|
public class HSSFObjectData
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Underlying object record ultimately containing a reference to the object.
|
||||||
|
*/
|
||||||
|
private ObjRecord record;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reference to the filesystem, required for retrieving the object data.
|
||||||
|
*/
|
||||||
|
private POIFSFileSystem poifs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs object data by wrapping a lower level object record.
|
||||||
|
*
|
||||||
|
* @param record the low-level object record.
|
||||||
|
* @param poifs the filesystem, required for retrieving the object data.
|
||||||
|
*/
|
||||||
|
public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
|
||||||
|
{
|
||||||
|
this.record = record;
|
||||||
|
this.poifs = poifs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the object data.
|
||||||
|
*
|
||||||
|
* @return the object data as an OLE2 directory.
|
||||||
|
* @throws IOException if there was an error reading the data.
|
||||||
|
*/
|
||||||
|
public DirectoryEntry getDirectory() throws IOException
|
||||||
|
{
|
||||||
|
Iterator subRecordIter = record.getSubRecords().iterator();
|
||||||
|
while (subRecordIter.hasNext())
|
||||||
|
{
|
||||||
|
Object subRecord = subRecordIter.next();
|
||||||
|
if (subRecord instanceof EmbeddedObjectRefSubRecord)
|
||||||
|
{
|
||||||
|
int streamId = ((EmbeddedObjectRefSubRecord) subRecord).getStreamId();
|
||||||
|
String streamName = "MBD" + HexDump.toHex(streamId);
|
||||||
|
|
||||||
|
Entry entry = poifs.getRoot().getEntry(streamName);
|
||||||
|
if (entry instanceof DirectoryEntry)
|
||||||
|
{
|
||||||
|
return (DirectoryEntry) entry;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new IOException("Stream " + streamName + " was not an OLE2 directory");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
|
||||||
|
}
|
||||||
|
}
|
|
@ -1332,6 +1332,7 @@ public class HSSFWorkbook
|
||||||
*/
|
*/
|
||||||
public List getAllPictures()
|
public List getAllPictures()
|
||||||
{
|
{
|
||||||
|
// The drawing group record always exists at the top level, so we won't need to do this recursively.
|
||||||
List pictures = new ArrayList();
|
List pictures = new ArrayList();
|
||||||
Iterator recordIter = workbook.getRecords().iterator();
|
Iterator recordIter = workbook.getRecords().iterator();
|
||||||
while (recordIter.hasNext())
|
while (recordIter.hasNext())
|
||||||
|
@ -1395,6 +1396,50 @@ public class HSSFWorkbook
|
||||||
this.workbook.unwriteProtectWorkbook();
|
this.workbook.unwriteProtectWorkbook();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets all embedded OLE2 objects from the Workbook.
|
||||||
|
*
|
||||||
|
* @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
|
||||||
|
*/
|
||||||
|
public List getAllEmbeddedObjects()
|
||||||
|
{
|
||||||
|
List objects = new ArrayList();
|
||||||
|
for (int i = 0; i < getNumberOfSheets(); i++)
|
||||||
|
{
|
||||||
|
getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects);
|
||||||
|
}
|
||||||
|
return objects;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets all embedded OLE2 objects from the Workbook.
|
||||||
|
*
|
||||||
|
* @param records the list of records to search.
|
||||||
|
* @param objects the list of embedded objects to populate.
|
||||||
|
*/
|
||||||
|
private void getAllEmbeddedObjects(List records, List objects)
|
||||||
|
{
|
||||||
|
Iterator recordIter = records.iterator();
|
||||||
|
while (recordIter.hasNext())
|
||||||
|
{
|
||||||
|
Object obj = recordIter.next();
|
||||||
|
if (obj instanceof ObjRecord)
|
||||||
|
{
|
||||||
|
// TODO: More convenient way of determining if there is stored binary.
|
||||||
|
// TODO: Link to the data stored in the other stream.
|
||||||
|
Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator();
|
||||||
|
while (subRecordIter.hasNext())
|
||||||
|
{
|
||||||
|
Object sub = subRecordIter.next();
|
||||||
|
if (sub instanceof EmbeddedObjectRefSubRecord)
|
||||||
|
{
|
||||||
|
objects.add(new HSSFObjectData((ObjRecord) obj, poifs));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private byte[] newUID()
|
private byte[] newUID()
|
||||||
{
|
{
|
||||||
byte[] bytes = new byte[16];
|
byte[] bytes = new byte[16];
|
||||||
|
|
|
@ -268,6 +268,25 @@ public class HexDump
|
||||||
return retVal.toString();
|
return retVal.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts the parameter to a hex value.
|
||||||
|
*
|
||||||
|
* @param value The value to convert
|
||||||
|
* @return A String representing the array of shorts
|
||||||
|
*/
|
||||||
|
public static String toHex(final short[] value)
|
||||||
|
{
|
||||||
|
StringBuffer retVal = new StringBuffer();
|
||||||
|
retVal.append('[');
|
||||||
|
for(int x = 0; x < value.length; x++)
|
||||||
|
{
|
||||||
|
retVal.append(toHex(value[x]));
|
||||||
|
retVal.append(", ");
|
||||||
|
}
|
||||||
|
retVal.append(']');
|
||||||
|
return retVal.toString();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Converts the parameter to a hex value breaking the results into
|
* <p>Converts the parameter to a hex value breaking the results into
|
||||||
* lines.</p>
|
* lines.</p>
|
||||||
|
|
|
@ -19,25 +19,43 @@ package org.apache.poi.hssf.usermodel;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
public class TestOLE2Embeding extends TestCase {
|
public class TestOLE2Embeding extends TestCase {
|
||||||
public void testEmbeding() throws Exception {
|
public void testEmbeding() throws Exception {
|
||||||
String dirname = System.getProperty("HSSF.testdata.path");
|
String dirname = System.getProperty("HSSF.testdata.path");
|
||||||
String filename = dirname + "/ole2-embedding.xls";
|
String filename = dirname + "/ole2-embedding.xls";
|
||||||
|
|
||||||
File file = new File(filename);
|
File file = new File(filename);
|
||||||
FileInputStream in = new FileInputStream(file);
|
FileInputStream in = new FileInputStream(file);
|
||||||
HSSFWorkbook workbook;
|
HSSFWorkbook workbook;
|
||||||
|
|
||||||
// This used to break, until bug #43116 was fixed
|
// This used to break, until bug #43116 was fixed
|
||||||
workbook = new HSSFWorkbook(in);
|
workbook = new HSSFWorkbook(in);
|
||||||
|
|
||||||
in.close();
|
in.close();
|
||||||
|
|
||||||
|
// Check we can get at the Escher layer still
|
||||||
|
workbook.getAllPictures();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEmbeddedObjects() throws Exception {
|
||||||
|
String dirname = System.getProperty("HSSF.testdata.path");
|
||||||
|
String filename = dirname + "/ole2-embedding.xls";
|
||||||
|
|
||||||
|
File file = new File(filename);
|
||||||
|
HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(file));
|
||||||
|
List objects = workbook.getAllEmbeddedObjects();
|
||||||
|
assertEquals("Wrong number of objects", 2, objects.size());
|
||||||
|
assertEquals("Wrong name for first object", "MBD06CAB431",
|
||||||
|
((HSSFObjectData)
|
||||||
|
objects.get(0)).getDirectory().getName());
|
||||||
|
assertEquals("Wrong name for second object", "MBD06CAC85A",
|
||||||
|
((HSSFObjectData)
|
||||||
|
objects.get(1)).getDirectory().getName());
|
||||||
|
}
|
||||||
|
|
||||||
// Check we can get at the Escher layer still
|
|
||||||
workbook.getAllPictures();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue