mirror of https://github.com/apache/poi.git
Support for getting OLE objects from HSSFWorkbook. See bug 43222 for details.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@573878 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f9bda3915e
commit
0c24b8f16c
|
@ -0,0 +1,184 @@
|
|||
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
|
||||
|
||||
import org.apache.poi.util.*;
|
||||
|
||||
/**
|
||||
* A sub-record within the OBJ record which stores a reference to an object
|
||||
* stored in a separate entry within the OLE2 compound file.
|
||||
*
|
||||
* @author Daniel Noll
|
||||
*/
|
||||
public class EmbeddedObjectRefSubRecord
|
||||
extends SubRecord
|
||||
{
|
||||
public static final short sid = 0x9;
|
||||
|
||||
public short field_1_stream_id_offset; // Offset to stream ID from the point after this value.
|
||||
public short[] field_2_unknown; // Unknown stuff at the front. TODO: Confirm that it's a short[]
|
||||
// TODO: Consider making a utility class for these. I've discovered the same field ordering
|
||||
// in FormatRecord and StringRecord, it may be elsewhere too.
|
||||
public short field_3_unicode_len; // Length of Unicode string.
|
||||
public boolean field_4_unicode_flag; // Flags whether the string is Unicode.
|
||||
public String field_5_ole_classname; // Classname of the embedded OLE document (e.g. Word.Document.8)
|
||||
public int field_6_stream_id; // ID of the OLE stream containing the actual data.
|
||||
|
||||
public EmbeddedObjectRefSubRecord()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an EmbeddedObjectRef record and sets its fields appropriately.
|
||||
*
|
||||
* @param in the record input stream.
|
||||
*/
|
||||
public EmbeddedObjectRefSubRecord(RecordInputStream in)
|
||||
{
|
||||
super(in);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the sid matches the expected side for this record
|
||||
*
|
||||
* @param id the expected sid.
|
||||
*/
|
||||
protected void validateSid(short id)
|
||||
{
|
||||
if (id != sid)
|
||||
{
|
||||
throw new RecordFormatException("Not a EmbeddedObjectRef record");
|
||||
}
|
||||
}
|
||||
|
||||
public short getSid()
|
||||
{
|
||||
return sid;
|
||||
}
|
||||
|
||||
protected void fillFields(RecordInputStream in)
|
||||
{
|
||||
field_1_stream_id_offset = in.readShort();
|
||||
field_2_unknown = in.readShortArray();
|
||||
field_3_unicode_len = in.readShort();
|
||||
field_4_unicode_flag = ( in.readByte() & 0x01 ) != 0;
|
||||
|
||||
if ( field_4_unicode_flag )
|
||||
{
|
||||
field_5_ole_classname = in.readUnicodeLEString( field_3_unicode_len );
|
||||
}
|
||||
else
|
||||
{
|
||||
field_5_ole_classname = in.readCompressedUnicode( field_3_unicode_len );
|
||||
}
|
||||
|
||||
// Padded with NUL bytes. The -2 is because field_1_stream_id_offset
|
||||
// is relative to after the offset field, whereas in.getRecordOffset()
|
||||
// is relative to the start of this record.
|
||||
while (in.getRecordOffset() - 2 < field_1_stream_id_offset)
|
||||
{
|
||||
in.readByte(); // discard
|
||||
}
|
||||
|
||||
field_6_stream_id = in.readInt();
|
||||
}
|
||||
|
||||
public int serialize(int offset, byte[] data)
|
||||
{
|
||||
int pos = offset;
|
||||
|
||||
LittleEndian.putShort(data, pos, field_1_stream_id_offset); pos += 2;
|
||||
LittleEndian.putShortArray(data, pos, field_2_unknown); pos += field_2_unknown.length * 2 + 2;
|
||||
LittleEndian.putShort(data, pos, field_3_unicode_len); pos += 2;
|
||||
data[pos] = field_4_unicode_flag ? (byte) 0x01 : (byte) 0x00; pos++;
|
||||
|
||||
if ( field_4_unicode_flag )
|
||||
{
|
||||
StringUtil.putUnicodeLE( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length() * 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
StringUtil.putCompressedUnicode( field_5_ole_classname, data, pos ); pos += field_5_ole_classname.length();
|
||||
}
|
||||
|
||||
// Padded with NUL bytes.
|
||||
pos = field_1_stream_id_offset;
|
||||
|
||||
LittleEndian.putInt(data, pos, field_6_stream_id); pos += 4;
|
||||
|
||||
return getRecordSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Size of record (exluding 4 byte header)
|
||||
*/
|
||||
public int getRecordSize()
|
||||
{
|
||||
// Conveniently this stores the length of all the crap before the final int value.
|
||||
return field_1_stream_id_offset + 4;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stream ID containing the actual data. The data itself
|
||||
* can be found under a top-level directory entry in the OLE2 filesystem
|
||||
* under the name "MBD<var>xxxxxxxx</var>" where <var>xxxxxxxx</var> is
|
||||
* this ID converted into hex (in big endian order, funnily enough.)
|
||||
*
|
||||
* @return the data stream ID.
|
||||
*/
|
||||
public int getStreamId()
|
||||
{
|
||||
return field_6_stream_id;
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
buffer.append("[ftPictFmla]\n");
|
||||
buffer.append(" .streamIdOffset = ")
|
||||
.append("0x").append(HexDump.toHex( field_1_stream_id_offset ))
|
||||
.append(" (").append( field_1_stream_id_offset ).append(" )")
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append(" .unknown = ")
|
||||
.append("0x").append(HexDump.toHex( field_2_unknown ))
|
||||
.append(" (").append( field_2_unknown ).append(" )")
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append(" .unicodeLen = ")
|
||||
.append("0x").append(HexDump.toHex( field_3_unicode_len ))
|
||||
.append(" (").append( field_3_unicode_len ).append(" )")
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append(" .unicodeFlag = ")
|
||||
.append("0x").append( field_4_unicode_flag ? 0x01 : 0x00 )
|
||||
.append(" (").append( field_4_unicode_flag ).append(" )")
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append(" .oleClassname = ")
|
||||
.append(field_5_ole_classname)
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append(" .streamId = ")
|
||||
.append("0x").append(HexDump.toHex( field_6_stream_id ))
|
||||
.append(" (").append( field_6_stream_id ).append(" )")
|
||||
.append(System.getProperty("line.separator"));
|
||||
buffer.append("[/ftPictFmla]");
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
}
|
|
@ -58,6 +58,9 @@ abstract public class SubRecord
|
|||
case CommonObjectDataSubRecord.sid:
|
||||
r = new CommonObjectDataSubRecord( in );
|
||||
break;
|
||||
case EmbeddedObjectRefSubRecord.sid:
|
||||
r = new EmbeddedObjectRefSubRecord( in );
|
||||
break;
|
||||
case GroupMarkerSubRecord.sid:
|
||||
r = new GroupMarkerSubRecord( in );
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi.hssf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
|
||||
import org.apache.poi.hssf.record.ObjRecord;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.HexDump;
|
||||
|
||||
/**
|
||||
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc...
|
||||
*
|
||||
* @author Daniel Noll
|
||||
*/
|
||||
public class HSSFObjectData
|
||||
{
|
||||
/**
|
||||
* Underlying object record ultimately containing a reference to the object.
|
||||
*/
|
||||
private ObjRecord record;
|
||||
|
||||
/**
|
||||
* Reference to the filesystem, required for retrieving the object data.
|
||||
*/
|
||||
private POIFSFileSystem poifs;
|
||||
|
||||
/**
|
||||
* Constructs object data by wrapping a lower level object record.
|
||||
*
|
||||
* @param record the low-level object record.
|
||||
* @param poifs the filesystem, required for retrieving the object data.
|
||||
*/
|
||||
public HSSFObjectData(ObjRecord record, POIFSFileSystem poifs)
|
||||
{
|
||||
this.record = record;
|
||||
this.poifs = poifs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the object data.
|
||||
*
|
||||
* @return the object data as an OLE2 directory.
|
||||
* @throws IOException if there was an error reading the data.
|
||||
*/
|
||||
public DirectoryEntry getDirectory() throws IOException
|
||||
{
|
||||
Iterator subRecordIter = record.getSubRecords().iterator();
|
||||
while (subRecordIter.hasNext())
|
||||
{
|
||||
Object subRecord = subRecordIter.next();
|
||||
if (subRecord instanceof EmbeddedObjectRefSubRecord)
|
||||
{
|
||||
int streamId = ((EmbeddedObjectRefSubRecord) subRecord).getStreamId();
|
||||
String streamName = "MBD" + HexDump.toHex(streamId);
|
||||
|
||||
Entry entry = poifs.getRoot().getEntry(streamName);
|
||||
if (entry instanceof DirectoryEntry)
|
||||
{
|
||||
return (DirectoryEntry) entry;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IOException("Stream " + streamName + " was not an OLE2 directory");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalStateException("Object data does not contain a reference to an embedded object OLE2 directory");
|
||||
}
|
||||
}
|
|
@ -1332,6 +1332,7 @@ public class HSSFWorkbook
|
|||
*/
|
||||
public List getAllPictures()
|
||||
{
|
||||
// The drawing group record always exists at the top level, so we won't need to do this recursively.
|
||||
List pictures = new ArrayList();
|
||||
Iterator recordIter = workbook.getRecords().iterator();
|
||||
while (recordIter.hasNext())
|
||||
|
@ -1395,6 +1396,50 @@ public class HSSFWorkbook
|
|||
this.workbook.unwriteProtectWorkbook();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all embedded OLE2 objects from the Workbook.
|
||||
*
|
||||
* @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
|
||||
*/
|
||||
public List getAllEmbeddedObjects()
|
||||
{
|
||||
List objects = new ArrayList();
|
||||
for (int i = 0; i < getNumberOfSheets(); i++)
|
||||
{
|
||||
getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects);
|
||||
}
|
||||
return objects;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collects the embedded OLE2 objects found in the given records into the supplied list.
|
||||
*
|
||||
* @param records the list of records to search.
|
||||
* @param objects the list of embedded objects to populate.
|
||||
*/
|
||||
private void getAllEmbeddedObjects(List records, List objects)
|
||||
{
|
||||
Iterator recordIter = records.iterator();
|
||||
while (recordIter.hasNext())
|
||||
{
|
||||
Object obj = recordIter.next();
|
||||
if (obj instanceof ObjRecord)
|
||||
{
|
||||
// TODO: More convenient way of determining if there is stored binary.
|
||||
// TODO: Link to the data stored in the other stream.
|
||||
Iterator subRecordIter = ((ObjRecord) obj).getSubRecords().iterator();
|
||||
while (subRecordIter.hasNext())
|
||||
{
|
||||
Object sub = subRecordIter.next();
|
||||
if (sub instanceof EmbeddedObjectRefSubRecord)
|
||||
{
|
||||
objects.add(new HSSFObjectData((ObjRecord) obj, poifs));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] newUID()
|
||||
{
|
||||
byte[] bytes = new byte[16];
|
||||
|
|
|
@ -268,6 +268,25 @@ public class HexDump
|
|||
return retVal.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the parameter to a hex value.
|
||||
*
|
||||
* @param value The value to convert
|
||||
* @return A String representing the array of shorts
|
||||
*/
|
||||
public static String toHex(final short[] value)
|
||||
{
|
||||
StringBuffer retVal = new StringBuffer();
|
||||
retVal.append('[');
|
||||
for(int x = 0; x < value.length; x++)
|
||||
{
|
||||
retVal.append(toHex(value[x]));
|
||||
retVal.append(", ");
|
||||
}
|
||||
retVal.append(']');
|
||||
return retVal.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Converts the parameter to a hex value breaking the results into
|
||||
* lines.</p>
|
||||
|
|
|
@ -19,25 +19,43 @@ package org.apache.poi.hssf.usermodel;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public class TestOLE2Embeding extends TestCase {
|
||||
public void testEmbeding() throws Exception {
|
||||
String dirname = System.getProperty("HSSF.testdata.path");
|
||||
String filename = dirname + "/ole2-embedding.xls";
|
||||
public void testEmbeding() throws Exception {
|
||||
String dirname = System.getProperty("HSSF.testdata.path");
|
||||
String filename = dirname + "/ole2-embedding.xls";
|
||||
|
||||
File file = new File(filename);
|
||||
FileInputStream in = new FileInputStream(file);
|
||||
HSSFWorkbook workbook;
|
||||
File file = new File(filename);
|
||||
FileInputStream in = new FileInputStream(file);
|
||||
HSSFWorkbook workbook;
|
||||
|
||||
// This used to break, until bug #43116 was fixed
|
||||
workbook = new HSSFWorkbook(in);
|
||||
// This used to break, until bug #43116 was fixed
|
||||
workbook = new HSSFWorkbook(in);
|
||||
|
||||
in.close();
|
||||
in.close();
|
||||
|
||||
// Check we can get at the Escher layer still
|
||||
workbook.getAllPictures();
|
||||
}
|
||||
|
||||
public void testEmbeddedObjects() throws Exception {
|
||||
String dirname = System.getProperty("HSSF.testdata.path");
|
||||
String filename = dirname + "/ole2-embedding.xls";
|
||||
|
||||
File file = new File(filename);
|
||||
HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(file));
|
||||
List objects = workbook.getAllEmbeddedObjects();
|
||||
assertEquals("Wrong number of objects", 2, objects.size());
|
||||
assertEquals("Wrong name for first object", "MBD06CAB431",
|
||||
((HSSFObjectData)
|
||||
objects.get(0)).getDirectory().getName());
|
||||
assertEquals("Wrong name for second object", "MBD06CAC85A",
|
||||
((HSSFObjectData)
|
||||
objects.get(1)).getDirectory().getName());
|
||||
}
|
||||
|
||||
// Check we can get at the Escher layer still
|
||||
workbook.getAllPictures();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue