mirror of https://github.com/apache/poi.git
Bug 51891 - Fix StringIndexOutOfBoundsException : Ole10Native.<init> (parsing word file)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1563483 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bc936983a5
commit
e484fd75d0
|
@ -40,6 +40,7 @@ public class ClassID
|
|||
public static final ClassID WORD95 = new ClassID("{00020900-0000-0000-C000-000000000046}");
|
||||
public static final ClassID POWERPOINT97 = new ClassID("{64818D10-4F9B-11CF-86EA-00AA00B929E8}");
|
||||
public static final ClassID POWERPOINT95 = new ClassID("{EA7BAE70-FB3B-11CD-A903-00AA00510EA3}");
|
||||
public static final ClassID EQUATION30 = new ClassID("{0002CE02-0000-0000-C000-000000000046}");
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,13 +18,12 @@
|
|||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
import org.apache.poi.util.HexDump;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianConsts;
|
||||
import org.apache.poi.util.LittleEndianOutputStream;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
|
||||
/**
|
||||
|
@ -49,6 +48,28 @@ public class Ole10Native {
|
|||
private byte[] dataBuffer; // varying size, the actual native data
|
||||
private short flags3 = 0; // some final flags? or zero terminators?, sometimes not there
|
||||
|
||||
/**
|
||||
* the field encoding mode - merely a trial-and-error guess ...
|
||||
**/
|
||||
private enum EncodingMode {
|
||||
/**
|
||||
* the data is stored in parsed format - including label, command, etc.
|
||||
*/
|
||||
parsed,
|
||||
/**
|
||||
* the data is stored raw after the length field
|
||||
*/
|
||||
unparsed,
|
||||
/**
|
||||
* the data is stored raw after the length field and the flags1 field
|
||||
*/
|
||||
compact;
|
||||
}
|
||||
|
||||
private EncodingMode mode;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Creates an instance of this class from an embedded OLE Object. The OLE Object is expected
|
||||
* to include a stream "{01}Ole10Native" which contains the actual
|
||||
|
@ -74,21 +95,12 @@ public class Ole10Native {
|
|||
* @throws Ole10NativeException on invalid or unexpected data format
|
||||
*/
|
||||
public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException {
|
||||
boolean plain = false;
|
||||
|
||||
try {
|
||||
directory.getEntry("\u0001Ole10ItemName");
|
||||
plain = true;
|
||||
} catch (FileNotFoundException ex) {
|
||||
plain = false;
|
||||
}
|
||||
|
||||
DocumentEntry nativeEntry =
|
||||
(DocumentEntry)directory.getEntry(OLE10_NATIVE);
|
||||
byte[] data = new byte[nativeEntry.getSize()];
|
||||
directory.createDocumentInputStream(nativeEntry).read(data);
|
||||
|
||||
return new Ole10Native(data, 0, plain);
|
||||
return new Ole10Native(data, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -99,6 +111,21 @@ public class Ole10Native {
|
|||
setFileName(filename);
|
||||
setCommand(command);
|
||||
setDataBuffer(data);
|
||||
mode = EncodingMode.parsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an instance and fills the fields based on the data in the given buffer.
|
||||
*
|
||||
* @param data The buffer containing the Ole10Native record
|
||||
* @param offset The start offset of the record in the buffer
|
||||
* @param plain as of POI 3.11 this parameter is ignored
|
||||
* @throws Ole10NativeException on invalid or unexpected data format
|
||||
*
|
||||
* @deprecated parameter plain is ignored, use {@link #Ole10Native(byte[],int)}
|
||||
*/
|
||||
public Ole10Native(byte[] data, int offset, boolean plain) throws Ole10NativeException {
|
||||
this(data, offset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -109,38 +136,32 @@ public class Ole10Native {
|
|||
* @throws Ole10NativeException on invalid or unexpected data format
|
||||
*/
|
||||
public Ole10Native(byte[] data, int offset) throws Ole10NativeException {
|
||||
this(data, offset, false);
|
||||
}
|
||||
/**
|
||||
* Creates an instance and fills the fields based on the data in the given buffer.
|
||||
*
|
||||
* @param data The buffer containing the Ole10Native record
|
||||
* @param offset The start offset of the record in the buffer
|
||||
* @param plain Specified 'plain' format without filename
|
||||
* @throws Ole10NativeException on invalid or unexcepted data format
|
||||
*/
|
||||
public Ole10Native(byte[] data, int offset, boolean plain) throws Ole10NativeException {
|
||||
int ofs = offset; // current offset, initialized to start
|
||||
|
||||
if (data.length<offset+2) {
|
||||
if (data.length < offset + 2) {
|
||||
throw new Ole10NativeException("data is too small");
|
||||
}
|
||||
|
||||
totalSize = LittleEndian.getInt(data, ofs);
|
||||
ofs += LittleEndianConsts.INT_SIZE;
|
||||
|
||||
if (plain) {
|
||||
dataBuffer = new byte[totalSize-4];
|
||||
System.arraycopy(data, 4, dataBuffer, 0, dataBuffer.length);
|
||||
// int dataSize = totalSize - 4;
|
||||
|
||||
byte[] oleLabel = new byte[8];
|
||||
System.arraycopy(dataBuffer, 0, oleLabel, 0, Math.min(dataBuffer.length, 8));
|
||||
label = "ole-"+ HexDump.toHex(oleLabel);
|
||||
fileName = label;
|
||||
command = label;
|
||||
mode = EncodingMode.unparsed;
|
||||
if (LittleEndian.getShort(data, ofs) == 2) {
|
||||
// some files like equations don't have a valid filename,
|
||||
// but somehow encode the formula right away in the ole10 header
|
||||
if (Character.isISOControl(data[ofs+LittleEndianConsts.SHORT_SIZE])) {
|
||||
mode = EncodingMode.compact;
|
||||
} else {
|
||||
mode = EncodingMode.parsed;
|
||||
}
|
||||
}
|
||||
|
||||
int dataSize;
|
||||
switch (mode) {
|
||||
case parsed: {
|
||||
flags1 = LittleEndian.getShort(data, ofs);
|
||||
|
||||
// structured format
|
||||
ofs += LittleEndianConsts.SHORT_SIZE;
|
||||
|
||||
int len = getStringLength(data, ofs);
|
||||
|
@ -159,7 +180,6 @@ public class Ole10Native {
|
|||
|
||||
len = LittleEndian.getInt(data, ofs);
|
||||
ofs += LittleEndianConsts.INT_SIZE;
|
||||
|
||||
command = StringUtil.getFromCompressedUnicode(data, ofs, len - 1);
|
||||
ofs += len;
|
||||
|
||||
|
@ -167,25 +187,36 @@ public class Ole10Native {
|
|||
throw new Ole10NativeException("Invalid Ole10Native");
|
||||
}
|
||||
|
||||
int dataSize = LittleEndian.getInt(data, ofs);
|
||||
dataSize = LittleEndian.getInt(data, ofs);
|
||||
ofs += LittleEndianConsts.INT_SIZE;
|
||||
|
||||
if (dataSize < 0 || totalSize - (ofs - LittleEndianConsts.INT_SIZE) < dataSize) {
|
||||
throw new Ole10NativeException("Invalid Ole10Native");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case compact:
|
||||
flags1 = LittleEndian.getShort(data, ofs);
|
||||
ofs += LittleEndianConsts.SHORT_SIZE;
|
||||
dataSize = totalSize - LittleEndianConsts.SHORT_SIZE;
|
||||
break;
|
||||
default:
|
||||
case unparsed:
|
||||
dataSize = totalSize;
|
||||
break;
|
||||
}
|
||||
|
||||
dataBuffer = new byte[dataSize];
|
||||
System.arraycopy(data, ofs, dataBuffer, 0, dataSize);
|
||||
ofs += dataSize;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper - determine length of zero terminated string (ASCIIZ).
|
||||
*/
|
||||
private static int getStringLength(byte[] data, int ofs) {
|
||||
int len = 0;
|
||||
while (len+ofs<data.length && data[ofs + len] != 0) {
|
||||
while (len + ofs < data.length && data[ofs + len] != 0) {
|
||||
len++;
|
||||
}
|
||||
len++;
|
||||
|
@ -193,8 +224,8 @@ public class Ole10Native {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the value of the totalSize field - the total length of the structure
|
||||
* is totalSize + 4 (value of this field + size of this field).
|
||||
* Returns the value of the totalSize field - the total length of the
|
||||
* structure is totalSize + 4 (value of this field + size of this field).
|
||||
*
|
||||
* @return the totalSize
|
||||
*/
|
||||
|
@ -212,8 +243,9 @@ public class Ole10Native {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the label field - usually the name of the file (without directory) but
|
||||
* probably may be any name specified during packaging/embedding the data.
|
||||
* Returns the label field - usually the name of the file (without
|
||||
* directory) but probably may be any name specified during
|
||||
* packaging/embedding the data.
|
||||
*
|
||||
* @return the label
|
||||
*/
|
||||
|
@ -251,7 +283,8 @@ public class Ole10Native {
|
|||
|
||||
/**
|
||||
* Returns the command field - usually the name of the file being embedded
|
||||
* including the full path, may be a command specified during embedding the file.
|
||||
* including the full path, may be a command specified during embedding the
|
||||
* file.
|
||||
*
|
||||
* @return the command
|
||||
*/
|
||||
|
@ -260,9 +293,9 @@ public class Ole10Native {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the size of the embedded file. If the size is 0 (zero), no data has been
|
||||
* embedded. To be sure, that no data has been embedded, check whether
|
||||
* {@link #getDataBuffer()} returns <code>null</code>.
|
||||
* Returns the size of the embedded file. If the size is 0 (zero), no data
|
||||
* has been embedded. To be sure that no data has been embedded, check
|
||||
* whether {@link #getDataBuffer()} returns <code>null</code>.
|
||||
*
|
||||
* @return the dataSize
|
||||
*/
|
||||
|
@ -271,10 +304,11 @@ public class Ole10Native {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the buffer containing the embedded file's data, or <code>null</code>
|
||||
* if no data was embedded. Note that an embedding may provide information about
|
||||
* the data, but the actual data is not included. (So label, filename etc. are
|
||||
* available, but this method returns <code>null</code>.)
|
||||
* Returns the buffer containing the embedded file's data, or
|
||||
* <code>null</code> if no data was embedded. Note that an embedding may
|
||||
* provide information about the data, but the actual data is not included.
|
||||
* (So label, filename etc. are available, but this method returns
|
||||
* <code>null</code>.)
|
||||
*
|
||||
* @return the dataBuffer
|
||||
*/
|
||||
|
@ -298,47 +332,49 @@ public class Ole10Native {
|
|||
* children, then chuck on their header and return)
|
||||
*/
|
||||
public void writeOut(OutputStream out) throws IOException {
|
||||
byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE];
|
||||
byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE];
|
||||
// byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE];
|
||||
// byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE];
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
LittleEndianOutputStream leosOut = new LittleEndianOutputStream(out);
|
||||
|
||||
switch (mode) {
|
||||
case parsed: {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
bos.write(intbuf); // total size, will be determined later ..
|
||||
LittleEndianOutputStream leos = new LittleEndianOutputStream(bos);
|
||||
// total size, will be determined later ..
|
||||
|
||||
LittleEndian.putShort(shortbuf, 0, getFlags1());
|
||||
bos.write(shortbuf);
|
||||
leos.writeShort(getFlags1());
|
||||
leos.write(getLabel().getBytes(ISO1));
|
||||
leos.write(0);
|
||||
leos.write(getFileName().getBytes(ISO1));
|
||||
leos.write(0);
|
||||
leos.writeShort(getFlags2());
|
||||
leos.writeShort(getUnknown1());
|
||||
leos.writeInt(getCommand().length() + 1);
|
||||
leos.write(getCommand().getBytes(ISO1));
|
||||
leos.write(0);
|
||||
leos.writeInt(getDataSize());
|
||||
leos.write(getDataBuffer());
|
||||
leos.writeShort(getFlags3());
|
||||
leos.close(); // satisfy compiler ...
|
||||
|
||||
bos.write(getLabel().getBytes(ISO1));
|
||||
bos.write(0);
|
||||
leosOut.writeInt(bos.size()); // total size
|
||||
bos.writeTo(out);
|
||||
break;
|
||||
}
|
||||
case compact:
|
||||
leosOut.writeInt(getDataSize()+LittleEndianConsts.SHORT_SIZE);
|
||||
leosOut.writeShort(getFlags1());
|
||||
out.write(getDataBuffer());
|
||||
break;
|
||||
default:
|
||||
case unparsed:
|
||||
leosOut.writeInt(getDataSize());
|
||||
out.write(getDataBuffer());
|
||||
break;
|
||||
}
|
||||
|
||||
bos.write(getFileName().getBytes(ISO1));
|
||||
bos.write(0);
|
||||
|
||||
LittleEndian.putShort(shortbuf, 0, getFlags2());
|
||||
bos.write(shortbuf);
|
||||
|
||||
LittleEndian.putShort(shortbuf, 0, getUnknown1());
|
||||
bos.write(shortbuf);
|
||||
|
||||
LittleEndian.putInt(intbuf, 0, getCommand().length()+1);
|
||||
bos.write(intbuf);
|
||||
|
||||
bos.write(getCommand().getBytes(ISO1));
|
||||
bos.write(0);
|
||||
|
||||
LittleEndian.putInt(intbuf, 0, getDataBuffer().length);
|
||||
bos.write(intbuf);
|
||||
|
||||
bos.write(getDataBuffer());
|
||||
|
||||
LittleEndian.putShort(shortbuf, 0, getFlags3());
|
||||
bos.write(shortbuf);
|
||||
|
||||
// update total size - length of length-field (4 bytes)
|
||||
byte data[] = bos.toByteArray();
|
||||
totalSize = data.length - LittleEndianConsts.INT_SIZE;
|
||||
LittleEndian.putInt(data, 0, totalSize);
|
||||
|
||||
out.write(data);
|
||||
}
|
||||
|
||||
public void setFlags1(short flags1) {
|
||||
|
|
|
@ -17,27 +17,24 @@
|
|||
|
||||
package org.apache.poi.poifs;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestSuite;
|
||||
|
||||
import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry;
|
||||
import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests;
|
||||
import org.apache.poi.poifs.nio.TestDataSource;
|
||||
import org.apache.poi.poifs.property.AllPOIFSPropertyTests;
|
||||
import org.apache.poi.poifs.storage.AllPOIFSStorageTests;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Suite;
|
||||
|
||||
/**
|
||||
* Test suite for all sub-packages of org.apache.poi.poifs
|
||||
*
|
||||
* @author Josh Micich
|
||||
*/
|
||||
@RunWith(Suite.class)
|
||||
@Suite.SuiteClasses({
|
||||
TestPOIFSReaderRegistry.class
|
||||
, TestDataSource.class
|
||||
, AllPOIFSFileSystemTests.class
|
||||
, AllPOIFSPropertyTests.class
|
||||
, AllPOIFSStorageTests.class
|
||||
})
|
||||
public final class AllPOIFSTests {
|
||||
public static Test suite() {
|
||||
TestSuite result = new TestSuite("Tests for org.apache.poi.poifs");
|
||||
result.addTestSuite(TestPOIFSReaderRegistry.class);
|
||||
result.addTestSuite(TestDataSource.class);
|
||||
result.addTest(AllPOIFSFileSystemTests.suite());
|
||||
result.addTest(AllPOIFSPropertyTests.suite());
|
||||
result.addTest(AllPOIFSStorageTests.suite());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,31 +17,27 @@
|
|||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestSuite;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Suite;
|
||||
|
||||
/**
|
||||
* Tests for org.apache.poi.poifs.filesystem<br/>
|
||||
*
|
||||
* @author Josh Micich
|
||||
*/
|
||||
@RunWith(Suite.class)
|
||||
@Suite.SuiteClasses({
|
||||
TestDirectoryNode.class
|
||||
, TestDocument.class
|
||||
, TestDocumentDescriptor.class
|
||||
, TestDocumentInputStream.class
|
||||
, TestDocumentNode.class
|
||||
, TestDocumentOutputStream.class
|
||||
, TestEmptyDocument.class
|
||||
, TestOffice2007XMLException.class
|
||||
, TestPOIFSDocumentPath.class
|
||||
, TestPOIFSFileSystem.class
|
||||
, TestNPOIFSFileSystem.class
|
||||
, TestPropertySorter.class
|
||||
, TestOle10Native.class
|
||||
})
|
||||
public final class AllPOIFSFileSystemTests {
|
||||
|
||||
public static Test suite() {
|
||||
TestSuite result = new TestSuite("Tests for org.apache.poi.poifs.filesystem");
|
||||
result.addTestSuite(TestDirectoryNode.class);
|
||||
result.addTestSuite(TestDocument.class);
|
||||
result.addTestSuite(TestDocumentDescriptor.class);
|
||||
result.addTestSuite(TestDocumentInputStream.class);
|
||||
result.addTestSuite(TestDocumentNode.class);
|
||||
result.addTestSuite(TestDocumentOutputStream.class);
|
||||
result.addTestSuite(TestEmptyDocument.class);
|
||||
result.addTestSuite(TestOffice2007XMLException.class);
|
||||
result.addTestSuite(TestPOIFSDocumentPath.class);
|
||||
result.addTestSuite(TestPOIFSFileSystem.class);
|
||||
result.addTestSuite(TestNPOIFSFileSystem.class);
|
||||
result.addTestSuite(TestPropertySorter.class);
|
||||
result.addTestSuite(TestOle10Native.class);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,14 +17,26 @@
|
|||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
public class TestOle10Native extends TestCase {
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestOle10Native {
|
||||
private static final POIDataSamples dataSamples = POIDataSamples.getPOIFSInstance();
|
||||
|
||||
@Test
|
||||
public void testOleNative() throws IOException, Ole10NativeException {
|
||||
POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("oleObject1.bin"));
|
||||
|
||||
|
@ -33,4 +45,66 @@ public class TestOle10Native extends TestCase {
|
|||
assertEquals("File1.svg", ole.getLabel());
|
||||
assertEquals("D:\\Documents and Settings\\rsc\\My Documents\\file1.svg", ole.getCommand());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFiles() throws IOException, Ole10NativeException {
|
||||
File files[] = {
|
||||
// bug 51891
|
||||
POIDataSamples.getPOIFSInstance().getFile("multimedia.doc"),
|
||||
// tika bug 1072
|
||||
POIDataSamples.getPOIFSInstance().getFile("20-Force-on-a-current-S00.doc"),
|
||||
// other files containing ole10native records ...
|
||||
POIDataSamples.getDocumentInstance().getFile("Bug53380_3.doc"),
|
||||
POIDataSamples.getDocumentInstance().getFile("Bug47731.doc")
|
||||
};
|
||||
|
||||
for (File f : files) {
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(f, true);
|
||||
List<Entry> entries = new ArrayList<Entry>();
|
||||
findOle10(entries, fs.getRoot(), "/", "");
|
||||
|
||||
for (Entry e : entries) {
|
||||
ByteArrayOutputStream bosExp = new ByteArrayOutputStream();
|
||||
InputStream is = ((DirectoryNode)e.getParent()).createDocumentInputStream(e);
|
||||
IOUtils.copy(is,bosExp);
|
||||
is.close();
|
||||
|
||||
Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)e.getParent());
|
||||
|
||||
ByteArrayOutputStream bosAct = new ByteArrayOutputStream();
|
||||
ole.writeOut(bosAct);
|
||||
|
||||
assertThat(bosExp.toByteArray(), equalTo(bosAct.toByteArray()));
|
||||
}
|
||||
|
||||
fs.close();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void searchOle10Files() throws Exception {
|
||||
File dir = new File("test-data/document");
|
||||
for (File file : dir.listFiles(new FileFilter(){
|
||||
public boolean accept(File pathname) {
|
||||
return pathname.getName().endsWith("doc");
|
||||
}
|
||||
})) {
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(file, true);
|
||||
findOle10(null, fs.getRoot(), "/", file.getName());
|
||||
fs.close();
|
||||
}
|
||||
}*/
|
||||
|
||||
void findOle10(List<Entry> entries, DirectoryNode dn, String path, String filename) {
|
||||
Iterator<Entry> iter = dn.getEntries();
|
||||
while (iter.hasNext()) {
|
||||
Entry e = iter.next();
|
||||
if (Ole10Native.OLE10_NATIVE.equals(e.getName())) {
|
||||
if (entries != null) entries.add(e);
|
||||
// System.out.println(filename+" : "+path);
|
||||
} else if (e.isDirectoryEntry()) {
|
||||
findOle10(entries, (DirectoryNode)e, path+e.getName()+"/", filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue