Bug 51891 - Fix StringIndexOutOfBoundsException : Ole10Native.<init> (parsing word file)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1563483 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2014-02-01 21:45:48 +00:00
parent bc936983a5
commit e484fd75d0
7 changed files with 475 additions and 371 deletions

View File

@ -40,6 +40,7 @@ public class ClassID
public static final ClassID WORD95 = new ClassID("{00020900-0000-0000-C000-000000000046}");
public static final ClassID POWERPOINT97 = new ClassID("{64818D10-4F9B-11CF-86EA-00AA00B929E8}");
public static final ClassID POWERPOINT95 = new ClassID("{EA7BAE70-FB3B-11CD-A903-00AA00510EA3}");
public static final ClassID EQUATION30 = new ClassID("{0002CE02-0000-0000-C000-000000000046}");
/**

View File

@ -18,13 +18,12 @@
package org.apache.poi.poifs.filesystem;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.LittleEndianOutputStream;
import org.apache.poi.util.StringUtil;
/**
@ -49,6 +48,28 @@ public class Ole10Native {
private byte[] dataBuffer; // varying size, the actual native data
private short flags3 = 0; // some final flags? or zero terminators?, sometimes not there
/**
* The layout in which the native data was encoded. The mode is determined
* heuristically while parsing the record - it is merely a trial-and-error
* guess - and is remembered so the record can be written out in the same
* layout again.
*/
private enum EncodingMode {
/**
* the data is stored in parsed format - including label, command, etc.
*/
parsed,
/**
* the data is stored raw after the length field, i.e. the data size
* equals the total size
*/
unparsed,
/**
* the data is stored raw after the length field and the flags1 field,
* i.e. the data size is the total size minus the size of flags1
*/
compact;
}
private EncodingMode mode;
/**
* Creates an instance of this class from an embedded OLE Object. The OLE Object is expected
* to include a stream &quot;{01}Ole10Native&quot; which contains the actual
@ -74,21 +95,12 @@ public class Ole10Native {
* @throws Ole10NativeException on invalid or unexpected data format
*/
public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException {
boolean plain = false;
try {
directory.getEntry("\u0001Ole10ItemName");
plain = true;
} catch (FileNotFoundException ex) {
plain = false;
}
DocumentEntry nativeEntry =
(DocumentEntry)directory.getEntry(OLE10_NATIVE);
byte[] data = new byte[nativeEntry.getSize()];
directory.createDocumentInputStream(nativeEntry).read(data);
return new Ole10Native(data, 0, plain);
return new Ole10Native(data, 0);
}
/**
@ -99,6 +111,21 @@ public class Ole10Native {
setFileName(filename);
setCommand(command);
setDataBuffer(data);
mode = EncodingMode.parsed;
}
/**
* Creates an instance and fills the fields based on the data in the given buffer.
* This overload only delegates to {@link #Ole10Native(byte[],int)}.
*
* @param data The buffer containing the Ole10Native record
* @param offset The start offset of the record in the buffer
* @param plain as of POI 3.11 this parameter is ignored
* @throws Ole10NativeException on invalid or unexpected data format
*
* @deprecated parameter plain is ignored, use {@link #Ole10Native(byte[],int)}
*/
@Deprecated
public Ole10Native(byte[] data, int offset, boolean plain) throws Ole10NativeException {
    // the encoding mode is detected from the data itself, so "plain" has no effect
    this(data, offset);
}
/**
@ -109,38 +136,32 @@ public class Ole10Native {
* @throws Ole10NativeException on invalid or unexpected data format
*/
public Ole10Native(byte[] data, int offset) throws Ole10NativeException {
this(data, offset, false);
}
/**
* Creates an instance and fills the fields based on the data in the given buffer.
*
* @param data The buffer containing the Ole10Native record
* @param offset The start offset of the record in the buffer
* @param plain Specified 'plain' format without filename
* @throws Ole10NativeException on invalid or unexcepted data format
*/
public Ole10Native(byte[] data, int offset, boolean plain) throws Ole10NativeException {
int ofs = offset; // current offset, initialized to start
if (data.length<offset+2) {
if (data.length < offset + 2) {
throw new Ole10NativeException("data is too small");
}
totalSize = LittleEndian.getInt(data, ofs);
ofs += LittleEndianConsts.INT_SIZE;
if (plain) {
dataBuffer = new byte[totalSize-4];
System.arraycopy(data, 4, dataBuffer, 0, dataBuffer.length);
// int dataSize = totalSize - 4;
byte[] oleLabel = new byte[8];
System.arraycopy(dataBuffer, 0, oleLabel, 0, Math.min(dataBuffer.length, 8));
label = "ole-"+ HexDump.toHex(oleLabel);
fileName = label;
command = label;
mode = EncodingMode.unparsed;
if (LittleEndian.getShort(data, ofs) == 2) {
// some files like equations don't have a valid filename,
// but somehow encode the formula right away in the ole10 header
if (Character.isISOControl(data[ofs+LittleEndianConsts.SHORT_SIZE])) {
mode = EncodingMode.compact;
} else {
mode = EncodingMode.parsed;
}
}
int dataSize;
switch (mode) {
case parsed: {
flags1 = LittleEndian.getShort(data, ofs);
// structured format
ofs += LittleEndianConsts.SHORT_SIZE;
int len = getStringLength(data, ofs);
@ -159,7 +180,6 @@ public class Ole10Native {
len = LittleEndian.getInt(data, ofs);
ofs += LittleEndianConsts.INT_SIZE;
command = StringUtil.getFromCompressedUnicode(data, ofs, len - 1);
ofs += len;
@ -167,25 +187,36 @@ public class Ole10Native {
throw new Ole10NativeException("Invalid Ole10Native");
}
int dataSize = LittleEndian.getInt(data, ofs);
dataSize = LittleEndian.getInt(data, ofs);
ofs += LittleEndianConsts.INT_SIZE;
if (dataSize < 0 || totalSize - (ofs - LittleEndianConsts.INT_SIZE) < dataSize) {
throw new Ole10NativeException("Invalid Ole10Native");
}
break;
}
case compact:
flags1 = LittleEndian.getShort(data, ofs);
ofs += LittleEndianConsts.SHORT_SIZE;
dataSize = totalSize - LittleEndianConsts.SHORT_SIZE;
break;
default:
case unparsed:
dataSize = totalSize;
break;
}
dataBuffer = new byte[dataSize];
System.arraycopy(data, ofs, dataBuffer, 0, dataSize);
ofs += dataSize;
}
}
/*
* Helper - determine length of zero terminated string (ASCIIZ).
*/
private static int getStringLength(byte[] data, int ofs) {
int len = 0;
while (len+ofs<data.length && data[ofs + len] != 0) {
while (len + ofs < data.length && data[ofs + len] != 0) {
len++;
}
len++;
@ -193,8 +224,8 @@ public class Ole10Native {
}
/**
* Returns the value of the totalSize field - the total length of the structure
* is totalSize + 4 (value of this field + size of this field).
* Returns the value of the totalSize field - the total length of the
* structure is totalSize + 4 (value of this field + size of this field).
*
* @return the totalSize
*/
@ -212,8 +243,9 @@ public class Ole10Native {
}
/**
* Returns the label field - usually the name of the file (without directory) but
* probably may be any name specified during packaging/embedding the data.
* Returns the label field - usually the name of the file (without
* directory) but probably may be any name specified during
* packaging/embedding the data.
*
* @return the label
*/
@ -251,7 +283,8 @@ public class Ole10Native {
/**
* Returns the command field - usually the name of the file being embedded
* including the full path, may be a command specified during embedding the file.
* including the full path, may be a command specified during embedding the
* file.
*
* @return the command
*/
@ -260,9 +293,9 @@ public class Ole10Native {
}
/**
* Returns the size of the embedded file. If the size is 0 (zero), no data has been
* embedded. To be sure, that no data has been embedded, check whether
* {@link #getDataBuffer()} returns <code>null</code>.
* Returns the size of the embedded file. If the size is 0 (zero), no data
* has been embedded. To be sure, that no data has been embedded, check
* whether {@link #getDataBuffer()} returns <code>null</code>.
*
* @return the dataSize
*/
@ -271,10 +304,11 @@ public class Ole10Native {
}
/**
* Returns the buffer containing the embedded file's data, or <code>null</code>
* if no data was embedded. Note that an embedding may provide information about
* the data, but the actual data is not included. (So label, filename etc. are
* available, but this method returns <code>null</code>.)
* Returns the buffer containing the embedded file's data, or
* <code>null</code> if no data was embedded. Note that an embedding may
* provide information about the data, but the actual data is not included.
* (So label, filename etc. are available, but this method returns
* <code>null</code>.)
*
* @return the dataBuffer
*/
@ -298,47 +332,49 @@ public class Ole10Native {
* children, then chuck on their header and return)
*/
public void writeOut(OutputStream out) throws IOException {
byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE];
byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE];
// byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE];
// byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE];
@SuppressWarnings("resource")
LittleEndianOutputStream leosOut = new LittleEndianOutputStream(out);
switch (mode) {
case parsed: {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
bos.write(intbuf); // total size, will be determined later ..
LittleEndianOutputStream leos = new LittleEndianOutputStream(bos);
// total size, will be determined later ..
LittleEndian.putShort(shortbuf, 0, getFlags1());
bos.write(shortbuf);
leos.writeShort(getFlags1());
leos.write(getLabel().getBytes(ISO1));
leos.write(0);
leos.write(getFileName().getBytes(ISO1));
leos.write(0);
leos.writeShort(getFlags2());
leos.writeShort(getUnknown1());
leos.writeInt(getCommand().length() + 1);
leos.write(getCommand().getBytes(ISO1));
leos.write(0);
leos.writeInt(getDataSize());
leos.write(getDataBuffer());
leos.writeShort(getFlags3());
leos.close(); // satisfy compiler ...
bos.write(getLabel().getBytes(ISO1));
bos.write(0);
leosOut.writeInt(bos.size()); // total size
bos.writeTo(out);
break;
}
case compact:
leosOut.writeInt(getDataSize()+LittleEndianConsts.SHORT_SIZE);
leosOut.writeShort(getFlags1());
out.write(getDataBuffer());
break;
default:
case unparsed:
leosOut.writeInt(getDataSize());
out.write(getDataBuffer());
break;
}
bos.write(getFileName().getBytes(ISO1));
bos.write(0);
LittleEndian.putShort(shortbuf, 0, getFlags2());
bos.write(shortbuf);
LittleEndian.putShort(shortbuf, 0, getUnknown1());
bos.write(shortbuf);
LittleEndian.putInt(intbuf, 0, getCommand().length()+1);
bos.write(intbuf);
bos.write(getCommand().getBytes(ISO1));
bos.write(0);
LittleEndian.putInt(intbuf, 0, getDataBuffer().length);
bos.write(intbuf);
bos.write(getDataBuffer());
LittleEndian.putShort(shortbuf, 0, getFlags3());
bos.write(shortbuf);
// update total size - length of length-field (4 bytes)
byte data[] = bos.toByteArray();
totalSize = data.length - LittleEndianConsts.INT_SIZE;
LittleEndian.putInt(data, 0, totalSize);
out.write(data);
}
public void setFlags1(short flags1) {

View File

@ -17,27 +17,24 @@
package org.apache.poi.poifs;
import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry;
import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests;
import org.apache.poi.poifs.nio.TestDataSource;
import org.apache.poi.poifs.property.AllPOIFSPropertyTests;
import org.apache.poi.poifs.storage.AllPOIFSStorageTests;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
/**
* Test suite for all sub-packages of org.apache.poi.poifs
*
* @author Josh Micich
*/
@RunWith(Suite.class)
@Suite.SuiteClasses({
TestPOIFSReaderRegistry.class
, TestDataSource.class
, AllPOIFSFileSystemTests.class
, AllPOIFSPropertyTests.class
, AllPOIFSStorageTests.class
})
public final class AllPOIFSTests {
public static Test suite() {
TestSuite result = new TestSuite("Tests for org.apache.poi.poifs");
result.addTestSuite(TestPOIFSReaderRegistry.class);
result.addTestSuite(TestDataSource.class);
result.addTest(AllPOIFSFileSystemTests.suite());
result.addTest(AllPOIFSPropertyTests.suite());
result.addTest(AllPOIFSStorageTests.suite());
return result;
}
}

View File

@ -17,31 +17,27 @@
package org.apache.poi.poifs.filesystem;
import junit.framework.Test;
import junit.framework.TestSuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
/**
* Tests for org.apache.poi.poifs.filesystem<br/>
*
* @author Josh Micich
*/
@RunWith(Suite.class)
@Suite.SuiteClasses({
TestDirectoryNode.class
, TestDocument.class
, TestDocumentDescriptor.class
, TestDocumentInputStream.class
, TestDocumentNode.class
, TestDocumentOutputStream.class
, TestEmptyDocument.class
, TestOffice2007XMLException.class
, TestPOIFSDocumentPath.class
, TestPOIFSFileSystem.class
, TestNPOIFSFileSystem.class
, TestPropertySorter.class
, TestOle10Native.class
})
public final class AllPOIFSFileSystemTests {
public static Test suite() {
TestSuite result = new TestSuite("Tests for org.apache.poi.poifs.filesystem");
result.addTestSuite(TestDirectoryNode.class);
result.addTestSuite(TestDocument.class);
result.addTestSuite(TestDocumentDescriptor.class);
result.addTestSuite(TestDocumentInputStream.class);
result.addTestSuite(TestDocumentNode.class);
result.addTestSuite(TestDocumentOutputStream.class);
result.addTestSuite(TestEmptyDocument.class);
result.addTestSuite(TestOffice2007XMLException.class);
result.addTestSuite(TestPOIFSDocumentPath.class);
result.addTestSuite(TestPOIFSFileSystem.class);
result.addTestSuite(TestNPOIFSFileSystem.class);
result.addTestSuite(TestPropertySorter.class);
result.addTestSuite(TestOle10Native.class);
return result;
}
}

View File

@ -17,14 +17,26 @@
package org.apache.poi.poifs.filesystem;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class TestOle10Native extends TestCase {
import org.apache.poi.POIDataSamples;
import org.apache.poi.util.IOUtils;
import org.junit.Test;
public class TestOle10Native {
private static final POIDataSamples dataSamples = POIDataSamples.getPOIFSInstance();
@Test
public void testOleNative() throws IOException, Ole10NativeException {
POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("oleObject1.bin"));
@ -33,4 +45,66 @@ public class TestOle10Native extends TestCase {
assertEquals("File1.svg", ole.getLabel());
assertEquals("D:\\Documents and Settings\\rsc\\My Documents\\file1.svg", ole.getCommand());
}
/**
 * Round-trip check: every Ole10Native stream found in the sample files is
 * parsed and written out again, and the written bytes must be identical to
 * the bytes originally stored in the file.
 */
@Test
public void testFiles() throws IOException, Ole10NativeException {
    File files[] = {
        // bug 51891
        POIDataSamples.getPOIFSInstance().getFile("multimedia.doc"),
        // tika bug 1072
        POIDataSamples.getPOIFSInstance().getFile("20-Force-on-a-current-S00.doc"),
        // other files containing ole10native records ...
        POIDataSamples.getDocumentInstance().getFile("Bug53380_3.doc"),
        POIDataSamples.getDocumentInstance().getFile("Bug47731.doc")
    };

    for (File f : files) {
        NPOIFSFileSystem fs = new NPOIFSFileSystem(f, true);
        // try/finally so the file is released even when parsing or the assertion fails
        try {
            List<Entry> entries = new ArrayList<Entry>();
            findOle10(entries, fs.getRoot(), "/", "");

            for (Entry e : entries) {
                DirectoryNode parent = (DirectoryNode)e.getParent();

                // expected: the raw bytes of the stream as stored in the file
                ByteArrayOutputStream bosExp = new ByteArrayOutputStream();
                InputStream is = parent.createDocumentInputStream(e);
                try {
                    IOUtils.copy(is, bosExp);
                } finally {
                    is.close();
                }

                // actual: the bytes produced by parsing and re-serializing the record
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(parent);
                ByteArrayOutputStream bosAct = new ByteArrayOutputStream();
                ole.writeOut(bosAct);

                // actual value first, expected value inside the matcher
                assertThat(f.getName(), bosAct.toByteArray(), equalTo(bosExp.toByteArray()));
            }
        } finally {
            fs.close();
        }
    }
}
/*
void searchOle10Files() throws Exception {
File dir = new File("test-data/document");
for (File file : dir.listFiles(new FileFilter(){
public boolean accept(File pathname) {
return pathname.getName().endsWith("doc");
}
})) {
NPOIFSFileSystem fs = new NPOIFSFileSystem(file, true);
findOle10(null, fs.getRoot(), "/", file.getName());
fs.close();
}
}*/
/**
 * Walks the directory tree below the given node depth-first and collects
 * every entry whose name equals {@link Ole10Native#OLE10_NATIVE}.
 *
 * @param entries receives the matching entries, may be <code>null</code> to only traverse
 * @param dn the directory node to scan
 * @param path the path of <code>dn</code>, extended with each sub directory name on descent
 * @param filename passed through unchanged to the recursive calls
 */
void findOle10(List<Entry> entries, DirectoryNode dn, String path, String filename) {
    for (Iterator<Entry> children = dn.getEntries(); children.hasNext();) {
        Entry child = children.next();

        boolean isOle10Stream = Ole10Native.OLE10_NATIVE.equals(child.getName());
        if (isOle10Stream) {
            if (entries != null) {
                entries.add(child);
            }
            continue;
        }

        if (child.isDirectoryEntry()) {
            String childPath = path + child.getName() + "/";
            findOle10(entries, (DirectoryNode)child, childPath, filename);
        }
    }
}
}

Binary file not shown.

Binary file not shown.