diff --git a/src/java/org/apache/poi/hpsf/ClassID.java b/src/java/org/apache/poi/hpsf/ClassID.java index a49d9df588..c78702c3c1 100644 --- a/src/java/org/apache/poi/hpsf/ClassID.java +++ b/src/java/org/apache/poi/hpsf/ClassID.java @@ -40,6 +40,7 @@ public class ClassID public static final ClassID WORD95 = new ClassID("{00020900-0000-0000-C000-000000000046}"); public static final ClassID POWERPOINT97 = new ClassID("{64818D10-4F9B-11CF-86EA-00AA00B929E8}"); public static final ClassID POWERPOINT95 = new ClassID("{EA7BAE70-FB3B-11CD-A903-00AA00510EA3}"); + public static final ClassID EQUATION30 = new ClassID("{0002CE02-0000-0000-C000-000000000046}"); /** diff --git a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java index 2c950da3ff..122eb0a5ae 100644 --- a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java +++ b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java @@ -17,14 +17,13 @@ package org.apache.poi.poifs.filesystem; -import java.io.ByteArrayOutputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.OutputStream; - -import org.apache.poi.util.HexDump; -import org.apache.poi.util.LittleEndian; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; +import org.apache.poi.util.LittleEndianOutputStream; import org.apache.poi.util.StringUtil; /** @@ -35,341 +34,378 @@ import org.apache.poi.util.StringUtil; */ public class Ole10Native { - public static final String OLE10_NATIVE = "\u0001Ole10Native"; - protected static final String ISO1 = "ISO-8859-1"; - - // (the fields as they appear in the raw record:) - private int totalSize; // 4 bytes, total size of record not including this field - private short flags1 = 2; // 2 bytes, unknown, mostly [02 00] - private String label; // ASCIIZ, stored in this field without the terminating zero - private String fileName; // ASCIIZ, stored in this field without the terminating zero - private short flags2 = 0; // 2 bytes, unknown, mostly [00 00] - private short unknown1 = 3; // see below - private String command; // ASCIIZ, stored in this field without the terminating zero - private byte[] dataBuffer; // varying size, the actual native data - private short flags3 = 0; // some final flags? or zero terminators?, sometimes not there - - /** - * Creates an instance of this class from an embedded OLE Object. The OLE Object is expected - * to include a stream "{01}Ole10Native" which contains the actual - * data relevant for this class. - * - * @param poifs POI Filesystem object - * @return Returns an instance of this class - * @throws IOException on IO error - * @throws Ole10NativeException on invalid or unexcepted data format - */ - public static Ole10Native createFromEmbeddedOleObject(POIFSFileSystem poifs) throws IOException, Ole10NativeException { - return createFromEmbeddedOleObject(poifs.getRoot()); - } + public static final String OLE10_NATIVE = "\u0001Ole10Native"; + protected static final String ISO1 = "ISO-8859-1"; + + // (the fields as they appear in the raw record:) + private int totalSize; // 4 bytes, total size of record not including this field + private short flags1 = 2; // 2 bytes, unknown, mostly [02 00] + private String label; // ASCIIZ, stored in this field without the terminating zero + private String fileName; // ASCIIZ, stored in this field without the terminating zero + private short flags2 = 0; // 2 bytes, unknown, mostly [00 00] + private short unknown1 = 3; // see below + private String command; // ASCIIZ, stored in this field without the terminating zero + private byte[] dataBuffer; // varying size, the actual native data + private short flags3 = 0; // some final flags? or zero terminators?, sometimes not there + + /** + * the field encoding mode - merely a try-and-error guess ... + **/ + private enum EncodingMode { + /** + * the data is stored in parsed format - including label, command, etc. + */ + parsed, + /** + * the data is stored raw after the length field + */ + unparsed, + /** + * the data is stored raw after the length field and the flags1 field + */ + compact; + } + + private EncodingMode mode; + + + + /** + * Creates an instance of this class from an embedded OLE Object. The OLE Object is expected + * to include a stream "{01}Ole10Native" which contains the actual + * data relevant for this class. + * + * @param poifs POI Filesystem object + * @return Returns an instance of this class + * @throws IOException on IO error + * @throws Ole10NativeException on invalid or unexcepted data format + */ + public static Ole10Native createFromEmbeddedOleObject(POIFSFileSystem poifs) throws IOException, Ole10NativeException { + return createFromEmbeddedOleObject(poifs.getRoot()); + } + + /** + * Creates an instance of this class from an embedded OLE Object. The OLE Object is expected + * to include a stream "{01}Ole10Native" which contains the actual + * data relevant for this class. + * + * @param directory POI Filesystem object + * @return Returns an instance of this class + * @throws IOException on IO error + * @throws Ole10NativeException on invalid or unexcepted data format + */ + public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException { + DocumentEntry nativeEntry = + (DocumentEntry)directory.getEntry(OLE10_NATIVE); + byte[] data = new byte[nativeEntry.getSize()]; + directory.createDocumentInputStream(nativeEntry).read(data); - /** - * Creates an instance of this class from an embedded OLE Object. The OLE Object is expected - * to include a stream "{01}Ole10Native" which contains the actual - * data relevant for this class. - * - * @param directory POI Filesystem object - * @return Returns an instance of this class - * @throws IOException on IO error - * @throws Ole10NativeException on invalid or unexcepted data format - */ - public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException { - boolean plain = false; - - try { - directory.getEntry("\u0001Ole10ItemName"); - plain = true; - } catch (FileNotFoundException ex) { - plain = false; - } - - DocumentEntry nativeEntry = - (DocumentEntry)directory.getEntry(OLE10_NATIVE); - byte[] data = new byte[nativeEntry.getSize()]; - directory.createDocumentInputStream(nativeEntry).read(data); - - return new Ole10Native(data, 0, plain); - } - - /** - * Creates an instance and fills the fields based on ... the fields - */ - public Ole10Native(String label, String filename, String command, byte[] data) { - setLabel(label); - setFileName(filename); - setCommand(command); - setDataBuffer(data); - } - - /** - * Creates an instance and fills the fields based on the data in the given buffer. - * - * @param data The buffer containing the Ole10Native record - * @param offset The start offset of the record in the buffer - * @throws Ole10NativeException on invalid or unexcepted data format - */ - public Ole10Native(byte[] data, int offset) throws Ole10NativeException { - this(data, offset, false); - } - /** - * Creates an instance and fills the fields based on the data in the given buffer. - * - * @param data The buffer containing the Ole10Native record - * @param offset The start offset of the record in the buffer - * @param plain Specified 'plain' format without filename - * @throws Ole10NativeException on invalid or unexcepted data format - */ - public Ole10Native(byte[] data, int offset, boolean plain) throws Ole10NativeException { - int ofs = offset; // current offset, initialized to start - - if (data.lengthnull. - * - * @return the dataSize - */ - public int getDataSize() { - return dataBuffer.length; - } - - /** - * Returns the buffer containing the embedded file's data, or null - * if no data was embedded. Note that an embedding may provide information about - * the data, but the actual data is not included. (So label, filename etc. are - * available, but this method returns null.) - * - * @return the dataBuffer - */ - public byte[] getDataBuffer() { - return dataBuffer; - } + /** + * Returns flags2 - currently unknown - mostly 0x0000. + * + * @return the flags2 + */ + public short getFlags2() { + return flags2; + } - /** - * Returns the flags3 - currently unknown. - * - * @return the flags3 - */ - public short getFlags3() { - return flags3; - } + /** + * Returns unknown1 field - currently unknown. + * + * @return the unknown1 + */ + public short getUnknown1() { + return unknown1; + } + + /** + * Returns the command field - usually the name of the file being embedded + * including the full path, may be a command specified during embedding the + * file. + * + * @return the command + */ + public String getCommand() { + return command; + } + + /** + * Returns the size of the embedded file. If the size is 0 (zero), no data + * has been embedded. To be sure, that no data has been embedded, check + * whether {@link #getDataBuffer()} returns null. + * + * @return the dataSize + */ + public int getDataSize() { + return dataBuffer.length; + } + + /** + * Returns the buffer containing the embedded file's data, or + * null if no data was embedded. Note that an embedding may + * provide information about the data, but the actual data is not included. + * (So label, filename etc. are available, but this method returns + * null.) + * + * @return the dataBuffer + */ + public byte[] getDataBuffer() { + return dataBuffer; + } + + /** + * Returns the flags3 - currently unknown. + * + * @return the flags3 + */ + public short getFlags3() { + return flags3; + } + + /** + * Have the contents printer out into an OutputStream, used when writing a + * file back out to disk (Normally, atom classes will keep their bytes + * around, but non atom classes will just request the bytes from their + * children, then chuck on their header and return) + */ + public void writeOut(OutputStream out) throws IOException { + // byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE]; + // byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE]; + + @SuppressWarnings("resource") + LittleEndianOutputStream leosOut = new LittleEndianOutputStream(out); + + switch (mode) { + case parsed: { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + LittleEndianOutputStream leos = new LittleEndianOutputStream(bos); + // total size, will be determined later .. + + leos.writeShort(getFlags1()); + leos.write(getLabel().getBytes(ISO1)); + leos.write(0); + leos.write(getFileName().getBytes(ISO1)); + leos.write(0); + leos.writeShort(getFlags2()); + leos.writeShort(getUnknown1()); + leos.writeInt(getCommand().length() + 1); + leos.write(getCommand().getBytes(ISO1)); + leos.write(0); + leos.writeInt(getDataSize()); + leos.write(getDataBuffer()); + leos.writeShort(getFlags3()); + leos.close(); // satisfy compiler ... + + leosOut.writeInt(bos.size()); // total size + bos.writeTo(out); + break; + } + case compact: + leosOut.writeInt(getDataSize()+LittleEndianConsts.SHORT_SIZE); + leosOut.writeShort(getFlags1()); + out.write(getDataBuffer()); + break; + default: + case unparsed: + leosOut.writeInt(getDataSize()); + out.write(getDataBuffer()); + break; + } + + } - /** - * Have the contents printer out into an OutputStream, used when writing a - * file back out to disk (Normally, atom classes will keep their bytes - * around, but non atom classes will just request the bytes from their - * children, then chuck on their header and return) - */ - public void writeOut(OutputStream out) throws IOException { - byte intbuf[] = new byte[LittleEndianConsts.INT_SIZE]; - byte shortbuf[] = new byte[LittleEndianConsts.SHORT_SIZE]; - - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - bos.write(intbuf); // total size, will be determined later .. - - LittleEndian.putShort(shortbuf, 0, getFlags1()); - bos.write(shortbuf); - - bos.write(getLabel().getBytes(ISO1)); - bos.write(0); - - bos.write(getFileName().getBytes(ISO1)); - bos.write(0); - - LittleEndian.putShort(shortbuf, 0, getFlags2()); - bos.write(shortbuf); - - LittleEndian.putShort(shortbuf, 0, getUnknown1()); - bos.write(shortbuf); - - LittleEndian.putInt(intbuf, 0, getCommand().length()+1); - bos.write(intbuf); - - bos.write(getCommand().getBytes(ISO1)); - bos.write(0); - - LittleEndian.putInt(intbuf, 0, getDataBuffer().length); - bos.write(intbuf); - - bos.write(getDataBuffer()); - - LittleEndian.putShort(shortbuf, 0, getFlags3()); - bos.write(shortbuf); - - // update total size - length of length-field (4 bytes) - byte data[] = bos.toByteArray(); - totalSize = data.length - LittleEndianConsts.INT_SIZE; - LittleEndian.putInt(data, 0, totalSize); - - out.write(data); - } - - public void setFlags1(short flags1) { - this.flags1 = flags1; - } - - public void setFlags2(short flags2) { - this.flags2 = flags2; - } - - public void setFlags3(short flags3) { - this.flags3 = flags3; - } - - public void setLabel(String label) { - this.label = label; - } - - public void setFileName(String fileName) { - this.fileName = fileName; - } - - public void setCommand(String command) { - this.command = command; - } - - public void setUnknown1(short unknown1) { - this.unknown1 = unknown1; - } - - public void setDataBuffer(byte dataBuffer[]) { - this.dataBuffer = dataBuffer; - } + public void setFlags1(short flags1) { + this.flags1 = flags1; + } + + public void setFlags2(short flags2) { + this.flags2 = flags2; + } + + public void setFlags3(short flags3) { + this.flags3 = flags3; + } + + public void setLabel(String label) { + this.label = label; + } + + public void setFileName(String fileName) { + this.fileName = fileName; + } + + public void setCommand(String command) { + this.command = command; + } + + public void setUnknown1(short unknown1) { + this.unknown1 = unknown1; + } + + public void setDataBuffer(byte dataBuffer[]) { + this.dataBuffer = dataBuffer; + } } diff --git a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java index d4f81f7ee2..012cc3b644 100644 --- a/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java +++ b/src/testcases/org/apache/poi/poifs/AllPOIFSTests.java @@ -17,27 +17,24 @@ package org.apache.poi.poifs; -import junit.framework.Test; -import junit.framework.TestSuite; - import org.apache.poi.poifs.eventfilesystem.TestPOIFSReaderRegistry; import org.apache.poi.poifs.filesystem.AllPOIFSFileSystemTests; import org.apache.poi.poifs.nio.TestDataSource; import org.apache.poi.poifs.property.AllPOIFSPropertyTests; import org.apache.poi.poifs.storage.AllPOIFSStorageTests; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; + /** * Test suite for all sub-packages of org.apache.poi.poifs - * - * @author Josh Micich */ +@RunWith(Suite.class) +@Suite.SuiteClasses({ + TestPOIFSReaderRegistry.class + , TestDataSource.class + , AllPOIFSFileSystemTests.class + , AllPOIFSPropertyTests.class + , AllPOIFSStorageTests.class +}) public final class AllPOIFSTests { - public static Test suite() { - TestSuite result = new TestSuite("Tests for org.apache.poi.poifs"); - result.addTestSuite(TestPOIFSReaderRegistry.class); - result.addTestSuite(TestDataSource.class); - result.addTest(AllPOIFSFileSystemTests.suite()); - result.addTest(AllPOIFSPropertyTests.suite()); - result.addTest(AllPOIFSStorageTests.suite()); - return result; - } } diff --git a/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java b/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java index bc46e17c9a..364aa1a6e2 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/AllPOIFSFileSystemTests.java @@ -17,31 +17,27 @@ package org.apache.poi.poifs.filesystem; -import junit.framework.Test; -import junit.framework.TestSuite; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; /** * Tests for org.apache.poi.poifs.filesystem
- * - * @author Josh Micich */ +@RunWith(Suite.class) +@Suite.SuiteClasses({ + TestDirectoryNode.class + , TestDocument.class + , TestDocumentDescriptor.class + , TestDocumentInputStream.class + , TestDocumentNode.class + , TestDocumentOutputStream.class + , TestEmptyDocument.class + , TestOffice2007XMLException.class + , TestPOIFSDocumentPath.class + , TestPOIFSFileSystem.class + , TestNPOIFSFileSystem.class + , TestPropertySorter.class + , TestOle10Native.class +}) public final class AllPOIFSFileSystemTests { - - public static Test suite() { - TestSuite result = new TestSuite("Tests for org.apache.poi.poifs.filesystem"); - result.addTestSuite(TestDirectoryNode.class); - result.addTestSuite(TestDocument.class); - result.addTestSuite(TestDocumentDescriptor.class); - result.addTestSuite(TestDocumentInputStream.class); - result.addTestSuite(TestDocumentNode.class); - result.addTestSuite(TestDocumentOutputStream.class); - result.addTestSuite(TestEmptyDocument.class); - result.addTestSuite(TestOffice2007XMLException.class); - result.addTestSuite(TestPOIFSDocumentPath.class); - result.addTestSuite(TestPOIFSFileSystem.class); - result.addTestSuite(TestNPOIFSFileSystem.class); - result.addTestSuite(TestPropertySorter.class); - result.addTestSuite(TestOle10Native.class); - return result; - } } diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java index 1ff45be73f..32f0f96ab6 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java @@ -17,14 +17,26 @@ package org.apache.poi.poifs.filesystem; -import junit.framework.TestCase; -import org.apache.poi.POIDataSamples; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; -public class TestOle10Native extends TestCase { +import org.apache.poi.POIDataSamples; +import org.apache.poi.util.IOUtils; +import org.junit.Test; + +public class TestOle10Native { private static final POIDataSamples dataSamples = POIDataSamples.getPOIFSInstance(); + @Test public void testOleNative() throws IOException, Ole10NativeException { POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("oleObject1.bin")); @@ -33,4 +45,66 @@ public class TestOle10Native extends TestCase { assertEquals("File1.svg", ole.getLabel()); assertEquals("D:\\Documents and Settings\\rsc\\My Documents\\file1.svg", ole.getCommand()); } + + @Test + public void testFiles() throws IOException, Ole10NativeException { + File files[] = { + // bug 51891 + POIDataSamples.getPOIFSInstance().getFile("multimedia.doc"), + // tika bug 1072 + POIDataSamples.getPOIFSInstance().getFile("20-Force-on-a-current-S00.doc"), + // other files containing ole10native records ... + POIDataSamples.getDocumentInstance().getFile("Bug53380_3.doc"), + POIDataSamples.getDocumentInstance().getFile("Bug47731.doc") + }; + + for (File f : files) { + NPOIFSFileSystem fs = new NPOIFSFileSystem(f, true); + List entries = new ArrayList(); + findOle10(entries, fs.getRoot(), "/", ""); + + for (Entry e : entries) { + ByteArrayOutputStream bosExp = new ByteArrayOutputStream(); + InputStream is = ((DirectoryNode)e.getParent()).createDocumentInputStream(e); + IOUtils.copy(is,bosExp); + is.close(); + + Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)e.getParent()); + + ByteArrayOutputStream bosAct = new ByteArrayOutputStream(); + ole.writeOut(bosAct); + + assertThat(bosExp.toByteArray(), equalTo(bosAct.toByteArray())); + } + + fs.close(); + } + } + + /* + void searchOle10Files() throws Exception { + File dir = new File("test-data/document"); + for (File file : dir.listFiles(new FileFilter(){ + public boolean accept(File pathname) { + return pathname.getName().endsWith("doc"); + } + })) { + NPOIFSFileSystem fs = new NPOIFSFileSystem(file, true); + findOle10(null, fs.getRoot(), "/", file.getName()); + fs.close(); + } + }*/ + + void findOle10(List entries, DirectoryNode dn, String path, String filename) { + Iterator iter = dn.getEntries(); + while (iter.hasNext()) { + Entry e = iter.next(); + if (Ole10Native.OLE10_NATIVE.equals(e.getName())) { + if (entries != null) entries.add(e); + // System.out.println(filename+" : "+path); + } else if (e.isDirectoryEntry()) { + findOle10(entries, (DirectoryNode)e, path+e.getName()+"/", filename); + } + } + } } diff --git a/test-data/poifs/20-Force-on-a-current-S00.doc b/test-data/poifs/20-Force-on-a-current-S00.doc new file mode 100644 index 0000000000..3ad75ebc07 Binary files /dev/null and b/test-data/poifs/20-Force-on-a-current-S00.doc differ diff --git a/test-data/poifs/multimedia.doc b/test-data/poifs/multimedia.doc new file mode 100644 index 0000000000..51b356d4af Binary files /dev/null and b/test-data/poifs/multimedia.doc differ