diff --git a/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java b/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java index 0ac755d2d4..174a71c79a 100644 --- a/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java +++ b/src/examples/src/org/apache/poi/hpsf/examples/CopyCompare.java @@ -163,10 +163,20 @@ public final class CopyCompare { // Ensures that the directory hierarchy for a document in a POI fileystem is in place. // Get the root directory. It does not have to be created since it always exists in a POIFS. DirectoryEntry de = poiFs.getRoot(); + if ("/".equals(path.toString())) { + de.setStorageClsid(event.getStorageClassId()); + } for (int i=0; i + *
  • they must have entries with the same names
  • + *
  • no entries in one but not the other
  • + *
  • the size+contents of each entry must match
  • + *
  • the storage classid of the directories must match
  • + * * To exclude certain parts of the Directory from being checked, * use a {@link FilteringDirectoryNode} */ public static boolean areDirectoriesIdentical(DirectoryEntry dirA, DirectoryEntry dirB) { return new DirectoryDelegate(dirA).equals(new DirectoryDelegate(dirB)); } - + /** * Compares two {@link DocumentEntry} instances of a POI file system. * Documents that are not property set streams must be bitwise identical. @@ -185,6 +189,10 @@ public final class EntryUtils { return false; } + if (!dir.getStorageClsid().equals(dd.dir.getStorageClsid())) { + return false; + } + return entries().equals(dd.entries()); } } diff --git a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java index 7fc7731f7a..c2d4a73ae4 100644 --- a/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java +++ b/src/java/org/apache/poi/poifs/filesystem/Ole10Native.java @@ -21,44 +21,69 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianByteArrayInputStream; import org.apache.poi.util.LittleEndianConsts; +import org.apache.poi.util.LittleEndianInput; import org.apache.poi.util.LittleEndianOutputStream; import org.apache.poi.util.StringUtil; /** * Represents an Ole10Native record which is wrapped around certain binary - * files being embedded in OLE2 documents. + * files being embedded in OLE2 documents.

    + * + * Ole10Native objects come in different shapes: + *

    */ +@SuppressWarnings("unused") public class Ole10Native { public static final String OLE10_NATIVE = "\u0001Ole10Native"; - protected static final String ISO1 = "ISO-8859-1"; - //arbitrarily selected; may need to increase + private static final Charset ISO1 = StandardCharsets.ISO_8859_1; + // arbitrarily selected; may need to increase private static final int MAX_RECORD_LENGTH = 100_000_000; + // arbitrarily selected; may need to increase + private static final int MAX_STRING_LENGTH = 1024; /** * Default content of the \u0001Ole entry */ private static final byte[] OLE_MARKER_BYTES = - { 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + {1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; private static final String OLE_MARKER_NAME = "\u0001Ole"; - - // (the fields as they appear in the raw record:) - private int totalSize; // 4 bytes, total size of record not including this field - private short flags1 = 2; // 2 bytes, unknown, mostly [02 00] - private String label; // ASCIIZ, stored in this field without the terminating zero - private String fileName; // ASCIIZ, stored in this field without the terminating zero - private short flags2; // 2 bytes, unknown, mostly [00 00] - private short unknown1 = 3; // see below - private String command; // ASCIIZ, stored in this field without the terminating zero - private byte[] dataBuffer; // varying size, the actual native data - private short flags3; // some final flags? or zero terminators?, sometimes not there + // 4 bytes, total size of record not including this field + private int totalSize; + // 2 bytes, unknown, mostly [02 00] + private short flags1 = 2; + // ASCIIZ, stored in this field without the terminating zero + private String label; + // ASCIIZ, stored in this field without the terminating zero + private String fileName; + // 2 bytes, unknown, mostly [00 00] + private short flags2; + // see below + private short unknown1 = 3; + // ASCIIZ, stored in this field without the terminating zero + private String command; + // varying size, the actual native data + private byte[] dataBuffer; + // UTF16-LE String with leading length + private String command2; + // UTF16-LE String with leading length + private String label2; + // UTF16-LE String with leading length + private String fileName2; /** * the field encoding mode - merely a try-and-error guess ... @@ -81,7 +106,6 @@ public class Ole10Native { private EncodingMode mode; - /** * Creates an instance of this class from an embedded OLE Object. The OLE Object is expected * to include a stream "{01}Ole10Native" which contains the actual @@ -89,11 +113,11 @@ public class Ole10Native { * * @param poifs POI Filesystem object * @return Returns an instance of this class - * @throws IOException on IO error + * @throws IOException on IO error * @throws Ole10NativeException on invalid or unexcepted data format */ public static Ole10Native createFromEmbeddedOleObject(POIFSFileSystem poifs) throws IOException, Ole10NativeException { - return createFromEmbeddedOleObject(poifs.getRoot()); + return createFromEmbeddedOleObject(poifs.getRoot()); } /** @@ -103,26 +127,27 @@ public class Ole10Native { * * @param directory POI Filesystem object * @return Returns an instance of this class - * @throws IOException on IO error + * @throws IOException on IO error * @throws Ole10NativeException on invalid or unexcepted data format */ public static Ole10Native createFromEmbeddedOleObject(DirectoryNode directory) throws IOException, Ole10NativeException { - DocumentEntry nativeEntry = (DocumentEntry)directory.getEntry(OLE10_NATIVE); - try (DocumentInputStream dis = directory.createDocumentInputStream(nativeEntry)) { - byte[] data = IOUtils.toByteArray(dis, nativeEntry.getSize(), MAX_RECORD_LENGTH); - return new Ole10Native(data, 0); - } + DocumentEntry nativeEntry = (DocumentEntry) directory.getEntry(OLE10_NATIVE); + try (DocumentInputStream dis = directory.createDocumentInputStream(nativeEntry)) { + byte[] data = IOUtils.toByteArray(dis, nativeEntry.getSize(), MAX_RECORD_LENGTH); + return new Ole10Native(data, 0); + } } /** * Creates an instance and fills the fields based on ... the fields */ public Ole10Native(String label, String filename, String command, byte[] data) { - setLabel(label); - setFileName(filename); - setCommand(command); - setDataBuffer(data); - mode = EncodingMode.parsed; + setLabel(label); + setFileName(filename); + setCommand(command); + command2 = command; + setDataBuffer(data); + mode = EncodingMode.parsed; } /** @@ -132,81 +157,64 @@ public class Ole10Native { * @param offset The start offset of the record in the buffer * @throws Ole10NativeException on invalid or unexcepted data format */ - public Ole10Native(byte[] data, int offset) throws Ole10NativeException { - int ofs = offset; // current offset, initialized to start + public Ole10Native(final byte[] data, final int offset) throws Ole10NativeException { + LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(data, offset); - if (data.length < offset + 2) { - throw new Ole10NativeException("data is too small"); - } + totalSize = leis.readInt(); + leis.limit(totalSize + LittleEndianConsts.INT_SIZE); - totalSize = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; + leis.mark(0); - mode = EncodingMode.unparsed; - if (LittleEndian.getShort(data, ofs) == 2) { - // some files like equations don't have a valid filename, - // but somehow encode the formula right away in the ole10 header - if (Character.isISOControl(data[ofs+LittleEndianConsts.SHORT_SIZE])) { - mode = EncodingMode.compact; + try { + flags1 = leis.readShort(); + if (flags1 == 2) { + leis.mark(0); + // some files like equations don't have a valid filename, + // but somehow encode the formula right away in the ole10 header + boolean validFileName = !Character.isISOControl(leis.readByte()); + leis.reset(); + + if (validFileName) { + readParsed(leis); + } else { + readCompact(leis); + } } else { - mode = EncodingMode.parsed; + leis.reset(); + readUnparsed(leis); } + } catch (IOException e) { + throw new Ole10NativeException("Invalid Ole10Native", e); } + } - int dataSize; - switch (mode) { - case parsed: { - flags1 = LittleEndian.getShort(data, ofs); + private void readParsed(LittleEndianByteArrayInputStream leis) throws Ole10NativeException, IOException { + mode = EncodingMode.parsed; + label = readAsciiZ(leis); + fileName = readAsciiZ(leis); + flags2 = leis.readShort(); + unknown1 = leis.readShort(); + command = readAsciiLen(leis); + dataBuffer = IOUtils.toByteArray(leis, leis.readInt(), MAX_RECORD_LENGTH); - // structured format - ofs += LittleEndianConsts.SHORT_SIZE; - - int len = getStringLength(data, ofs); - label = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - len = getStringLength(data, ofs); - fileName = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - flags2 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - - unknown1 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - - len = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; - command = StringUtil.getFromCompressedUnicode(data, ofs, len - 1); - ofs += len; - - if (totalSize < ofs) { - throw new Ole10NativeException("Invalid Ole10Native"); - } - - dataSize = LittleEndian.getInt(data, ofs); - ofs += LittleEndianConsts.INT_SIZE; - - if (dataSize < 0 || totalSize - (ofs - LittleEndianConsts.INT_SIZE) < dataSize) { - throw new Ole10NativeException("Invalid Ole10Native"); - } - break; - } - case compact: - flags1 = LittleEndian.getShort(data, ofs); - ofs += LittleEndianConsts.SHORT_SIZE; - dataSize = totalSize - LittleEndianConsts.SHORT_SIZE; - break; - default: - case unparsed: - dataSize = totalSize; - break; + leis.mark(0); + short lowSize = leis.readShort(); + if (lowSize != 0) { + leis.reset(); + command2 = readUtf16(leis); + label2 = readUtf16(leis); + fileName2 = readUtf16(leis); } + } - if ((long)dataSize + (long)ofs > (long)data.length) { //cast to avoid overflow - throw new Ole10NativeException("Invalid Ole10Native: declared data length > available data"); - } - dataBuffer = IOUtils.safelyClone(data, ofs, dataSize, MAX_RECORD_LENGTH); + private void readCompact(LittleEndianByteArrayInputStream leis) throws IOException { + mode = EncodingMode.compact; + dataBuffer = IOUtils.toByteArray(leis, totalSize - LittleEndianConsts.SHORT_SIZE, MAX_RECORD_LENGTH); + } + + private void readUnparsed(LittleEndianByteArrayInputStream leis) throws IOException { + mode = EncodingMode.unparsed; + dataBuffer = IOUtils.toByteArray(leis, totalSize, MAX_RECORD_LENGTH); } /** @@ -230,16 +238,30 @@ public class Ole10Native { } - /* - * Helper - determine length of zero terminated string (ASCIIZ). + /** + * Read zero terminated string (ASCIIZ). */ - private static int getStringLength(byte[] data, int ofs) { - int len = 0; - while (len + ofs < data.length && data[ofs + len] != 0) { - len++; + private static String readAsciiZ(LittleEndianInput is) throws Ole10NativeException { + // arbitrary sized buffer - not sure how big strings can get in an Ole10 record + byte[] buf = new byte[MAX_STRING_LENGTH]; + for (int i=0; i oleShapes = new ArrayList<>(); + List ole10s = new ArrayList<>(); + List digests = new ArrayList<>(); + + final boolean digestMatch = + wb.getSheetAt(0).getDrawingPatriarch().getShapes().stream() + .map(s -> (XSSFObjectData)s) + .filter(oleShapes::add) + .map(TestEmbedOLEPackage::extractOle10Native) + .filter(ole10s::add) + .map(TestEmbedOLEPackage::digest) + .allMatch("FUJBVHTAZ0ly/TNDNmEj1gQ4a2TbZwDMVF4WUkDQLaM="::equals); + + assertEquals(2, oleShapes.size()); + assertEquals("Package", oleShapes.get(0).getOLE2ClassName()); + assertEquals("Package2", oleShapes.get(1).getOLE2ClassName()); + assertTrue(digestMatch); + + final String expLabel = "Apache_POI_project_logo_(2018).pdf"; + final String expFilenName = "C:\\Dell\\Apache_POI_project_logo_(2018).pdf"; + final String expCmd1 = "C:\\Users\\KIWIWI~1\\AppData\\Local\\Temp\\{84287F34-B79C-4F3A-9A92-6BB664586F48}\\Apache_POI_project_logo_(2018).pdf"; + final String expCmd2 = "C:\\Users\\KIWIWI~1\\AppData\\Local\\Temp\\{84287F34-B79C-4F3A-9A92-6BB664586F48}\\Apache_POI_project_logo_(2).pdf"; + + assertTrue(ole10s.stream().map(Ole10Native::getLabel).allMatch(expLabel::equals)); + assertTrue(ole10s.stream().map(Ole10Native::getFileName).allMatch(expFilenName::equals)); + assertEquals(expCmd1, ole10s.get(0).getCommand()); + assertEquals(expCmd2, ole10s.get(1).getCommand()); + + for (Ole10Native o : ole10s) { + assertEquals(o.getLabel(), o.getLabel2()); + assertEquals(o.getCommand(), o.getCommand2()); + assertEquals(o.getFileName(), o.getFileName2()); + } + + Ole10Native scratch = new Ole10Native(expLabel, expFilenName, expCmd1, ole10s.get(0).getDataBuffer()); + scratch.setLabel2(expLabel); + scratch.setFileName2(expFilenName); + scratch.setCommand2(expCmd1); + + try (POIFSFileSystem scratchFS = new POIFSFileSystem(); + POIFSFileSystem ole1FS = new POIFSFileSystem(new ByteArrayInputStream(oleShapes.get(0).getObjectData()))) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + scratch.writeOut(bos); + scratchFS.createDocument(new ByteArrayInputStream(bos.toByteArray()), Ole10Native.OLE10_NATIVE); + scratchFS.getRoot().setStorageClsid(ClassIDPredefined.OLE_V1_PACKAGE.getClassID()); + assertTrue(EntryUtils.areDirectoriesIdentical(ole1FS.getRoot(), scratchFS.getRoot())); + } + } + } + + private static Ole10Native extractOle10Native(XSSFObjectData objectData) { + try (InputStream is = objectData.getObjectPart().getInputStream(); + POIFSFileSystem poifs = new POIFSFileSystem(is)) { + return Ole10Native.createFromEmbeddedOleObject(poifs); + } catch (IOException | Ole10NativeException e) { + throw new AssertionError(e.getMessage(), e); + } + } + + private static String digest(Ole10Native ole10) { + MessageDigest sha = CryptoFunctions.getMessageDigest(HashAlgorithm.sha256); + byte[] digest = sha.digest(ole10.getDataBuffer()); + return Base64.encodeBase64String(digest); + } + @Test public void embedXSSF() throws IOException { Workbook wb1 = new XSSFWorkbook(); @@ -71,9 +156,9 @@ public class TestEmbedOLEPackage { public void embedHSSF() throws IOException { assumeFalse(xslfOnly()); - Workbook wb1 = new HSSFWorkbook(); + HSSFWorkbook wb1 = new HSSFWorkbook(); addEmbeddedObjects(wb1); - Workbook wb2 = HSSFTestDataSamples.writeOutAndReadBack((HSSFWorkbook)wb1); + Workbook wb2 = HSSFTestDataSamples.writeOutAndReadBack(wb1); validateEmbeddedObjects(wb2); wb2.close(); @@ -97,17 +182,17 @@ public class TestEmbedOLEPackage { } } } - + static void addEmbeddedObjects(Workbook wb) throws IOException { boolean ooxml = wb.getClass().getName().toLowerCase(Locale.ROOT).contains("xssf"); int picIdx = wb.addPicture(samplePNG, Workbook.PICTURE_TYPE_PNG); byte[] data = (ooxml) ? samplePPTX : samplePPT; String ext = (ooxml) ? ".pptx" : ".ppt"; - + int oleIdx1a = wb.addOlePackage(data, "dummy1a"+ext, "dummy1a"+ext, "dummy1a"+ext); int oleIdx1b = wb.addOlePackage(data, "dummy1b"+ext, "dummy1b"+ext, "dummy1b"+ext); int oleIdx2 = wb.addOlePackage(data, "dummy2"+ext, "dummy2"+ext, "dummy2"+ext); - + Sheet sh1 = wb.createSheet(); Drawing pat1 = sh1.createDrawingPatriarch(); ClientAnchor anchor1a = pat1.createAnchor(0, 0, 0, 0, 1, 1, 3, 6); @@ -120,7 +205,7 @@ public class TestEmbedOLEPackage { ClientAnchor anchor2 = pat2.createAnchor(0, 0, 0, 0, 1, 1, 3, 6); pat2.createObjectData(anchor2, oleIdx2, picIdx); } - + static byte[] getSamplePPT(boolean ooxml) throws IOException, ReflectiveOperationException { SlideShow ppt = (ooxml) ? new XMLSlideShow() : (SlideShow)Class.forName("org.apache.poi.hslf.usermodel.HSLFSlideShow").newInstance(); diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java index ad686358d0..4de08c4c5c 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestOle10Native.java @@ -17,11 +17,9 @@ package org.apache.poi.poifs.filesystem; -import static org.apache.poi.POITestCase.assertContains; import static org.hamcrest.core.IsEqual.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; -import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.File; @@ -33,11 +31,17 @@ import java.util.List; import org.apache.poi.POIDataSamples; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.RecordFormatException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; public class TestOle10Native { private static final POIDataSamples dataSamples = POIDataSamples.getPOIFSInstance(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + @Test public void testOleNative() throws IOException, Ole10NativeException { POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("oleObject1.bin")); @@ -59,26 +63,26 @@ public class TestOle10Native { POIDataSamples.getDocumentInstance().getFile("Bug53380_3.doc"), POIDataSamples.getDocumentInstance().getFile("Bug47731.doc") }; - + for (File f : files) { POIFSFileSystem fs = new POIFSFileSystem(f, true); List entries = new ArrayList<>(); findOle10(entries, fs.getRoot(), "/"); - + for (Entry e : entries) { ByteArrayOutputStream bosExp = new ByteArrayOutputStream(); InputStream is = ((DirectoryNode)e.getParent()).createDocumentInputStream(e); IOUtils.copy(is,bosExp); is.close(); - + Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)e.getParent()); - + ByteArrayOutputStream bosAct = new ByteArrayOutputStream(); ole.writeOut(bosAct); - + assertThat(bosExp.toByteArray(), equalTo(bosAct.toByteArray())); } - + fs.close(); } } @@ -97,14 +101,11 @@ public class TestOle10Native { } @Test - public void testOleNativeOOM() throws IOException { + public void testOleNativeOOM() throws IOException, Ole10NativeException { POIFSFileSystem fs = new POIFSFileSystem(dataSamples.openResourceAsStream("60256.bin")); - try { - Ole10Native.createFromEmbeddedOleObject(fs); - fail("Should have thrown exception because OLENative lacks a length parameter"); - } catch (Ole10NativeException e) { - assertContains(e.getMessage(), "declared data length"); - } + thrown.expect(RecordFormatException.class); + thrown.expectMessage("Tried to allocate"); + Ole10Native.createFromEmbeddedOleObject(fs); } } diff --git a/test-data/spreadsheet/bug64512_embed.xlsx b/test-data/spreadsheet/bug64512_embed.xlsx new file mode 100755 index 0000000000..1c5fd50f1e Binary files /dev/null and b/test-data/spreadsheet/bug64512_embed.xlsx differ