diff --git a/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java b/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java index 58658fac71..0657b9bb63 100644 --- a/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java +++ b/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java @@ -17,22 +17,22 @@ package org.apache.poi.poifs.filesystem; -import org.apache.poi.EncryptedDocumentException; -import org.apache.poi.poifs.common.POIFSConstants; -import org.apache.poi.poifs.crypt.Decryptor; -import org.apache.poi.poifs.crypt.EncryptionInfo; -import org.apache.poi.util.IOUtils; - import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.security.GeneralSecurityException; +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import org.apache.poi.util.Internal; +import org.apache.poi.util.Removal; + /** * A small base class for the various factories, e.g. WorkbookFactory, * SlideShowFactory to combine common code here. */ +@Internal public class DocumentFactoryHelper { /** * Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using @@ -81,36 +81,19 @@ public class DocumentFactoryHelper { /** * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a OOXML (zip) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be + * support mark and reset) has a OOXML (zip) header at the start of it.
+ * + * If unsure if your InputStream does support mark / reset, + * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make * sure to always use that, and not the original! - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + * + * @param inp An InputStream which supports either mark/reset + * + * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead */ + @Deprecated + @Removal(version="4.0") public static boolean hasOOXMLHeader(InputStream inp) throws IOException { - // We want to peek at the first 4 bytes - inp.mark(4); - - byte[] header = new byte[4]; - int bytesRead = IOUtils.readFully(inp, header); - - // Wind back those 4 bytes - if(inp instanceof PushbackInputStream) { - PushbackInputStream pin = (PushbackInputStream)inp; - pin.unread(header, 0, bytesRead); - } else { - inp.reset(); - } - - // Did it match the ooxml zip signature? - return ( - bytesRead == 4 && - header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && - header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && - header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && - header[3] == POIFSConstants.OOXML_FILE_HEADER[3] - ); + return FileMagic.valueOf(inp) == FileMagic.OOXML; } - } diff --git a/src/java/org/apache/poi/poifs/filesystem/FileMagic.java b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java new file mode 100644 index 0000000000..6f67e4848a --- /dev/null +++ b/src/java/org/apache/poi/poifs/filesystem/FileMagic.java @@ -0,0 +1,155 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.filesystem; + +import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER; +import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.poifs.storage.HeaderBlockConstants; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LocaleUtil; + +/** + * The file magic number, i.e. the file identification based on the first bytes + * of the file + */ +public enum FileMagic { + /** OLE2 / BIFF8+ stream used for Office 97 and higher documents */ + OLE2(HeaderBlockConstants._signature), + /** OOXML / ZIP stream */ + OOXML(OOXML_FILE_HEADER), + /** XML file */ + XML(RAW_XML_FILE_HEADER), + /** BIFF2 raw stream - for Excel 2 */ + BIFF2(new byte[]{ + 0x09, 0x00, // sid=0x0009 + 0x04, 0x00, // size=0x0004 + 0x00, 0x00, // unused + 0x70, 0x00 // 0x70 = multiple values + }), + /** BIFF3 raw stream - for Excel 3 */ + BIFF3(new byte[]{ + 0x09, 0x02, // sid=0x0209 + 0x06, 0x00, // size=0x0006 + 0x00, 0x00, // unused + 0x70, 0x00 // 0x70 = multiple values + }), + /** BIFF4 raw stream - for Excel 4 */ + BIFF4(new byte[]{ + 0x09, 0x04, // sid=0x0409 + 0x06, 0x00, // size=0x0006 + 0x00, 0x00, // unused + 0x70, 0x00 // 0x70 = multiple values + },new byte[]{ + 0x09, 0x04, // sid=0x0409 + 0x06, 0x00, // size=0x0006 + 0x00, 0x00, // unused + 0x00, 0x01 + }), + /** Old MS Write raw stream */ + MSWRITE( + new byte[]{0x31, (byte)0xbe, 0x00, 0x00 }, + new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }), + /** RTF document */ + RTF("{\\rtf"), + /** PDF document */ + PDF("%PDF"), + // keep UNKNOWN always as last enum! + /** UNKNOWN magic */ + UNKNOWN(new byte[0]); + + final byte[][] magic; + + FileMagic(long magic) { + this.magic = new byte[1][8]; + LittleEndian.putLong(this.magic[0], 0, magic); + } + + FileMagic(byte[]... magic) { + this.magic = magic; + } + + FileMagic(String magic) { + this(magic.getBytes(LocaleUtil.CHARSET_1252)); + } + + public static FileMagic valueOf(byte[] magic) { + for (FileMagic fm : values()) { + int i=0; + boolean found = true; + for (byte[] ma : fm.magic) { + for (byte m : ma) { + byte d = magic[i++]; + if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { + found = false; + break; + } + } + if (found) { + return fm; + } + } + } + return UNKNOWN; + } + + /** + * Get the file magic of the supplied InputStream (which MUST + * support mark and reset).
+ * + * If unsure if your InputStream does support mark / reset, + * use {@link #prepareToCheckMagic(InputStream)} to wrap it and make + * sure to always use that, and not the original!
+ * + * Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean, + * that the ZIP stream has leading junk bytes + * + * @param inp An InputStream which supports either mark/reset + */ + public static FileMagic valueOf(InputStream inp) throws IOException { + if (!inp.markSupported()) { + throw new IOException("getFileMagic() only operates on streams which support mark(int)"); + } + + // Grab the first 8 bytes + byte[] data = IOUtils.peekFirst8Bytes(inp); + + return FileMagic.valueOf(data); + } + + + /** + * Checks if an {@link InputStream} can be reseted (i.e. used for checking the header magic) and wraps it if not + * + * @param stream stream to be checked for wrapping + * @return a mark enabled stream + */ + public static InputStream prepareToCheckMagic(InputStream stream) { + if (stream.markSupported()) { + return stream; + } + // we used to process the data via a PushbackInputStream, but user code could provide a too small one + // so we use a BufferedInputStream instead now + return new BufferedInputStream(stream); + } +} \ No newline at end of file diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java index 52e0662b0f..b3c1158b27 100644 --- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java @@ -19,6 +19,7 @@ package org.apache.poi.poifs.filesystem; +import java.io.ByteArrayInputStream; import java.io.Closeable; import java.io.File; import java.io.FileInputStream; @@ -26,7 +27,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.PushbackInputStream; import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.FileChannel; @@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex; import org.apache.poi.poifs.storage.BlockAllocationTableReader; import org.apache.poi.poifs.storage.BlockAllocationTableWriter; import org.apache.poi.poifs.storage.HeaderBlock; -import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.poifs.storage.HeaderBlockWriter; import org.apache.poi.util.CloseIgnoringInputStream; import org.apache.poi.util.IOUtils; import org.apache.poi.util.Internal; -import org.apache.poi.util.LongField; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; +import org.apache.poi.util.Removal; /** *
This is the main class of the POIFS system; it manages the entire @@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends BlockStore /** * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a POIFS (OLE2) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be - * sure to always use that and not the original! + * support mark and reset) has a POIFS (OLE2) header at the start of it. + * If unsure if your InputStream does support mark / reset, + * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make + * sure to always use that, and not the original! * * After the method call, the InputStream is at the * same position as of the time of entering the method. * - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + * @param inp An InputStream which supports mark/reset + * + * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead */ + @Deprecated + @Removal(version="4.0") public static boolean hasPOIFSHeader(InputStream inp) throws IOException { - // We want to peek at the first 8 bytes - inp.mark(8); - - byte[] header = new byte[8]; - int bytesRead = IOUtils.readFully(inp, header); - LongField signature = new LongField(HeaderBlockConstants._signature_offset, header); - - // Wind back those 8 bytes - if(inp instanceof PushbackInputStream) { - PushbackInputStream pin = (PushbackInputStream)inp; - pin.unread(header, 0, bytesRead); - } else { - inp.reset(); - } - - // Did it match the signature? - return (signature.get() == HeaderBlockConstants._signature); + return FileMagic.valueOf(inp) == FileMagic.OLE2; } /** * Checks if the supplied first 8 bytes of a stream / file * has a POIFS (OLE2) header. + * + * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead */ + @Deprecated + @Removal(version="4.0") public static boolean hasPOIFSHeader(byte[] header8Bytes) { - LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); - return (signature.get() == HeaderBlockConstants._signature); + try { + return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes)); + } catch (IOException e) { + throw new RuntimeException("invalid header check", e); + } } /** diff --git a/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java index 9cdc384440..0dc1798328 100644 --- a/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java @@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.BlockAllocationTableWriter; import org.apache.poi.poifs.storage.BlockList; import org.apache.poi.poifs.storage.BlockWritable; import org.apache.poi.poifs.storage.HeaderBlock; -import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.poifs.storage.HeaderBlockWriter; import org.apache.poi.poifs.storage.RawDataBlockList; import org.apache.poi.poifs.storage.SmallBlockTableReader; import org.apache.poi.poifs.storage.SmallBlockTableWriter; import org.apache.poi.util.CloseIgnoringInputStream; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LongField; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; +import org.apache.poi.util.Removal; /** *
This is the main class of the POIFS system; it manages the entire @@ -200,27 +198,34 @@ public class OPOIFSFileSystem /** * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a POIFS (OLE2) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be + * support mark and reset) has a POIFS (OLE2) header at the start of it. + * If unsure if your InputStream does support mark / reset, + * use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make * sure to always use that, and not the original! - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream + * + * After the method call, the InputStream is at the + * same position as of the time of entering the method. + * + * @param inp An InputStream which supports either mark/reset + * + * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead */ + @Deprecated + @Removal(version="4.0") public static boolean hasPOIFSHeader(InputStream inp) throws IOException { - // We want to peek at the first 8 bytes - byte[] header = IOUtils.peekFirst8Bytes(inp); - return hasPOIFSHeader(header); + return NPOIFSFileSystem.hasPOIFSHeader(inp); } + /** * Checks if the supplied first 8 bytes of a stream / file * has a POIFS (OLE2) header. + * + * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead */ + @Deprecated + @Removal(version="4.0") public static boolean hasPOIFSHeader(byte[] header8Bytes) { - LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes); - - // Did it match the signature? - return (signature.get() == HeaderBlockConstants._signature); + return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); } /** diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 31a54fb06f..57fabfd805 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -114,27 +114,6 @@ public class POIFSFileSystem super(file); } - /** - * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a POIFS (OLE2) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be - * sure to always use that, and not the original! - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream - */ - public static boolean hasPOIFSHeader(InputStream inp) throws IOException { - return NPOIFSFileSystem.hasPOIFSHeader(inp); - } - - /** - * Checks if the supplied first 8 bytes of a stream / file - * has a POIFS (OLE2) header. - */ - public static boolean hasPOIFSHeader(byte[] header8Bytes) { - return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes); - } - /** * Creates a new {@link POIFSFileSystem} in a new {@link File}. * Use {@link #POIFSFileSystem(File)} to open an existing File, diff --git a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java index 83eb7295c7..bad012e293 100644 --- a/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java +++ b/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java @@ -17,8 +17,8 @@ package org.apache.poi.poifs.macros; -import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; import static org.apache.poi.util.StringUtil.endsWithIgnoreCase; +import static org.apache.poi.util.StringUtil.startsWithIgnoreCase; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -27,7 +27,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.nio.charset.Charset; import java.util.HashMap; import java.util.Map; @@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.DocumentNode; import org.apache.poi.poifs.filesystem.Entry; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.CodePageUtil; @@ -67,13 +67,12 @@ public class VBAMacroReader implements Closeable { private NPOIFSFileSystem fs; public VBAMacroReader(InputStream rstream) throws IOException { - PushbackInputStream stream = new PushbackInputStream(rstream, 8); - byte[] header8 = IOUtils.peekFirst8Bytes(stream); - - if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { - fs = new NPOIFSFileSystem(stream); + InputStream is = FileMagic.prepareToCheckMagic(rstream); + FileMagic fm = FileMagic.valueOf(is); + if (fm == FileMagic.OLE2) { + fs = new NPOIFSFileSystem(is); } else { - openOOXML(stream); + openOOXML(is); } } diff --git a/src/java/org/apache/poi/poifs/storage/HeaderBlock.java b/src/java/org/apache/poi/poifs/storage/HeaderBlock.java index fe64f61c83..5319a2173f 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlock.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlock.java @@ -26,6 +26,7 @@ import java.util.Arrays; import org.apache.poi.hssf.OldExcelFormatException; import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.HexDump; @@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField; * The block containing the archive header */ public final class HeaderBlock implements HeaderBlockConstants { - private static final byte[] MAGIC_BIFF2 = { - 0x09, 0x00, // sid=0x0009 - 0x04, 0x00, // size=0x0004 - 0x00, 0x00, // unused - 0x70, 0x00 // 0x70 = multiple values - }; - - private static final byte[] MAGIC_BIFF3 = { - 0x09, 0x02, // sid=0x0209 - 0x06, 0x00, // size=0x0006 - 0x00, 0x00, // unused - 0x70, 0x00 // 0x70 = multiple values - }; - - private static final byte[] MAGIC_BIFF4a = { - 0x09, 0x04, // sid=0x0409 - 0x06, 0x00, // size=0x0006 - 0x00, 0x00, // unused - 0x70, 0x00 // 0x70 = multiple values - }; - - private static final byte[] MAGIC_BIFF4b = { - 0x09, 0x04, // sid=0x0409 - 0x06, 0x00, // size=0x0006 - 0x00, 0x00, // unused - 0x00, 0x01 - }; - - private static final byte[] MAGIC_MSWRITEa = { - 0x31, (byte)0xbe, 0x00, 0x00 - }; - private static final byte[] MAGIC_MSWRITEb = { - 0x32, (byte)0xbe, 0x00, 0x00 - }; - private static final byte _default_value = ( byte ) 0xFF; /** @@ -151,53 +117,35 @@ public final class HeaderBlock implements HeaderBlockConstants { this._data = data.clone(); // verify signature - long signature = LittleEndian.getLong(_data, _signature_offset); - - if (signature != _signature) { - // Is it one of the usual suspects? - if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) { - throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " - + "You are calling the part of POI that deals with OLE2 Office Documents. " - + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); - } - - if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) { - throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " - + "Formats such as Office 2003 XML are not supported"); - } - - // Old MS Write raw stream - if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) { - throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " - + "Apache POI doesn't currently support this format"); - } - - // BIFF2 raw stream - if (cmp(MAGIC_BIFF2, data)) { - throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. " - + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); - } - - // BIFF3 raw stream - if (cmp(MAGIC_BIFF3, data)) { - throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. " - + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); - } - - // BIFF4 raw stream - if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) { - throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. " - + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); - } - - // Give a generic error if the OLE2 signature isn't found - throw new NotOLE2FileException("Invalid header signature; read " - + HexDump.longToHex(signature) + ", expected " - + HexDump.longToHex(_signature) + " - Your file appears " - + "not to be a valid OLE2 document"); - } - - + FileMagic fm = FileMagic.valueOf(data); + + switch (fm) { + case OLE2: + break; + case OOXML: + throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. " + + "You are calling the part of POI that deals with OLE2 Office Documents. " + + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)"); + case XML: + throw new NotOLE2FileException("The supplied data appears to be a raw XML file. " + + "Formats such as Office 2003 XML are not supported"); + case MSWRITE: + throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. " + + "Apache POI doesn't currently support this format"); + case BIFF2: + case BIFF3: + case BIFF4: + throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. " + + "HSSF only supports the BIFF8 format, try OldExcelExtractor"); + default: + // Give a generic error if the OLE2 signature isn't found + String exp = HexDump.longToHex(_signature); + String act = HexDump.longToHex(LittleEndian.getLong(data, 0)); + throw new NotOLE2FileException( + "Invalid header signature; read " + act + ", expected " + exp + + " - Your file appears not to be a valid OLE2 document"); + } + // Figure out our block size if (_data[30] == 12) { this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS; @@ -434,15 +382,4 @@ public final class HeaderBlock implements HeaderBlockConstants { stream.write(0); } } - - private static boolean cmp(byte[] magic, byte[] data) { - int i=0; - for (byte m : magic) { - byte d = data[i++]; - if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) { - return false; - } - } - return true; - } } diff --git a/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java b/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java index 97d9466173..cdee782c32 100644 --- a/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java +++ b/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.IOUtils; @@ -94,9 +94,7 @@ public class SlideShowFactory { * Creates the appropriate HSLFSlideShow / XMLSlideShow from * the given InputStream. * - *
Your input stream MUST either support mark/reset, or - * be wrapped as a {@link PushbackInputStream}! Note that - * using an {@link InputStream} has a higher memory footprint + *
Note that using an {@link InputStream} has a higher memory footprint * than using a {@link File}.
* *Note that in order to properly release resources the @@ -118,9 +116,8 @@ public class SlideShowFactory { /** * Creates the appropriate HSLFSlideShow / XMLSlideShow from * the given InputStream, which may be password protected. - *
Your input stream MUST either support mark/reset, or - * be wrapped as a {@link PushbackInputStream}! Note that - * using an {@link InputStream} has a higher memory footprint + * + *
Note that using an {@link InputStream} has a higher memory footprint * than using a {@link File}.
* *Note that in order to properly release resources the @@ -137,23 +134,18 @@ public class SlideShowFactory { * @throws EncryptedDocumentException If the wrong password is given for a protected file */ public static SlideShow,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException { - // If clearly doesn't do mark/reset, wrap up - if (! inp.markSupported()) { - inp = new PushbackInputStream(inp, 8); - } - - // Ensure that there is at least some data there - byte[] header8 = IOUtils.peekFirst8Bytes(inp); - - // Try to create - if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { - NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); + InputStream is = FileMagic.prepareToCheckMagic(inp); + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: + NPOIFSFileSystem fs = new NPOIFSFileSystem(is); return create(fs, password); + case OOXML: + return createXSLFSlideShow(is); + default: + throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } - if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { - return createXSLFSlideShow(inp); - } - throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } /** diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index cfe37a03be..b51c31d78e 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.util.ArrayList; import java.util.Iterator; @@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.crypt.EncryptionInfo; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; -import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; import org.apache.poi.poifs.filesystem.Entry; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NotOLE2FileException; import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; @@ -175,21 +174,20 @@ public class ExtractorFactory { } public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException { - // Figure out the kind of stream - // If clearly doesn't do mark/reset, wrap up - if (! inp.markSupported()) { - inp = new PushbackInputStream(inp, 8); - } + InputStream is = FileMagic.prepareToCheckMagic(inp); - if (NPOIFSFileSystem.hasPOIFSHeader(inp)) { - NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: + NPOIFSFileSystem fs = new NPOIFSFileSystem(is); boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs); + case OOXML: + return createExtractor(OPCPackage.open(is)); + default: + throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } - if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { - return createExtractor(OPCPackage.open(inp)); - } - throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } /** diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java index 9ed602603e..b59b814bd3 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java @@ -22,7 +22,6 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.Enumeration; @@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.ZipPackage; import org.apache.poi.openxml4j.util.ZipSecureFile; import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream; -import org.apache.poi.poifs.common.POIFSConstants; -import org.apache.poi.poifs.storage.HeaderBlockConstants; -import org.apache.poi.util.IOUtils; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.util.Internal; import org.apache.poi.util.Removal; +@Internal public final class ZipHelper { /** * Forward slash use to convert part name between OPC and zip item naming @@ -172,59 +170,29 @@ public final class ZipHelper { * Warning - this will consume the first few bytes of the stream, * you should push-back or reset the stream after use! */ - public static void verifyZipHeader(InputStream stream) - throws NotOfficeXmlFileException, IOException { - // Grab the first 8 bytes - byte[] data = new byte[8]; - IOUtils.readFully(stream, data); - - // OLE2? - long signature = LittleEndian.getLong(data); - if (signature == HeaderBlockConstants._signature) { + public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException { + InputStream is = FileMagic.prepareToCheckMagic(stream); + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: throw new OLE2NotOfficeXmlFileException( "The supplied data appears to be in the OLE2 Format. " + "You are calling the part of POI that deals with OOXML "+ "(Office Open XML) Documents. You need to call a different " + "part of POI to process this data (eg HSSF instead of XSSF)"); - } - - // Raw XML? - byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER; - if (data[0] == RAW_XML_FILE_HEADER[0] && - data[1] == RAW_XML_FILE_HEADER[1] && - data[2] == RAW_XML_FILE_HEADER[2] && - data[3] == RAW_XML_FILE_HEADER[3] && - data[4] == RAW_XML_FILE_HEADER[4]) { + case XML: throw new NotOfficeXmlFileException( "The supplied data appears to be a raw XML file. " + "Formats such as Office 2003 XML are not supported"); + default: + case OOXML: + case UNKNOWN: + // Don't check for a Zip header, as to maintain backwards + // compatibility we need to let them seek over junk at the + // start before beginning processing. + break; } - - // Don't check for a Zip header, as to maintain backwards - // compatibility we need to let them seek over junk at the - // start before beginning processing. - - // Put things back - if (stream instanceof PushbackInputStream) { - ((PushbackInputStream)stream).unread(data); - } else if (stream.markSupported()) { - stream.reset(); - } else if (stream instanceof FileInputStream) { - // File open check, about to be closed, nothing to do - } else { - // Oh dear... I hope you know what you're doing! - } - } - - private static InputStream prepareToCheckHeader(InputStream stream) { - if (stream instanceof PushbackInputStream) { - return stream; - } - if (stream.markSupported()) { - stream.mark(8); - return stream; - } - return new PushbackInputStream(stream, 8); } /** @@ -237,7 +205,7 @@ public final class ZipHelper { @SuppressWarnings("resource") public static ThresholdInputStream openZipStream(InputStream stream) throws IOException { // Peek at the first few bytes to sanity check - InputStream checkedStream = prepareToCheckHeader(stream); + InputStream checkedStream = FileMagic.prepareToCheckMagic(stream); verifyZipHeader(checkedStream); // Open as a proper zip stream diff --git a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java index 47af1bde74..f5d2737c86 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java @@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFile { public static class ThresholdInputStream extends PushbackInputStream { long counter = 0; + long markPos = 0; ThresholdInputStream cis; public ThresholdInputStream(InputStream is, ThresholdInputStream cis) { - super(is,1); + super(is); this.cis = cis; } @@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFile { @Override public long skip(long n) throws IOException { - counter = 0; - return in.skip(n); + long s = in.skip(n); + counter += s; + return s; } @Override public synchronized void reset() throws IOException { - counter = 0; - in.reset(); + counter = markPos; + super.reset(); } public void advance(int advance) throws IOException { @@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFile { } // one of the limits was reached, report it - throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. " - + "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. " - + "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. " - + "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter) + throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n" + + "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n" + + "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n" + + "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n" + "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO); } @@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFile { @Override public synchronized void mark(int readlimit) { + markPos = counter; in.mark(readlimit); } } diff --git a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java b/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java index cb97ad252a..5bbe155063 100644 --- a/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java +++ b/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java @@ -16,11 +16,11 @@ ==================================================================== */ package org.apache.poi.ss.usermodel; +import java.io.BufferedInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import org.apache.poi.EmptyFileException; import org.apache.poi.EncryptedDocumentException; @@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -127,7 +128,7 @@ public class WorkbookFactory { * the given InputStream. * *
Your input stream MUST either support mark/reset, or - * be wrapped as a {@link PushbackInputStream}! Note that + * be wrapped as a {@link BufferedInputStream}! Note that * using an {@link InputStream} has a higher memory footprint * than using a {@link File}.
* @@ -150,16 +151,15 @@ public class WorkbookFactory { /** * Creates the appropriate HSSFWorkbook / XSSFWorkbook from - * the given InputStream, which may be password protected. - *Your input stream MUST either support mark/reset, or - * be wrapped as a {@link PushbackInputStream}! Note that - * using an {@link InputStream} has a higher memory footprint - * than using a {@link File}.
+ * the given InputStream, which may be password protected.+ * + * Note that using an {@link InputStream} has a higher memory footprint + * than using a {@link File}.
* - *
Note that in order to properly release resources the + * Note that in order to properly release resources the * Workbook should be closed after use. Note also that loading * from an InputStream requires more memory than loading - * from a File, so prefer {@link #create(File)} where possible.
+ * from a File, so prefer {@link #create(File)} where possible. * * @param inp The {@link InputStream} to read data from. * @param password The password that should be used or null if no password is necessary. @@ -172,23 +172,19 @@ public class WorkbookFactory { * @throws EmptyFileException If an empty stream is given */ public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException { - // If clearly doesn't do mark/reset, wrap up - if (! inp.markSupported()) { - inp = new PushbackInputStream(inp, 8); - } - - // Ensure that there is at least some data there - byte[] header8 = IOUtils.peekFirst8Bytes(inp); - - // Try to create - if (NPOIFSFileSystem.hasPOIFSHeader(header8)) { - NPOIFSFileSystem fs = new NPOIFSFileSystem(inp); + InputStream is = FileMagic.prepareToCheckMagic(inp); + + FileMagic fm = FileMagic.valueOf(is); + + switch (fm) { + case OLE2: + NPOIFSFileSystem fs = new NPOIFSFileSystem(is); return create(fs, password); + case OOXML: + return new XSSFWorkbook(OPCPackage.open(is)); + default: + throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } - if (DocumentFactoryHelper.hasOOXMLHeader(inp)) { - return new XSSFWorkbook(OPCPackage.open(inp)); - } - throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream"); } /** diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java index b5df89f641..a928956be5 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java @@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import javax.xml.namespace.QName; @@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.poifs.filesystem.DirectoryEntry; -import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.ObjectData; import org.apache.poi.util.IOUtils; @@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSFSimpleShape implements ObjectData { InputStream is = null; try { is = getObjectPart().getInputStream(); - - // If clearly doesn't do mark/reset, wrap up - if (! is.markSupported()) { - is = new PushbackInputStream(is, 8); - } - - // Ensure that there is at least some data there - byte[] header8 = IOUtils.peekFirst8Bytes(is); - - // Try to create - return NPOIFSFileSystem.hasPOIFSHeader(header8); + is = FileMagic.prepareToCheckMagic(is); + return FileMagic.valueOf(is) == FileMagic.OLE2; } catch (IOException e) { LOG.log(POILogger.WARN, "can't determine if directory entry exists", e); return false; diff --git a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java index d5f8985318..4275c83599 100644 --- a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java +++ b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java @@ -19,68 +19,70 @@ package org.apache.poi; -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.PushbackInputStream; -import java.util.Arrays; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; -import junit.framework.TestCase; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.poifs.filesystem.DocumentFactoryHelper; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.util.IOUtils; +import org.junit.Test; /** * Class to test that HXF correctly detects OOXML * documents */ -public class TestDetectAsOOXML extends TestCase -{ - public void testOpensProperly() throws Exception - { +public class TestDetectAsOOXML { + @Test + public void testOpensProperly() throws IOException, InvalidFormatException { OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx")); } - public void testDetectAsPOIFS() throws Exception { - InputStream in; - - // ooxml file is - in = new PushbackInputStream( - HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10 - ); - assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in)); - in.close(); - - // xls file isn't - in = new PushbackInputStream( - HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10 - ); - assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); - in.close(); - - // text file isn't - in = new PushbackInputStream( - HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10 - ); - assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); - in.close(); + @Test + public void testDetectAsPOIFS() throws IOException { + Object fileAndMagic[][] = { + { "SampleSS.xlsx", FileMagic.OOXML }, + { "SampleSS.xls", FileMagic.OLE2 }, + { "SampleSS.txt", FileMagic.UNKNOWN } + }; + + for (Object fm[] : fileAndMagic) { + InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]); + is = FileMagic.prepareToCheckMagic(is); + FileMagic act = FileMagic.valueOf(is); + + if (act == FileMagic.OOXML) { + assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is)); + } + + assertEquals("file magic failed for "+fm[0], fm[1], act); + is.close(); + } } + @Test public void testFileCorruption() throws Exception { // create test InputStream - byte[] testData = { (byte)1, (byte)2, (byte)3 }; + byte[] testData = { 1, 2, 3 }; ByteArrayInputStream testInput = new ByteArrayInputStream(testData); + InputStream is = FileMagic.prepareToCheckMagic(testInput); // detect header - InputStream in = new PushbackInputStream(testInput, 10); - assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in)); + assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is)); // check if InputStream is still intact - byte[] test = new byte[3]; - assertEquals(3, in.read(test)); - assertTrue(Arrays.equals(testData, test)); - assertEquals(-1, in.read()); - in.close(); + byte[] act = IOUtils.toByteArray(is); + assertArrayEquals(testData, act); + assertEquals(-1, is.read()); + is.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java index a788028654..44b3c3184f 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java @@ -17,29 +17,23 @@ package org.apache.poi.openxml4j.opc; -import org.apache.poi.*; -import org.apache.poi.extractor.ExtractorFactory; -import org.apache.poi.hssf.HSSFTestDataSamples; -import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; -import org.apache.poi.openxml4j.exceptions.*; -import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; -import org.apache.poi.openxml4j.opc.internal.FileHelper; -import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; -import org.apache.poi.openxml4j.opc.internal.ZipHelper; -import org.apache.poi.openxml4j.util.ZipSecureFile; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.ss.usermodel.WorkbookFactory; -import org.apache.poi.util.*; -import org.apache.poi.xssf.XSSFTestDataSamples; -import org.apache.xmlbeans.XmlException; -import org.junit.Ignore; -import org.junit.Test; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; -import java.io.*; +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.PushbackInputStream; import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.net.URISyntaxException; @@ -52,7 +46,41 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import java.util.zip.ZipOutputStream; -import static org.junit.Assert.*; +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.POIDataSamples; +import org.apache.poi.POITestCase; +import org.apache.poi.POITextExtractor; +import org.apache.poi.POIXMLException; +import org.apache.poi.UnsupportedFileFormatException; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.InvalidOperationException; +import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.internal.ContentTypeManager; +import org.apache.poi.openxml4j.opc.internal.FileHelper; +import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart; +import org.apache.poi.openxml4j.opc.internal.ZipHelper; +import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; +import org.apache.poi.util.DocumentHelper; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; +import org.apache.poi.util.TempFile; +import org.apache.poi.xssf.XSSFTestDataSamples; +import org.apache.xmlbeans.XmlException; +import org.junit.Ignore; +import org.junit.Test; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; public final class TestPackage { private static final POILogger logger = POILogFactory.getLogger(TestPackage.class); @@ -947,20 +975,32 @@ public final class TestPackage { } // bug 60128 - @Test + @Test(expected=NotOfficeXmlFileException.class) public void testCorruptFile() throws IOException, InvalidFormatException { - OPCPackage pkg = null; File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx"); + OPCPackage.open(file, PackageAccess.READ); + } + + // bug 61381 + @Test + public void testTooShortFilterStreams() throws IOException, InvalidFormatException { + File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx"); + File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls"); + + InputStream isList[] = { + new PushbackInputStream(new FileInputStream(xssf), 2), + new BufferedInputStream(new FileInputStream(xssf), 2), + new PushbackInputStream(new FileInputStream(hssf), 2), + new BufferedInputStream(new FileInputStream(hssf), 2), + }; + try { - pkg = OPCPackage.open(file, PackageAccess.READ); - } catch (NotOfficeXmlFileException e) { - /*System.out.println(e.getClass().getName()); - System.out.println(e.getMessage()); - e.printStackTrace();*/ - // ignore exception + for (InputStream is : isList) { + WorkbookFactory.create(is).close(); + } } finally { - if (pkg != null) { - pkg.close(); + for (InputStream is : isList) { + IOUtils.closeQuietly(is); } } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java index 42639decbb..096b04be1c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java @@ -20,7 +20,6 @@ package org.apache.poi.hwpf; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.security.GeneralSecurityException; import org.apache.poi.EncryptedDocumentException; @@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.FileMagic; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.BoundedInputStream; import org.apache.poi.util.IOUtils; @@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore extends POIDocument { * POIFSFileSystem from it, and returns that. */ public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException { - // Open a PushbackInputStream, so we can peek at the first few bytes - PushbackInputStream pis = new PushbackInputStream(istream,6); - byte[] first6 = IOUtils.toByteArray(pis, 6); + InputStream is = FileMagic.prepareToCheckMagic(istream); + FileMagic fm = FileMagic.valueOf(is); - // Does it start with {\rtf ? If so, it's really RTF - if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' - && first6[3] == 't' && first6[4] == 'f') { - throw new IllegalArgumentException("The document is really a RTF file"); - } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) { - throw new IllegalArgumentException("The document is really a PDF file"); - } + if (fm != FileMagic.OLE2) { + throw new IllegalArgumentException("The document is really a "+fm+" file"); + } - // OK, so it's neither RTF nor PDF - // Open a POIFSFileSystem on the (pushed back) stream - pis.unread(first6); - return new POIFSFileSystem(pis); + return new POIFSFileSystem(is); } /** diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java b/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java index 775760aabd..e78e19f375 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java @@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase.assertContains; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PushbackInputStream; import java.util.Arrays; import org.apache.poi.hssf.HSSFTestDataSamples; @@ -86,8 +85,9 @@ public class TestOfficeXMLException extends TestCase { // text file isn't confirmIsPOIFS("SampleSS.txt", false); } + private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException { - InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10); + InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName)); try { boolean actualResult; try { @@ -108,7 +108,7 @@ public class TestOfficeXMLException extends TestCase { InputStream testInput = new ByteArrayInputStream(testData); // detect header - InputStream in = new PushbackInputStream(testInput, 10); + InputStream in = FileMagic.prepareToCheckMagic(testInput); assertFalse(POIFSFileSystem.hasPOIFSHeader(in)); // check if InputStream is still intact @@ -126,7 +126,7 @@ public class TestOfficeXMLException extends TestCase { InputStream testInput = new ByteArrayInputStream(testData); // detect header - InputStream in = new PushbackInputStream(testInput, 10); + InputStream in = FileMagic.prepareToCheckMagic(testInput); assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in)); // check if InputStream is still intact