#61381 - PushbackInputStreams passed to ZipHelper may not hold 8 bytes

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1804854 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2017-08-11 20:47:48 +00:00
parent 299f33b8d8
commit a98350e40b
17 changed files with 455 additions and 423 deletions

View File

@ -17,22 +17,22 @@
package org.apache.poi.poifs.filesystem;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.util.IOUtils;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
/**
* A small base class for the various factories, e.g. WorkbookFactory,
* SlideShowFactory to combine common code here.
*/
@Internal
public class DocumentFactoryHelper {
/**
* Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using
@ -81,36 +81,19 @@ public class DocumentFactoryHelper {
/**
* Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream)
* has a OOXML (zip) header at the start of it.
* If your InputStream does not support mark / reset,
* then wrap it in a PushBackInputStream, then be
* support mark and reset) has a OOXML (zip) header at the start of it.<p>
*
* If unsure if your InputStream does support mark / reset,
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* sure to always use that, and not the original!
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
*
* @param inp An InputStream which supports either mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead
*/
@Deprecated
@Removal(version="4.0")
public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
// We want to peek at the first 4 bytes
inp.mark(4);
byte[] header = new byte[4];
int bytesRead = IOUtils.readFully(inp, header);
// Wind back those 4 bytes
if(inp instanceof PushbackInputStream) {
PushbackInputStream pin = (PushbackInputStream)inp;
pin.unread(header, 0, bytesRead);
} else {
inp.reset();
}
// Did it match the ooxml zip signature?
return (
bytesRead == 4 &&
header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
);
return FileMagic.valueOf(inp) == FileMagic.OOXML;
}
}

View File

@ -0,0 +1,155 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LocaleUtil;
/**
 * The file magic number, i.e. the file identification based on the first bytes
 * of the file.
 *
 * <p>Every constant carries one or more alternative byte patterns. A pattern
 * matches when the examined data starts with it; a pattern byte of
 * {@code 0x70} additionally accepts the data bytes {@code 0x10}, {@code 0x20}
 * and {@code 0x40}.</p>
 */
public enum FileMagic {
    /** OLE2 / BIFF8+ stream used for Office 97 and higher documents */
    OLE2(HeaderBlockConstants._signature),
    /** OOXML / ZIP stream */
    OOXML(OOXML_FILE_HEADER),
    /** XML file */
    XML(RAW_XML_FILE_HEADER),
    /** BIFF2 raw stream - for Excel 2 */
    BIFF2(new byte[]{
        0x09, 0x00, // sid=0x0009
        0x04, 0x00, // size=0x0004
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    }),
    /** BIFF3 raw stream - for Excel 3 */
    BIFF3(new byte[]{
        0x09, 0x02, // sid=0x0209
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    }),
    /** BIFF4 raw stream - for Excel 4 */
    BIFF4(new byte[]{
        0x09, 0x04, // sid=0x0409
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x70, 0x00  // 0x70 = multiple values
    },new byte[]{
        0x09, 0x04, // sid=0x0409
        0x06, 0x00, // size=0x0006
        0x00, 0x00, // unused
        0x00, 0x01
    }),
    /** Old MS Write raw stream */
    MSWRITE(
        new byte[]{0x31, (byte)0xbe, 0x00, 0x00 },
        new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }),
    /** RTF document */
    RTF("{\\rtf"),
    /** PDF document */
    PDF("%PDF"),
    // keep UNKNOWN always as last enum!
    // Its single empty pattern matches any input, which makes it the
    // fallback when valueOf(byte[]) walks the constants in declaration order.
    /** UNKNOWN magic */
    UNKNOWN(new byte[0]);

    /** The alternative byte patterns identifying this format; one inner array per alternative. */
    final byte[][] magic;

    /**
     * Creates a magic consisting of a single 8 byte pattern.
     *
     * @param magic the signature value, stored through {@link LittleEndian#putLong}
     */
    FileMagic(long magic) {
        this.magic = new byte[1][8];
        LittleEndian.putLong(this.magic[0], 0, magic);
    }

    /**
     * Creates a magic from one or more alternative byte patterns.
     *
     * @param magic the alternative patterns; the array is stored as given
     */
    FileMagic(byte[]... magic) {
        this.magic = magic;
    }

    /**
     * Creates a magic from a single textual pattern.
     *
     * @param magic the pattern text, converted to bytes using {@link LocaleUtil#CHARSET_1252}
     */
    FileMagic(String magic) {
        this(magic.getBytes(LocaleUtil.CHARSET_1252));
    }

    /**
     * Determines the file magic for the given leading bytes of a file.
     *
     * <p>The constants are checked in declaration order and each alternative
     * pattern of a constant is compared independently, starting at the first
     * byte of {@code magic}. Patterns longer than the supplied data are
     * skipped instead of being read out of bounds.</p>
     *
     * @param magic the first bytes of the file, must not be {@code null}
     * @return the first matching constant, or {@link #UNKNOWN} if no pattern matches
     */
    public static FileMagic valueOf(byte[] magic) {
        for (FileMagic fm : values()) {
            for (byte[] pattern : fm.magic) {
                // every alternative is tested on its own, always from offset 0
                if (startsWith(magic, pattern)) {
                    return fm;
                }
            }
        }
        return UNKNOWN;
    }

    /**
     * Checks whether {@code data} begins with {@code pattern}.
     *
     * @param data the bytes to examine
     * @param pattern the expected leading bytes; {@code 0x70} also accepts
     *        {@code 0x10}, {@code 0x20} and {@code 0x40}
     * @return {@code true} if all pattern bytes match; {@code false} on a
     *         mismatch or if {@code data} is shorter than {@code pattern}
     */
    private static boolean startsWith(byte[] data, byte[] pattern) {
        if (data.length < pattern.length) {
            return false;
        }
        for (int i = 0; i < pattern.length; i++) {
            byte d = data[i];
            byte m = pattern[i];
            boolean wildcardHit = (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40));
            if (d != m && !wildcardHit) {
                return false;
            }
        }
        return true;
    }

    /**
     * Get the file magic of the supplied InputStream (which MUST
     * support mark and reset).<p>
     *
     * If unsure if your InputStream does support mark / reset,
     * use {@link #prepareToCheckMagic(InputStream)} to wrap it and make
     * sure to always use that, and not the original!<p>
     *
     * Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean,
     * that the ZIP stream has leading junk bytes
     *
     * @param inp An InputStream which supports mark/reset
     * @return the detected file magic, or {@link #UNKNOWN} if nothing matched
     * @throws IOException if the stream does not support mark/reset, or if
     *         peeking at the leading bytes fails
     */
    public static FileMagic valueOf(InputStream inp) throws IOException {
        if (!inp.markSupported()) {
            throw new IOException("getFileMagic() only operates on streams which support mark(int)");
        }
        // Grab the first 8 bytes
        byte[] data = IOUtils.peekFirst8Bytes(inp);
        return FileMagic.valueOf(data);
    }

    /**
     * Checks if an {@link InputStream} can be reset (i.e. used for checking the
     * header magic) and wraps it if not.
     *
     * @param stream stream to be checked for wrapping
     * @return the given stream if it supports mark/reset, otherwise a
     *         {@link BufferedInputStream} around it
     */
    public static InputStream prepareToCheckMagic(InputStream stream) {
        if (stream.markSupported()) {
            return stream;
        }
        // we used to process the data via a PushbackInputStream, but user code could provide a too small one
        // so we use a BufferedInputStream instead now
        return new BufferedInputStream(stream);
    }
}

View File

@ -19,6 +19,7 @@
package org.apache.poi.poifs.filesystem;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
@ -26,7 +27,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex;
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.poifs.storage.HeaderBlockWriter;
import org.apache.poi.util.CloseIgnoringInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LongField;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal;
/**
* <p>This is the main class of the POIFS system; it manages the entire
@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends BlockStore
/**
* Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream)
* has a POIFS (OLE2) header at the start of it.
* If your InputStream does not support mark / reset,
* then wrap it in a PushBackInputStream, then be
* sure to always use that and not the original!
* support mark and reset) has a POIFS (OLE2) header at the start of it.
* If unsure if your InputStream does support mark / reset,
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* sure to always use that, and not the original!
*
* After the method call, the InputStream is at the
* same position as of the time of entering the method.
*
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
* @param inp An InputStream which supports mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
// We want to peek at the first 8 bytes
inp.mark(8);
byte[] header = new byte[8];
int bytesRead = IOUtils.readFully(inp, header);
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
// Wind back those 8 bytes
if(inp instanceof PushbackInputStream) {
PushbackInputStream pin = (PushbackInputStream)inp;
pin.unread(header, 0, bytesRead);
} else {
inp.reset();
}
// Did it match the signature?
return (signature.get() == HeaderBlockConstants._signature);
return FileMagic.valueOf(inp) == FileMagic.OLE2;
}
/**
* Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header.
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
return (signature.get() == HeaderBlockConstants._signature);
try {
return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes));
} catch (IOException e) {
throw new RuntimeException("invalid header check", e);
}
}
/**

View File

@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
import org.apache.poi.poifs.storage.BlockList;
import org.apache.poi.poifs.storage.BlockWritable;
import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.poifs.storage.HeaderBlockWriter;
import org.apache.poi.poifs.storage.RawDataBlockList;
import org.apache.poi.poifs.storage.SmallBlockTableReader;
import org.apache.poi.poifs.storage.SmallBlockTableWriter;
import org.apache.poi.util.CloseIgnoringInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LongField;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal;
/**
* <p>This is the main class of the POIFS system; it manages the entire
@ -200,27 +198,34 @@ public class OPOIFSFileSystem
/**
* Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream)
* has a POIFS (OLE2) header at the start of it.
* If your InputStream does not support mark / reset,
* then wrap it in a PushBackInputStream, then be
* support mark and reset) has a POIFS (OLE2) header at the start of it.
* If unsure if your InputStream does support mark / reset,
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
* sure to always use that, and not the original!
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
*
* After the method call, the InputStream is at the
* same position as of the time of entering the method.
*
* @param inp An InputStream which supports either mark/reset
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
// We want to peek at the first 8 bytes
byte[] header = IOUtils.peekFirst8Bytes(inp);
return hasPOIFSHeader(header);
return NPOIFSFileSystem.hasPOIFSHeader(inp);
}
/**
* Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header.
*
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
*/
@Deprecated
@Removal(version="4.0")
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
// Did it match the signature?
return (signature.get() == HeaderBlockConstants._signature);
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
}
/**

View File

@ -114,27 +114,6 @@ public class POIFSFileSystem
super(file);
}
/**
* Checks that the supplied InputStream (which MUST
* support mark and reset, or be a PushbackInputStream)
* has a POIFS (OLE2) header at the start of it.
* If your InputStream does not support mark / reset,
* then wrap it in a PushbackInputStream, then be
* sure to always use that, and not the original!
* <p>
* This is a pure convenience forwarder to
* {@link NPOIFSFileSystem#hasPOIFSHeader(InputStream)}.
*
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
* @return whatever {@link NPOIFSFileSystem#hasPOIFSHeader(InputStream)} reports for the stream
* @throws IOException if the delegate fails while examining the stream
*/
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
return NPOIFSFileSystem.hasPOIFSHeader(inp);
}
/**
* Checks if the supplied first 8 bytes of a stream / file
* has a POIFS (OLE2) header.
* <p>
* This is a pure convenience forwarder to
* {@link NPOIFSFileSystem#hasPOIFSHeader(byte[])}.
*
* @param header8Bytes the leading bytes of the stream / file to examine
* @return whatever {@link NPOIFSFileSystem#hasPOIFSHeader(byte[])} reports for the bytes
*/
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
}
/**
* Creates a new {@link POIFSFileSystem} in a new {@link File}.
* Use {@link #POIFSFileSystem(File)} to open an existing File,

View File

@ -17,8 +17,8 @@
package org.apache.poi.poifs.macros;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@ -27,7 +27,6 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.CodePageUtil;
@ -67,13 +67,12 @@ public class VBAMacroReader implements Closeable {
private NPOIFSFileSystem fs;
public VBAMacroReader(InputStream rstream) throws IOException {
PushbackInputStream stream = new PushbackInputStream(rstream, 8);
byte[] header8 = IOUtils.peekFirst8Bytes(stream);
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
fs = new NPOIFSFileSystem(stream);
InputStream is = FileMagic.prepareToCheckMagic(rstream);
FileMagic fm = FileMagic.valueOf(is);
if (fm == FileMagic.OLE2) {
fs = new NPOIFSFileSystem(is);
} else {
openOOXML(stream);
openOOXML(is);
}
}

View File

@ -26,6 +26,7 @@ import java.util.Arrays;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.HexDump;
@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField;
* The block containing the archive header
*/
public final class HeaderBlock implements HeaderBlockConstants {
private static final byte[] MAGIC_BIFF2 = {
0x09, 0x00, // sid=0x0009
0x04, 0x00, // size=0x0004
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF3 = {
0x09, 0x02, // sid=0x0209
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF4a = {
0x09, 0x04, // sid=0x0409
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x70, 0x00 // 0x70 = multiple values
};
private static final byte[] MAGIC_BIFF4b = {
0x09, 0x04, // sid=0x0409
0x06, 0x00, // size=0x0006
0x00, 0x00, // unused
0x00, 0x01
};
private static final byte[] MAGIC_MSWRITEa = {
0x31, (byte)0xbe, 0x00, 0x00
};
private static final byte[] MAGIC_MSWRITEb = {
0x32, (byte)0xbe, 0x00, 0x00
};
private static final byte _default_value = ( byte ) 0xFF;
/**
@ -151,53 +117,35 @@ public final class HeaderBlock implements HeaderBlockConstants {
this._data = data.clone();
// verify signature
long signature = LittleEndian.getLong(_data, _signature_offset);
if (signature != _signature) {
// Is it one of the usual suspects?
if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) {
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
+ "You are calling the part of POI that deals with OLE2 Office Documents. "
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
}
if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) {
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
+ "Formats such as Office 2003 XML are not supported");
}
// Old MS Write raw stream
if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) {
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
+ "Apache POI doesn't currently support this format");
}
// BIFF2 raw stream
if (cmp(MAGIC_BIFF2, data)) {
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
// BIFF3 raw stream
if (cmp(MAGIC_BIFF3, data)) {
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
// BIFF4 raw stream
if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) {
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
// Give a generic error if the OLE2 signature isn't found
throw new NotOLE2FileException("Invalid header signature; read "
+ HexDump.longToHex(signature) + ", expected "
+ HexDump.longToHex(_signature) + " - Your file appears "
+ "not to be a valid OLE2 document");
}
FileMagic fm = FileMagic.valueOf(data);
switch (fm) {
case OLE2:
break;
case OOXML:
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
+ "You are calling the part of POI that deals with OLE2 Office Documents. "
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
case XML:
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
+ "Formats such as Office 2003 XML are not supported");
case MSWRITE:
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
+ "Apache POI doesn't currently support this format");
case BIFF2:
case BIFF3:
case BIFF4:
throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. "
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
default:
// Give a generic error if the OLE2 signature isn't found
String exp = HexDump.longToHex(_signature);
String act = HexDump.longToHex(LittleEndian.getLong(data, 0));
throw new NotOLE2FileException(
"Invalid header signature; read " + act + ", expected " + exp +
" - Your file appears not to be a valid OLE2 document");
}
// Figure out our block size
if (_data[30] == 12) {
this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
@ -434,15 +382,4 @@ public final class HeaderBlock implements HeaderBlockConstants {
stream.write(0);
}
}
/**
 * Tests whether {@code data} starts with the byte pattern {@code magic}.
 * <p>
 * Bytes are compared position by position from index 0. A pattern byte of
 * {@code 0x70} is treated as a wildcard that also accepts the data bytes
 * {@code 0x10}, {@code 0x20} and {@code 0x40}.
 * <p>
 * No length check is performed: if {@code data} is shorter than
 * {@code magic} and all available bytes match, the array access throws an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param magic the expected leading bytes
 * @param data the bytes to examine
 * @return {@code true} if every byte of {@code magic} matches, {@code false} on the first mismatch
 */
private static boolean cmp(byte[] magic, byte[] data) {
int i=0;
for (byte m : magic) {
byte d = data[i++];
// match on equality, or on the 0x70 wildcard accepting 0x10 / 0x20 / 0x40
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
return false;
}
}
return true;
}
}

View File

@ -20,7 +20,6 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.IOUtils;
@ -94,9 +94,7 @@ public class SlideShowFactory {
* Creates the appropriate HSLFSlideShow / XMLSlideShow from
* the given InputStream.
*
* <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
* <p>Note that using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p>
*
* <p>Note that in order to properly release resources the
@ -118,9 +116,8 @@ public class SlideShowFactory {
/**
* Creates the appropriate HSLFSlideShow / XMLSlideShow from
* the given InputStream, which may be password protected.
* <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
*
* <p>Note that using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p>
*
* <p>Note that in order to properly release resources the
@ -137,23 +134,18 @@ public class SlideShowFactory {
* @throws EncryptedDocumentException If the wrong password is given for a protected file
*/
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
// If clearly doesn't do mark/reset, wrap up
if (! inp.markSupported()) {
inp = new PushbackInputStream(inp, 8);
}
// Ensure that there is at least some data there
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
// Try to create
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
InputStream is = FileMagic.prepareToCheckMagic(inp);
FileMagic fm = FileMagic.valueOf(is);
switch (fm) {
case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
return create(fs, password);
case OOXML:
return createXSLFSlideShow(is);
default:
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
return createXSLFSlideShow(inp);
}
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
/**

View File

@ -21,7 +21,6 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList;
import java.util.Iterator;
@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
@ -175,21 +174,20 @@ public class ExtractorFactory {
}
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
// Figure out the kind of stream
// If clearly doesn't do mark/reset, wrap up
if (! inp.markSupported()) {
inp = new PushbackInputStream(inp, 8);
}
InputStream is = FileMagic.prepareToCheckMagic(inp);
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
FileMagic fm = FileMagic.valueOf(is);
switch (fm) {
case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs);
case OOXML:
return createExtractor(OPCPackage.open(is));
default:
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
return createExtractor(OPCPackage.open(inp));
}
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
/**

View File

@ -22,7 +22,6 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Enumeration;
@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.storage.HeaderBlockConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.Internal;
import org.apache.poi.util.Removal;
@Internal
public final class ZipHelper {
/**
* Forward slash use to convert part name between OPC and zip item naming
@ -172,59 +170,29 @@ public final class ZipHelper {
* Warning - this will consume the first few bytes of the stream,
* you should push-back or reset the stream after use!
*/
public static void verifyZipHeader(InputStream stream)
throws NotOfficeXmlFileException, IOException {
// Grab the first 8 bytes
byte[] data = new byte[8];
IOUtils.readFully(stream, data);
// OLE2?
long signature = LittleEndian.getLong(data);
if (signature == HeaderBlockConstants._signature) {
public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException {
InputStream is = FileMagic.prepareToCheckMagic(stream);
FileMagic fm = FileMagic.valueOf(is);
switch (fm) {
case OLE2:
throw new OLE2NotOfficeXmlFileException(
"The supplied data appears to be in the OLE2 Format. " +
"You are calling the part of POI that deals with OOXML "+
"(Office Open XML) Documents. You need to call a different " +
"part of POI to process this data (eg HSSF instead of XSSF)");
}
// Raw XML?
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
if (data[0] == RAW_XML_FILE_HEADER[0] &&
data[1] == RAW_XML_FILE_HEADER[1] &&
data[2] == RAW_XML_FILE_HEADER[2] &&
data[3] == RAW_XML_FILE_HEADER[3] &&
data[4] == RAW_XML_FILE_HEADER[4]) {
case XML:
throw new NotOfficeXmlFileException(
"The supplied data appears to be a raw XML file. " +
"Formats such as Office 2003 XML are not supported");
default:
case OOXML:
case UNKNOWN:
// Don't check for a Zip header, as to maintain backwards
// compatibility we need to let them seek over junk at the
// start before beginning processing.
break;
}
// Don't check for a Zip header, as to maintain backwards
// compatibility we need to let them seek over junk at the
// start before beginning processing.
// Put things back
if (stream instanceof PushbackInputStream) {
((PushbackInputStream)stream).unread(data);
} else if (stream.markSupported()) {
stream.reset();
} else if (stream instanceof FileInputStream) {
// File open check, about to be closed, nothing to do
} else {
// Oh dear... I hope you know what you're doing!
}
}
/**
 * Returns a stream whose first 8 bytes can be read and then given back.
 * <ul>
 * <li>A {@link PushbackInputStream} is returned unchanged; its push-back
 *     buffer size is not checked here.</li>
 * <li>A stream supporting mark/reset is marked with a read limit of 8 and
 *     returned unchanged.</li>
 * <li>Any other stream is wrapped in a new {@link PushbackInputStream} with
 *     an 8 byte push-back buffer.</li>
 * </ul>
 *
 * @param stream the stream about to be header-checked
 * @return the given stream, or a push-back wrapper around it
 */
private static InputStream prepareToCheckHeader(InputStream stream) {
// checked first, so a caller-supplied push-back stream is never marked or re-wrapped
if (stream instanceof PushbackInputStream) {
return stream;
}
if (stream.markSupported()) {
stream.mark(8);
return stream;
}
return new PushbackInputStream(stream, 8);
}
/**
@ -237,7 +205,7 @@ public final class ZipHelper {
@SuppressWarnings("resource")
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
// Peek at the first few bytes to sanity check
InputStream checkedStream = prepareToCheckHeader(stream);
InputStream checkedStream = FileMagic.prepareToCheckMagic(stream);
verifyZipHeader(checkedStream);
// Open as a proper zip stream

View File

@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFile {
public static class ThresholdInputStream extends PushbackInputStream {
long counter = 0;
long markPos = 0;
ThresholdInputStream cis;
public ThresholdInputStream(InputStream is, ThresholdInputStream cis) {
super(is,1);
super(is);
this.cis = cis;
}
@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFile {
@Override
public long skip(long n) throws IOException {
counter = 0;
return in.skip(n);
long s = in.skip(n);
counter += s;
return s;
}
@Override
public synchronized void reset() throws IOException {
counter = 0;
in.reset();
counter = markPos;
super.reset();
}
public void advance(int advance) throws IOException {
@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFile {
}
// one of the limits was reached, report it
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. "
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. "
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter)
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n"
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n"
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n"
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n"
+ "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO);
}
@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFile {
/**
 * Remembers the current value of {@code counter} in {@code markPos} and
 * then marks the wrapped stream {@code in}.
 *
 * @param readlimit passed through unchanged to the wrapped stream's {@code mark}
 */
@Override
public synchronized void mark(int readlimit) {
// snapshot the counter before delegating, so the value at mark time is kept
markPos = counter;
in.mark(readlimit);
}
}

View File

@ -16,11 +16,11 @@
==================================================================== */
package org.apache.poi.ss.usermodel;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import org.apache.poi.EmptyFileException;
import org.apache.poi.EncryptedDocumentException;
@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -127,7 +128,7 @@ public class WorkbookFactory {
* the given InputStream.
*
* <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that
* be wrapped as a {@link BufferedInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p>
*
@ -150,16 +151,15 @@ public class WorkbookFactory {
/**
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from
* the given InputStream, which may be password protected.
* <p>Your input stream MUST either support mark/reset, or
* be wrapped as a {@link PushbackInputStream}! Note that
* using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.</p>
* the given InputStream, which may be password protected.<p>
*
* Note that using an {@link InputStream} has a higher memory footprint
* than using a {@link File}.<p>
*
* <p>Note that in order to properly release resources the
* Note that in order to properly release resources the
* Workbook should be closed after use. Note also that loading
* from an InputStream requires more memory than loading
* from a File, so prefer {@link #create(File)} where possible.</p>
* from a File, so prefer {@link #create(File)} where possible.
*
* @param inp The {@link InputStream} to read data from.
* @param password The password that should be used or null if no password is necessary.
@ -172,23 +172,19 @@ public class WorkbookFactory {
* @throws EmptyFileException If an empty stream is given
*/
public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
// If clearly doesn't do mark/reset, wrap up
if (! inp.markSupported()) {
inp = new PushbackInputStream(inp, 8);
}
// Ensure that there is at least some data there
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
// Try to create
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
InputStream is = FileMagic.prepareToCheckMagic(inp);
FileMagic fm = FileMagic.valueOf(is);
switch (fm) {
case OLE2:
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
return create(fs, password);
case OOXML:
return new XSSFWorkbook(OPCPackage.open(is));
default:
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
return new XSSFWorkbook(OPCPackage.open(inp));
}
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
/**

View File

@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import javax.xml.namespace.QName;
@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.ObjectData;
import org.apache.poi.util.IOUtils;
@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
InputStream is = null;
try {
is = getObjectPart().getInputStream();
// If clearly doesn't do mark/reset, wrap up
if (! is.markSupported()) {
is = new PushbackInputStream(is, 8);
}
// Ensure that there is at least some data there
byte[] header8 = IOUtils.peekFirst8Bytes(is);
// Try to create
return NPOIFSFileSystem.hasPOIFSHeader(header8);
is = FileMagic.prepareToCheckMagic(is);
return FileMagic.valueOf(is) == FileMagic.OLE2;
} catch (IOException e) {
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
return false;

View File

@ -19,68 +19,70 @@
package org.apache.poi;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Arrays;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import junit.framework.TestCase;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.util.IOUtils;
import org.junit.Test;
/**
* Class to test that HXF correctly detects OOXML
* documents
*/
public class TestDetectAsOOXML extends TestCase
{
public void testOpensProperly() throws Exception
{
public class TestDetectAsOOXML {
/**
 * Smoke test: opening the sample OOXML workbook from a stream
 * must complete without throwing.
 */
@Test
public void testOpensProperly() throws IOException, InvalidFormatException {
    InputStream sample = HSSFTestDataSamples.openSampleFileStream("sample.xlsx");
    OPCPackage.open(sample);
}
public void testDetectAsPOIFS() throws Exception {
InputStream in;
// ooxml file is
in = new PushbackInputStream(
HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10
);
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
// xls file isn't
in = new PushbackInputStream(
HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10
);
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
// text file isn't
in = new PushbackInputStream(
HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10
);
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
in.close();
/**
 * Checks that {@link FileMagic#valueOf(InputStream)} reports the expected
 * magic for an OOXML, an OLE2 and a plain text sample file, and that
 * {@link DocumentFactoryHelper#hasOOXMLHeader(InputStream)} agrees whenever
 * the detected magic is OOXML.
 *
 * @throws IOException if a sample file cannot be opened, read or closed
 */
@Test
public void testDetectAsPOIFS() throws IOException {
    // each row: sample file name, expected FileMagic
    Object[][] fileAndMagic = {
        { "SampleSS.xlsx", FileMagic.OOXML },
        { "SampleSS.xls", FileMagic.OLE2 },
        { "SampleSS.txt", FileMagic.UNKNOWN }
    };

    for (Object[] fm : fileAndMagic) {
        String fileName = (String) fm[0];
        // always work on the wrapped stream, never on the original one
        InputStream is = FileMagic.prepareToCheckMagic(
                HSSFTestDataSamples.openSampleFileStream(fileName));
        try {
            FileMagic act = FileMagic.valueOf(is);

            if (act == FileMagic.OOXML) {
                assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is));
            }
            assertEquals("file magic failed for " + fileName, fm[1], act);
        } finally {
            // close even when an assertion fails, so the sample file handle is not leaked
            is.close();
        }
    }
}
@Test
public void testFileCorruption() throws Exception {
// create test InputStream
byte[] testData = { (byte)1, (byte)2, (byte)3 };
byte[] testData = { 1, 2, 3 };
ByteArrayInputStream testInput = new ByteArrayInputStream(testData);
InputStream is = FileMagic.prepareToCheckMagic(testInput);
// detect header
InputStream in = new PushbackInputStream(testInput, 10);
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is));
// check if InputStream is still intact
byte[] test = new byte[3];
assertEquals(3, in.read(test));
assertTrue(Arrays.equals(testData, test));
assertEquals(-1, in.read());
in.close();
byte[] act = IOUtils.toByteArray(is);
assertArrayEquals(testData, act);
assertEquals(-1, is.read());
is.close();
}
}

View File

@ -17,29 +17,23 @@
package org.apache.poi.openxml4j.opc;
import org.apache.poi.*;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.*;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.*;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.xmlbeans.XmlException;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.*;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PushbackInputStream;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
@ -52,7 +46,41 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import static org.junit.Assert.*;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITestCase;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLException;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
import org.apache.poi.openxml4j.opc.internal.FileHelper;
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.DocumentHelper;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.TempFile;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.apache.xmlbeans.XmlException;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public final class TestPackage {
private static final POILogger logger = POILogFactory.getLogger(TestPackage.class);
@ -947,20 +975,32 @@ public final class TestPackage {
}
// bug 60128
@Test
@Test(expected=NotOfficeXmlFileException.class)
public void testCorruptFile() throws IOException, InvalidFormatException {
OPCPackage pkg = null;
File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx");
OPCPackage.open(file, PackageAccess.READ);
}
// bug 61381
@Test
public void testTooShortFilterStreams() throws IOException, InvalidFormatException {
File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx");
File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls");
InputStream isList[] = {
new PushbackInputStream(new FileInputStream(xssf), 2),
new BufferedInputStream(new FileInputStream(xssf), 2),
new PushbackInputStream(new FileInputStream(hssf), 2),
new BufferedInputStream(new FileInputStream(hssf), 2),
};
try {
pkg = OPCPackage.open(file, PackageAccess.READ);
} catch (NotOfficeXmlFileException e) {
/*System.out.println(e.getClass().getName());
System.out.println(e.getMessage());
e.printStackTrace();*/
// ignore exception
for (InputStream is : isList) {
WorkbookFactory.create(is).close();
}
} finally {
if (pkg != null) {
pkg.close();
for (InputStream is : isList) {
IOUtils.closeQuietly(is);
}
}
}

View File

@ -20,7 +20,6 @@ package org.apache.poi.hwpf;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException;
@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils;
@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore extends POIDocument {
* POIFSFileSystem from it, and returns that.
*/
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes
PushbackInputStream pis = new PushbackInputStream(istream,6);
byte[] first6 = IOUtils.toByteArray(pis, 6);
InputStream is = FileMagic.prepareToCheckMagic(istream);
FileMagic fm = FileMagic.valueOf(is);
// Does it start with {\rtf ? If so, it's really RTF
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
&& first6[3] == 't' && first6[4] == 'f') {
throw new IllegalArgumentException("The document is really a RTF file");
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
throw new IllegalArgumentException("The document is really a PDF file");
}
if (fm != FileMagic.OLE2) {
throw new IllegalArgumentException("The document is really a "+fm+" file");
}
// OK, so it's neither RTF nor PDF
// Open a POIFSFileSystem on the (pushed back) stream
pis.unread(first6);
return new POIFSFileSystem(pis);
return new POIFSFileSystem(is);
}
/**

View File

@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase.assertContains;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Arrays;
import org.apache.poi.hssf.HSSFTestDataSamples;
@ -86,8 +85,9 @@ public class TestOfficeXMLException extends TestCase {
// text file isn't
confirmIsPOIFS("SampleSS.txt", false);
}
private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException {
InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10);
InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName));
try {
boolean actualResult;
try {
@ -108,7 +108,7 @@ public class TestOfficeXMLException extends TestCase {
InputStream testInput = new ByteArrayInputStream(testData);
// detect header
InputStream in = new PushbackInputStream(testInput, 10);
InputStream in = FileMagic.prepareToCheckMagic(testInput);
assertFalse(POIFSFileSystem.hasPOIFSHeader(in));
// check if InputStream is still intact
@ -126,7 +126,7 @@ public class TestOfficeXMLException extends TestCase {
InputStream testInput = new ByteArrayInputStream(testData);
// detect header
InputStream in = new PushbackInputStream(testInput, 10);
InputStream in = FileMagic.prepareToCheckMagic(testInput);
assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in));
// check if InputStream is still intact