mirror of https://github.com/apache/poi.git
#61381 - PushbackInputStreams passed to ZipHelper may not hold 8 bytes
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1804854 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
299f33b8d8
commit
a98350e40b
|
@ -17,22 +17,22 @@
|
|||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.security.GeneralSecurityException;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
/**
|
||||
* A small base class for the various factories, e.g. WorkbookFactory,
|
||||
* SlideShowFactory to combine common code here.
|
||||
*/
|
||||
@Internal
|
||||
public class DocumentFactoryHelper {
|
||||
/**
|
||||
* Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using
|
||||
|
@ -81,36 +81,19 @@ public class DocumentFactoryHelper {
|
|||
|
||||
/**
|
||||
* Checks that the supplied InputStream (which MUST
|
||||
* support mark and reset, or be a PushbackInputStream)
|
||||
* has a OOXML (zip) header at the start of it.
|
||||
* If your InputStream does not support mark / reset,
|
||||
* then wrap it in a PushBackInputStream, then be
|
||||
* support mark and reset) has a OOXML (zip) header at the start of it.<p>
|
||||
*
|
||||
* If unsure if your InputStream does support mark / reset,
|
||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
|
||||
* sure to always use that, and not the original!
|
||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
|
||||
*
|
||||
* @param inp An InputStream which supports either mark/reset
|
||||
*
|
||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.0")
|
||||
public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
|
||||
// We want to peek at the first 4 bytes
|
||||
inp.mark(4);
|
||||
|
||||
byte[] header = new byte[4];
|
||||
int bytesRead = IOUtils.readFully(inp, header);
|
||||
|
||||
// Wind back those 4 bytes
|
||||
if(inp instanceof PushbackInputStream) {
|
||||
PushbackInputStream pin = (PushbackInputStream)inp;
|
||||
pin.unread(header, 0, bytesRead);
|
||||
} else {
|
||||
inp.reset();
|
||||
return FileMagic.valueOf(inp) == FileMagic.OOXML;
|
||||
}
|
||||
|
||||
// Did it match the ooxml zip signature?
|
||||
return (
|
||||
bytesRead == 4 &&
|
||||
header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
|
||||
header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
|
||||
header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
|
||||
header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
|
||||
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LocaleUtil;
|
||||
|
||||
/**
|
||||
* The file magic number, i.e. the file identification based on the first bytes
|
||||
* of the file
|
||||
*/
|
||||
public enum FileMagic {
|
||||
/** OLE2 / BIFF8+ stream used for Office 97 and higher documents */
|
||||
OLE2(HeaderBlockConstants._signature),
|
||||
/** OOXML / ZIP stream */
|
||||
OOXML(OOXML_FILE_HEADER),
|
||||
/** XML file */
|
||||
XML(RAW_XML_FILE_HEADER),
|
||||
/** BIFF2 raw stream - for Excel 2 */
|
||||
BIFF2(new byte[]{
|
||||
0x09, 0x00, // sid=0x0009
|
||||
0x04, 0x00, // size=0x0004
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
}),
|
||||
/** BIFF3 raw stream - for Excel 3 */
|
||||
BIFF3(new byte[]{
|
||||
0x09, 0x02, // sid=0x0209
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
}),
|
||||
/** BIFF4 raw stream - for Excel 4 */
|
||||
BIFF4(new byte[]{
|
||||
0x09, 0x04, // sid=0x0409
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
},new byte[]{
|
||||
0x09, 0x04, // sid=0x0409
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x00, 0x01
|
||||
}),
|
||||
/** Old MS Write raw stream */
|
||||
MSWRITE(
|
||||
new byte[]{0x31, (byte)0xbe, 0x00, 0x00 },
|
||||
new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }),
|
||||
/** RTF document */
|
||||
RTF("{\\rtf"),
|
||||
/** PDF document */
|
||||
PDF("%PDF"),
|
||||
// keep UNKNOWN always as last enum!
|
||||
/** UNKNOWN magic */
|
||||
UNKNOWN(new byte[0]);
|
||||
|
||||
final byte[][] magic;
|
||||
|
||||
FileMagic(long magic) {
|
||||
this.magic = new byte[1][8];
|
||||
LittleEndian.putLong(this.magic[0], 0, magic);
|
||||
}
|
||||
|
||||
FileMagic(byte[]... magic) {
|
||||
this.magic = magic;
|
||||
}
|
||||
|
||||
FileMagic(String magic) {
|
||||
this(magic.getBytes(LocaleUtil.CHARSET_1252));
|
||||
}
|
||||
|
||||
public static FileMagic valueOf(byte[] magic) {
|
||||
for (FileMagic fm : values()) {
|
||||
int i=0;
|
||||
boolean found = true;
|
||||
for (byte[] ma : fm.magic) {
|
||||
for (byte m : ma) {
|
||||
byte d = magic[i++];
|
||||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
|
||||
found = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
return fm;
|
||||
}
|
||||
}
|
||||
}
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the file magic of the supplied InputStream (which MUST
|
||||
* support mark and reset).<p>
|
||||
*
|
||||
* If unsure if your InputStream does support mark / reset,
|
||||
* use {@link #prepareToCheckMagic(InputStream)} to wrap it and make
|
||||
* sure to always use that, and not the original!<p>
|
||||
*
|
||||
* Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean,
|
||||
* that the ZIP stream has leading junk bytes
|
||||
*
|
||||
* @param inp An InputStream which supports either mark/reset
|
||||
*/
|
||||
public static FileMagic valueOf(InputStream inp) throws IOException {
|
||||
if (!inp.markSupported()) {
|
||||
throw new IOException("getFileMagic() only operates on streams which support mark(int)");
|
||||
}
|
||||
|
||||
// Grab the first 8 bytes
|
||||
byte[] data = IOUtils.peekFirst8Bytes(inp);
|
||||
|
||||
return FileMagic.valueOf(data);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if an {@link InputStream} can be reseted (i.e. used for checking the header magic) and wraps it if not
|
||||
*
|
||||
* @param stream stream to be checked for wrapping
|
||||
* @return a mark enabled stream
|
||||
*/
|
||||
public static InputStream prepareToCheckMagic(InputStream stream) {
|
||||
if (stream.markSupported()) {
|
||||
return stream;
|
||||
}
|
||||
// we used to process the data via a PushbackInputStream, but user code could provide a too small one
|
||||
// so we use a BufferedInputStream instead now
|
||||
return new BufferedInputStream(stream);
|
||||
}
|
||||
}
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
@ -26,7 +27,6 @@ import java.io.FileOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.Channels;
|
||||
import java.nio.channels.FileChannel;
|
||||
|
@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex;
|
|||
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
|
||||
import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
|
||||
import org.apache.poi.poifs.storage.HeaderBlock;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockWriter;
|
||||
import org.apache.poi.util.CloseIgnoringInputStream;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LongField;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
/**
|
||||
* <p>This is the main class of the POIFS system; it manages the entire
|
||||
|
@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends BlockStore
|
|||
|
||||
/**
|
||||
* Checks that the supplied InputStream (which MUST
|
||||
* support mark and reset, or be a PushbackInputStream)
|
||||
* has a POIFS (OLE2) header at the start of it.
|
||||
* If your InputStream does not support mark / reset,
|
||||
* then wrap it in a PushBackInputStream, then be
|
||||
* sure to always use that and not the original!
|
||||
* support mark and reset) has a POIFS (OLE2) header at the start of it.
|
||||
* If unsure if your InputStream does support mark / reset,
|
||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
|
||||
* sure to always use that, and not the original!
|
||||
*
|
||||
* After the method call, the InputStream is at the
|
||||
* same position as of the time of entering the method.
|
||||
*
|
||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
|
||||
* @param inp An InputStream which supports mark/reset
|
||||
*
|
||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.0")
|
||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
|
||||
// We want to peek at the first 8 bytes
|
||||
inp.mark(8);
|
||||
|
||||
byte[] header = new byte[8];
|
||||
int bytesRead = IOUtils.readFully(inp, header);
|
||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
|
||||
|
||||
// Wind back those 8 bytes
|
||||
if(inp instanceof PushbackInputStream) {
|
||||
PushbackInputStream pin = (PushbackInputStream)inp;
|
||||
pin.unread(header, 0, bytesRead);
|
||||
} else {
|
||||
inp.reset();
|
||||
}
|
||||
|
||||
// Did it match the signature?
|
||||
return (signature.get() == HeaderBlockConstants._signature);
|
||||
return FileMagic.valueOf(inp) == FileMagic.OLE2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the supplied first 8 bytes of a stream / file
|
||||
* has a POIFS (OLE2) header.
|
||||
*
|
||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.0")
|
||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
|
||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
|
||||
return (signature.get() == HeaderBlockConstants._signature);
|
||||
try {
|
||||
return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("invalid header check", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
|
|||
import org.apache.poi.poifs.storage.BlockList;
|
||||
import org.apache.poi.poifs.storage.BlockWritable;
|
||||
import org.apache.poi.poifs.storage.HeaderBlock;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockWriter;
|
||||
import org.apache.poi.poifs.storage.RawDataBlockList;
|
||||
import org.apache.poi.poifs.storage.SmallBlockTableReader;
|
||||
import org.apache.poi.poifs.storage.SmallBlockTableWriter;
|
||||
import org.apache.poi.util.CloseIgnoringInputStream;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LongField;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
/**
|
||||
* <p>This is the main class of the POIFS system; it manages the entire
|
||||
|
@ -200,27 +198,34 @@ public class OPOIFSFileSystem
|
|||
|
||||
/**
|
||||
* Checks that the supplied InputStream (which MUST
|
||||
* support mark and reset, or be a PushbackInputStream)
|
||||
* has a POIFS (OLE2) header at the start of it.
|
||||
* If your InputStream does not support mark / reset,
|
||||
* then wrap it in a PushBackInputStream, then be
|
||||
* support mark and reset) has a POIFS (OLE2) header at the start of it.
|
||||
* If unsure if your InputStream does support mark / reset,
|
||||
* use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
|
||||
* sure to always use that, and not the original!
|
||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
|
||||
*
|
||||
* After the method call, the InputStream is at the
|
||||
* same position as of the time of entering the method.
|
||||
*
|
||||
* @param inp An InputStream which supports either mark/reset
|
||||
*
|
||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.0")
|
||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
|
||||
// We want to peek at the first 8 bytes
|
||||
byte[] header = IOUtils.peekFirst8Bytes(inp);
|
||||
return hasPOIFSHeader(header);
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(inp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the supplied first 8 bytes of a stream / file
|
||||
* has a POIFS (OLE2) header.
|
||||
*
|
||||
* @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.0")
|
||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
|
||||
LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
|
||||
|
||||
// Did it match the signature?
|
||||
return (signature.get() == HeaderBlockConstants._signature);
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -114,27 +114,6 @@ public class POIFSFileSystem
|
|||
super(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks that the supplied InputStream (which MUST
|
||||
* support mark and reset, or be a PushbackInputStream)
|
||||
* has a POIFS (OLE2) header at the start of it.
|
||||
* If your InputStream does not support mark / reset,
|
||||
* then wrap it in a PushBackInputStream, then be
|
||||
* sure to always use that, and not the original!
|
||||
* @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
|
||||
*/
|
||||
public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(inp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the supplied first 8 bytes of a stream / file
|
||||
* has a POIFS (OLE2) header.
|
||||
*/
|
||||
public static boolean hasPOIFSHeader(byte[] header8Bytes) {
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link POIFSFileSystem} in a new {@link File}.
|
||||
* Use {@link #POIFSFileSystem(File)} to open an existing File,
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
|
||||
package org.apache.poi.poifs.macros;
|
||||
|
||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
|
||||
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
|
||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -27,7 +27,6 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
|
|||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
import org.apache.poi.poifs.filesystem.DocumentNode;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
|
@ -67,13 +67,12 @@ public class VBAMacroReader implements Closeable {
|
|||
private NPOIFSFileSystem fs;
|
||||
|
||||
public VBAMacroReader(InputStream rstream) throws IOException {
|
||||
PushbackInputStream stream = new PushbackInputStream(rstream, 8);
|
||||
byte[] header8 = IOUtils.peekFirst8Bytes(stream);
|
||||
|
||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
|
||||
fs = new NPOIFSFileSystem(stream);
|
||||
InputStream is = FileMagic.prepareToCheckMagic(rstream);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
if (fm == FileMagic.OLE2) {
|
||||
fs = new NPOIFSFileSystem(is);
|
||||
} else {
|
||||
openOOXML(stream);
|
||||
openOOXML(is);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.Arrays;
|
|||
import org.apache.poi.hssf.OldExcelFormatException;
|
||||
import org.apache.poi.poifs.common.POIFSBigBlockSize;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.util.HexDump;
|
||||
|
@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField;
|
|||
* The block containing the archive header
|
||||
*/
|
||||
public final class HeaderBlock implements HeaderBlockConstants {
|
||||
private static final byte[] MAGIC_BIFF2 = {
|
||||
0x09, 0x00, // sid=0x0009
|
||||
0x04, 0x00, // size=0x0004
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
};
|
||||
|
||||
private static final byte[] MAGIC_BIFF3 = {
|
||||
0x09, 0x02, // sid=0x0209
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
};
|
||||
|
||||
private static final byte[] MAGIC_BIFF4a = {
|
||||
0x09, 0x04, // sid=0x0409
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x70, 0x00 // 0x70 = multiple values
|
||||
};
|
||||
|
||||
private static final byte[] MAGIC_BIFF4b = {
|
||||
0x09, 0x04, // sid=0x0409
|
||||
0x06, 0x00, // size=0x0006
|
||||
0x00, 0x00, // unused
|
||||
0x00, 0x01
|
||||
};
|
||||
|
||||
private static final byte[] MAGIC_MSWRITEa = {
|
||||
0x31, (byte)0xbe, 0x00, 0x00
|
||||
};
|
||||
private static final byte[] MAGIC_MSWRITEb = {
|
||||
0x32, (byte)0xbe, 0x00, 0x00
|
||||
};
|
||||
|
||||
private static final byte _default_value = ( byte ) 0xFF;
|
||||
|
||||
/**
|
||||
|
@ -151,53 +117,35 @@ public final class HeaderBlock implements HeaderBlockConstants {
|
|||
this._data = data.clone();
|
||||
|
||||
// verify signature
|
||||
long signature = LittleEndian.getLong(_data, _signature_offset);
|
||||
FileMagic fm = FileMagic.valueOf(data);
|
||||
|
||||
if (signature != _signature) {
|
||||
// Is it one of the usual suspects?
|
||||
if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) {
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
break;
|
||||
case OOXML:
|
||||
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
|
||||
+ "You are calling the part of POI that deals with OLE2 Office Documents. "
|
||||
+ "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
|
||||
}
|
||||
|
||||
if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) {
|
||||
case XML:
|
||||
throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
|
||||
+ "Formats such as Office 2003 XML are not supported");
|
||||
}
|
||||
|
||||
// Old MS Write raw stream
|
||||
if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) {
|
||||
case MSWRITE:
|
||||
throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
|
||||
+ "Apache POI doesn't currently support this format");
|
||||
}
|
||||
|
||||
// BIFF2 raw stream
|
||||
if (cmp(MAGIC_BIFF2, data)) {
|
||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. "
|
||||
case BIFF2:
|
||||
case BIFF3:
|
||||
case BIFF4:
|
||||
throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. "
|
||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
|
||||
}
|
||||
|
||||
// BIFF3 raw stream
|
||||
if (cmp(MAGIC_BIFF3, data)) {
|
||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. "
|
||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
|
||||
}
|
||||
|
||||
// BIFF4 raw stream
|
||||
if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) {
|
||||
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. "
|
||||
+ "HSSF only supports the BIFF8 format, try OldExcelExtractor");
|
||||
}
|
||||
|
||||
default:
|
||||
// Give a generic error if the OLE2 signature isn't found
|
||||
throw new NotOLE2FileException("Invalid header signature; read "
|
||||
+ HexDump.longToHex(signature) + ", expected "
|
||||
+ HexDump.longToHex(_signature) + " - Your file appears "
|
||||
+ "not to be a valid OLE2 document");
|
||||
String exp = HexDump.longToHex(_signature);
|
||||
String act = HexDump.longToHex(LittleEndian.getLong(data, 0));
|
||||
throw new NotOLE2FileException(
|
||||
"Invalid header signature; read " + act + ", expected " + exp +
|
||||
" - Your file appears not to be a valid OLE2 document");
|
||||
}
|
||||
|
||||
|
||||
// Figure out our block size
|
||||
if (_data[30] == 12) {
|
||||
this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
|
||||
|
@ -434,15 +382,4 @@ public final class HeaderBlock implements HeaderBlockConstants {
|
|||
stream.write(0);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean cmp(byte[] magic, byte[] data) {
|
||||
int i=0;
|
||||
for (byte m : magic) {
|
||||
byte d = data[i++];
|
||||
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
|
@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
|
|||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -94,9 +94,7 @@ public class SlideShowFactory {
|
|||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from
|
||||
* the given InputStream.
|
||||
*
|
||||
* <p>Your input stream MUST either support mark/reset, or
|
||||
* be wrapped as a {@link PushbackInputStream}! Note that
|
||||
* using an {@link InputStream} has a higher memory footprint
|
||||
* <p>Note that using an {@link InputStream} has a higher memory footprint
|
||||
* than using a {@link File}.</p>
|
||||
*
|
||||
* <p>Note that in order to properly release resources the
|
||||
|
@ -118,9 +116,8 @@ public class SlideShowFactory {
|
|||
/**
|
||||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from
|
||||
* the given InputStream, which may be password protected.
|
||||
* <p>Your input stream MUST either support mark/reset, or
|
||||
* be wrapped as a {@link PushbackInputStream}! Note that
|
||||
* using an {@link InputStream} has a higher memory footprint
|
||||
*
|
||||
* <p>Note that using an {@link InputStream} has a higher memory footprint
|
||||
* than using a {@link File}.</p>
|
||||
*
|
||||
* <p>Note that in order to properly release resources the
|
||||
|
@ -137,24 +134,19 @@ public class SlideShowFactory {
|
|||
* @throws EncryptedDocumentException If the wrong password is given for a protected file
|
||||
*/
|
||||
public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if (! inp.markSupported()) {
|
||||
inp = new PushbackInputStream(inp, 8);
|
||||
}
|
||||
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
// Ensure that there is at least some data there
|
||||
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
|
||||
|
||||
// Try to create
|
||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
|
||||
return create(fs, password);
|
||||
}
|
||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||
return createXSLFSlideShow(inp);
|
||||
}
|
||||
case OOXML:
|
||||
return createXSLFSlideShow(is);
|
||||
default:
|
||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the appropriate HSLFSlideShow / XMLSlideShow from
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryptor;
|
|||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
|
||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||
|
@ -175,22 +174,21 @@ public class ExtractorFactory {
|
|||
}
|
||||
|
||||
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
|
||||
// Figure out the kind of stream
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if (! inp.markSupported()) {
|
||||
inp = new PushbackInputStream(inp, 8);
|
||||
}
|
||||
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
||||
|
||||
if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
|
||||
boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
|
||||
return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs);
|
||||
}
|
||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||
return createExtractor(OPCPackage.open(inp));
|
||||
}
|
||||
case OOXML:
|
||||
return createExtractor(OPCPackage.open(is));
|
||||
default:
|
||||
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to determine the actual type of file and produces a matching text-extractor for it.
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.io.FileInputStream;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Enumeration;
|
||||
|
@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
|||
import org.apache.poi.openxml4j.opc.ZipPackage;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
@Internal
|
||||
public final class ZipHelper {
|
||||
/**
|
||||
* Forward slash use to convert part name between OPC and zip item naming
|
||||
|
@ -172,61 +170,31 @@ public final class ZipHelper {
|
|||
* Warning - this will consume the first few bytes of the stream,
|
||||
* you should push-back or reset the stream after use!
|
||||
*/
|
||||
public static void verifyZipHeader(InputStream stream)
|
||||
throws NotOfficeXmlFileException, IOException {
|
||||
// Grab the first 8 bytes
|
||||
byte[] data = new byte[8];
|
||||
IOUtils.readFully(stream, data);
|
||||
public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException {
|
||||
InputStream is = FileMagic.prepareToCheckMagic(stream);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
// OLE2?
|
||||
long signature = LittleEndian.getLong(data);
|
||||
if (signature == HeaderBlockConstants._signature) {
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
throw new OLE2NotOfficeXmlFileException(
|
||||
"The supplied data appears to be in the OLE2 Format. " +
|
||||
"You are calling the part of POI that deals with OOXML "+
|
||||
"(Office Open XML) Documents. You need to call a different " +
|
||||
"part of POI to process this data (eg HSSF instead of XSSF)");
|
||||
}
|
||||
|
||||
// Raw XML?
|
||||
byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
|
||||
if (data[0] == RAW_XML_FILE_HEADER[0] &&
|
||||
data[1] == RAW_XML_FILE_HEADER[1] &&
|
||||
data[2] == RAW_XML_FILE_HEADER[2] &&
|
||||
data[3] == RAW_XML_FILE_HEADER[3] &&
|
||||
data[4] == RAW_XML_FILE_HEADER[4]) {
|
||||
case XML:
|
||||
throw new NotOfficeXmlFileException(
|
||||
"The supplied data appears to be a raw XML file. " +
|
||||
"Formats such as Office 2003 XML are not supported");
|
||||
}
|
||||
|
||||
default:
|
||||
case OOXML:
|
||||
case UNKNOWN:
|
||||
// Don't check for a Zip header, as to maintain backwards
|
||||
// compatibility we need to let them seek over junk at the
|
||||
// start before beginning processing.
|
||||
|
||||
// Put things back
|
||||
if (stream instanceof PushbackInputStream) {
|
||||
((PushbackInputStream)stream).unread(data);
|
||||
} else if (stream.markSupported()) {
|
||||
stream.reset();
|
||||
} else if (stream instanceof FileInputStream) {
|
||||
// File open check, about to be closed, nothing to do
|
||||
} else {
|
||||
// Oh dear... I hope you know what you're doing!
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private static InputStream prepareToCheckHeader(InputStream stream) {
|
||||
if (stream instanceof PushbackInputStream) {
|
||||
return stream;
|
||||
}
|
||||
if (stream.markSupported()) {
|
||||
stream.mark(8);
|
||||
return stream;
|
||||
}
|
||||
return new PushbackInputStream(stream, 8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens the specified stream as a secure zip
|
||||
*
|
||||
|
@ -237,7 +205,7 @@ public final class ZipHelper {
|
|||
@SuppressWarnings("resource")
|
||||
public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
|
||||
// Peek at the first few bytes to sanity check
|
||||
InputStream checkedStream = prepareToCheckHeader(stream);
|
||||
InputStream checkedStream = FileMagic.prepareToCheckMagic(stream);
|
||||
verifyZipHeader(checkedStream);
|
||||
|
||||
// Open as a proper zip stream
|
||||
|
|
|
@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFile {
|
|||
|
||||
public static class ThresholdInputStream extends PushbackInputStream {
|
||||
long counter = 0;
|
||||
long markPos = 0;
|
||||
ThresholdInputStream cis;
|
||||
|
||||
public ThresholdInputStream(InputStream is, ThresholdInputStream cis) {
|
||||
super(is,1);
|
||||
super(is);
|
||||
this.cis = cis;
|
||||
}
|
||||
|
||||
|
@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFile {
|
|||
|
||||
@Override
|
||||
public long skip(long n) throws IOException {
|
||||
counter = 0;
|
||||
return in.skip(n);
|
||||
long s = in.skip(n);
|
||||
counter += s;
|
||||
return s;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void reset() throws IOException {
|
||||
counter = 0;
|
||||
in.reset();
|
||||
counter = markPos;
|
||||
super.reset();
|
||||
}
|
||||
|
||||
public void advance(int advance) throws IOException {
|
||||
|
@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFile {
|
|||
}
|
||||
|
||||
// one of the limits was reached, report it
|
||||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. "
|
||||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
|
||||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. "
|
||||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter)
|
||||
throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n"
|
||||
+ "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n"
|
||||
+ "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n"
|
||||
+ "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n"
|
||||
+ "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO);
|
||||
}
|
||||
|
||||
|
@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFile {
|
|||
|
||||
@Override
|
||||
public synchronized void mark(int readlimit) {
|
||||
markPos = counter;
|
||||
in.mark(readlimit);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,11 +16,11 @@
|
|||
==================================================================== */
|
||||
package org.apache.poi.ss.usermodel;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
|
||||
import org.apache.poi.EmptyFileException;
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
|
@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.PackageAccess;
|
|||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
@ -127,7 +128,7 @@ public class WorkbookFactory {
|
|||
* the given InputStream.
|
||||
*
|
||||
* <p>Your input stream MUST either support mark/reset, or
|
||||
* be wrapped as a {@link PushbackInputStream}! Note that
|
||||
* be wrapped as a {@link BufferedInputStream}! Note that
|
||||
* using an {@link InputStream} has a higher memory footprint
|
||||
* than using a {@link File}.</p>
|
||||
*
|
||||
|
@ -150,16 +151,15 @@ public class WorkbookFactory {
|
|||
|
||||
/**
|
||||
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from
|
||||
* the given InputStream, which may be password protected.
|
||||
* <p>Your input stream MUST either support mark/reset, or
|
||||
* be wrapped as a {@link PushbackInputStream}! Note that
|
||||
* using an {@link InputStream} has a higher memory footprint
|
||||
* than using a {@link File}.</p>
|
||||
* the given InputStream, which may be password protected.<p>
|
||||
*
|
||||
* <p>Note that in order to properly release resources the
|
||||
* Note that using an {@link InputStream} has a higher memory footprint
|
||||
* than using a {@link File}.<p>
|
||||
*
|
||||
* Note that in order to properly release resources the
|
||||
* Workbook should be closed after use. Note also that loading
|
||||
* from an InputStream requires more memory than loading
|
||||
* from a File, so prefer {@link #create(File)} where possible.</p>
|
||||
* from a File, so prefer {@link #create(File)} where possible.
|
||||
*
|
||||
* @param inp The {@link InputStream} to read data from.
|
||||
* @param password The password that should be used or null if no password is necessary.
|
||||
|
@ -172,24 +172,20 @@ public class WorkbookFactory {
|
|||
* @throws EmptyFileException If an empty stream is given
|
||||
*/
|
||||
public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if (! inp.markSupported()) {
|
||||
inp = new PushbackInputStream(inp, 8);
|
||||
}
|
||||
InputStream is = FileMagic.prepareToCheckMagic(inp);
|
||||
|
||||
// Ensure that there is at least some data there
|
||||
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
// Try to create
|
||||
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
|
||||
switch (fm) {
|
||||
case OLE2:
|
||||
NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
|
||||
return create(fs, password);
|
||||
}
|
||||
if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
|
||||
return new XSSFWorkbook(OPCPackage.open(inp));
|
||||
}
|
||||
case OOXML:
|
||||
return new XSSFWorkbook(OPCPackage.open(is));
|
||||
default:
|
||||
throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the appropriate HSSFWorkbook / XSSFWorkbook from
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
||||
|
@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException;
|
|||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.ObjectData;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
|
|||
InputStream is = null;
|
||||
try {
|
||||
is = getObjectPart().getInputStream();
|
||||
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if (! is.markSupported()) {
|
||||
is = new PushbackInputStream(is, 8);
|
||||
}
|
||||
|
||||
// Ensure that there is at least some data there
|
||||
byte[] header8 = IOUtils.peekFirst8Bytes(is);
|
||||
|
||||
// Try to create
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(header8);
|
||||
is = FileMagic.prepareToCheckMagic(is);
|
||||
return FileMagic.valueOf(is) == FileMagic.OLE2;
|
||||
} catch (IOException e) {
|
||||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
|
||||
return false;
|
||||
|
|
|
@ -19,68 +19,70 @@
|
|||
|
||||
package org.apache.poi;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.Arrays;
|
||||
import static org.junit.Assert.assertArrayEquals;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Class to test that HXF correctly detects OOXML
|
||||
* documents
|
||||
*/
|
||||
public class TestDetectAsOOXML extends TestCase
|
||||
{
|
||||
public void testOpensProperly() throws Exception
|
||||
{
|
||||
public class TestDetectAsOOXML {
|
||||
@Test
|
||||
public void testOpensProperly() throws IOException, InvalidFormatException {
|
||||
OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx"));
|
||||
}
|
||||
|
||||
public void testDetectAsPOIFS() throws Exception {
|
||||
InputStream in;
|
||||
@Test
|
||||
public void testDetectAsPOIFS() throws IOException {
|
||||
Object fileAndMagic[][] = {
|
||||
{ "SampleSS.xlsx", FileMagic.OOXML },
|
||||
{ "SampleSS.xls", FileMagic.OLE2 },
|
||||
{ "SampleSS.txt", FileMagic.UNKNOWN }
|
||||
};
|
||||
|
||||
// ooxml file is
|
||||
in = new PushbackInputStream(
|
||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10
|
||||
);
|
||||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in));
|
||||
in.close();
|
||||
for (Object fm[] : fileAndMagic) {
|
||||
InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]);
|
||||
is = FileMagic.prepareToCheckMagic(is);
|
||||
FileMagic act = FileMagic.valueOf(is);
|
||||
|
||||
// xls file isn't
|
||||
in = new PushbackInputStream(
|
||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10
|
||||
);
|
||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
|
||||
in.close();
|
||||
|
||||
// text file isn't
|
||||
in = new PushbackInputStream(
|
||||
HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10
|
||||
);
|
||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
|
||||
in.close();
|
||||
if (act == FileMagic.OOXML) {
|
||||
assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is));
|
||||
}
|
||||
|
||||
assertEquals("file magic failed for "+fm[0], fm[1], act);
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFileCorruption() throws Exception {
|
||||
|
||||
// create test InputStream
|
||||
byte[] testData = { (byte)1, (byte)2, (byte)3 };
|
||||
byte[] testData = { 1, 2, 3 };
|
||||
ByteArrayInputStream testInput = new ByteArrayInputStream(testData);
|
||||
InputStream is = FileMagic.prepareToCheckMagic(testInput);
|
||||
|
||||
// detect header
|
||||
InputStream in = new PushbackInputStream(testInput, 10);
|
||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
|
||||
assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is));
|
||||
|
||||
// check if InputStream is still intact
|
||||
byte[] test = new byte[3];
|
||||
assertEquals(3, in.read(test));
|
||||
assertTrue(Arrays.equals(testData, test));
|
||||
assertEquals(-1, in.read());
|
||||
in.close();
|
||||
byte[] act = IOUtils.toByteArray(is);
|
||||
assertArrayEquals(testData, act);
|
||||
assertEquals(-1, is.read());
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,29 +17,23 @@
|
|||
|
||||
package org.apache.poi.openxml4j.opc;
|
||||
|
||||
import org.apache.poi.*;
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||
import org.apache.poi.openxml4j.exceptions.*;
|
||||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
|
||||
import org.apache.poi.openxml4j.opc.internal.FileHelper;
|
||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
||||
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.apache.poi.util.*;
|
||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.SAXException;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
@ -52,7 +46,41 @@ import java.util.zip.ZipEntry;
|
|||
import java.util.zip.ZipFile;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.POITestCase;
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.UnsupportedFileFormatException;
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||
import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
|
||||
import org.apache.poi.openxml4j.opc.internal.FileHelper;
|
||||
import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
|
||||
import org.apache.poi.openxml4j.opc.internal.ZipHelper;
|
||||
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.apache.poi.util.DocumentHelper;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.TempFile;
|
||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public final class TestPackage {
|
||||
private static final POILogger logger = POILogFactory.getLogger(TestPackage.class);
|
||||
|
@ -947,20 +975,32 @@ public final class TestPackage {
|
|||
}
|
||||
|
||||
// bug 60128
|
||||
@Test
|
||||
@Test(expected=NotOfficeXmlFileException.class)
|
||||
public void testCorruptFile() throws IOException, InvalidFormatException {
|
||||
OPCPackage pkg = null;
|
||||
File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx");
|
||||
OPCPackage.open(file, PackageAccess.READ);
|
||||
}
|
||||
|
||||
// bug 61381
|
||||
@Test
|
||||
public void testTooShortFilterStreams() throws IOException, InvalidFormatException {
|
||||
File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx");
|
||||
File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls");
|
||||
|
||||
InputStream isList[] = {
|
||||
new PushbackInputStream(new FileInputStream(xssf), 2),
|
||||
new BufferedInputStream(new FileInputStream(xssf), 2),
|
||||
new PushbackInputStream(new FileInputStream(hssf), 2),
|
||||
new BufferedInputStream(new FileInputStream(hssf), 2),
|
||||
};
|
||||
|
||||
try {
|
||||
pkg = OPCPackage.open(file, PackageAccess.READ);
|
||||
} catch (NotOfficeXmlFileException e) {
|
||||
/*System.out.println(e.getClass().getName());
|
||||
System.out.println(e.getMessage());
|
||||
e.printStackTrace();*/
|
||||
// ignore exception
|
||||
for (InputStream is : isList) {
|
||||
WorkbookFactory.create(is).close();
|
||||
}
|
||||
} finally {
|
||||
if (pkg != null) {
|
||||
pkg.close();
|
||||
for (InputStream is : isList) {
|
||||
IOUtils.closeQuietly(is);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.poi.hwpf;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.security.GeneralSecurityException;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
|
@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
|||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
import org.apache.poi.poifs.filesystem.FileMagic;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.BoundedInputStream;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
|
@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore extends POIDocument {
|
|||
* POIFSFileSystem from it, and returns that.
|
||||
*/
|
||||
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
|
||||
// Open a PushbackInputStream, so we can peek at the first few bytes
|
||||
PushbackInputStream pis = new PushbackInputStream(istream,6);
|
||||
byte[] first6 = IOUtils.toByteArray(pis, 6);
|
||||
InputStream is = FileMagic.prepareToCheckMagic(istream);
|
||||
FileMagic fm = FileMagic.valueOf(is);
|
||||
|
||||
// Does it start with {\rtf ? If so, it's really RTF
|
||||
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
|
||||
&& first6[3] == 't' && first6[4] == 'f') {
|
||||
throw new IllegalArgumentException("The document is really a RTF file");
|
||||
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
|
||||
throw new IllegalArgumentException("The document is really a PDF file");
|
||||
if (fm != FileMagic.OLE2) {
|
||||
throw new IllegalArgumentException("The document is really a "+fm+" file");
|
||||
}
|
||||
|
||||
// OK, so it's neither RTF nor PDF
|
||||
// Open a POIFSFileSystem on the (pushed back) stream
|
||||
pis.unread(first6);
|
||||
return new POIFSFileSystem(pis);
|
||||
return new POIFSFileSystem(is);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase.assertContains;
|
|||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
|
@ -86,8 +85,9 @@ public class TestOfficeXMLException extends TestCase {
|
|||
// text file isn't
|
||||
confirmIsPOIFS("SampleSS.txt", false);
|
||||
}
|
||||
|
||||
private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException {
|
||||
InputStream in = new PushbackInputStream(openSampleStream(sampleFileName), 10);
|
||||
InputStream in = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName));
|
||||
try {
|
||||
boolean actualResult;
|
||||
try {
|
||||
|
@ -108,7 +108,7 @@ public class TestOfficeXMLException extends TestCase {
|
|||
InputStream testInput = new ByteArrayInputStream(testData);
|
||||
|
||||
// detect header
|
||||
InputStream in = new PushbackInputStream(testInput, 10);
|
||||
InputStream in = FileMagic.prepareToCheckMagic(testInput);
|
||||
assertFalse(POIFSFileSystem.hasPOIFSHeader(in));
|
||||
|
||||
// check if InputStream is still intact
|
||||
|
@ -126,7 +126,7 @@ public class TestOfficeXMLException extends TestCase {
|
|||
InputStream testInput = new ByteArrayInputStream(testData);
|
||||
|
||||
// detect header
|
||||
InputStream in = new PushbackInputStream(testInput, 10);
|
||||
InputStream in = FileMagic.prepareToCheckMagic(testInput);
|
||||
assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in));
|
||||
|
||||
// check if InputStream is still intact
|
||||
|
|
Loading…
Reference in New Issue