More detection for older Excel formats when opening the POIFS Stream, and a more specific exception for non-OLE2 files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642565 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2014-11-30 16:59:06 +00:00
parent 8a13f67496
commit b970d826c6
4 changed files with 72 additions and 22 deletions

View File

@ -24,6 +24,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.CodepageRecord;
import org.apache.poi.hssf.record.FormulaRecord;
@ -37,6 +38,7 @@ import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.ss.usermodel.Cell;
/**
@ -65,12 +67,10 @@ public class OldExcelExtractor {
public OldExcelExtractor(File f) throws IOException {
try {
open(new NPOIFSFileSystem(f));
} catch (IOException e) {
if (e.getMessage().startsWith("Invalid header signature")) {
open(new FileInputStream(f));
} else {
throw e;
}
} catch (OldExcelFormatException oe) {
open(new FileInputStream(f));
} catch (NotOLE2FileException e) {
open(new FileInputStream(f));
}
}
public OldExcelExtractor(NPOIFSFileSystem fs) throws IOException {

View File

@ -0,0 +1,30 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import java.io.IOException;
/**
 * Signals that the file (or stream) being opened does not appear to be
 *  an OLE2 document at all.
 * <p>This is a specialised {@link IOException}, so code which already
 *  catches {@code IOException} will keep handling it.</p>
 */
public class NotOLE2FileException extends IOException {
    /**
     * Creates the exception with the supplied description.
     *
     * @param s a message explaining why the data was not accepted as OLE2
     */
    public NotOLE2FileException(final String s) {
        super(s);
    }
}

View File

@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@ -16,20 +15,14 @@
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
/**
* This exception is thrown when we try to open a file that's actually
* an Office 2007+ XML file, rather than an OLE2 file (which is what
* POI works with)
*
* @author Nick Burch
* POIFS works with)
*/
public class OfficeXmlFileException extends IllegalArgumentException
{
public class OfficeXmlFileException extends IllegalArgumentException {
public OfficeXmlFileException(String s) {
super(s);
}

View File

@ -23,8 +23,10 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.poi.hssf.OldExcelFormatException;
import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.IOUtils;
@ -124,20 +126,45 @@ public final class HeaderBlock implements HeaderBlockConstants {
if (signature != _signature) {
// Is it one of the usual suspects?
byte[] OOXML_FILE_HEADER = POIFSConstants.OOXML_FILE_HEADER;
if(_data[0] == OOXML_FILE_HEADER[0] &&
if (_data[0] == OOXML_FILE_HEADER[0] &&
_data[1] == OOXML_FILE_HEADER[1] &&
_data[2] == OOXML_FILE_HEADER[2] &&
_data[3] == OOXML_FILE_HEADER[3]) {
throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. You are calling the part of POI that deals with OLE2 Office Documents. You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
}
if ((signature & 0xFF8FFFFFFFFFFFFFL) == 0x0010000200040009L) {
// BIFF2 raw stream starts with BOF (sid=0x0009, size=0x0004, data=0x00t0)
throw new IllegalArgumentException("The supplied data appears to be in BIFF2 format. "
+ "POI only supports BIFF8 format");
}
if (_data[0] == 0x09 && _data[1] == 0x00 && // sid=0x0009
_data[2] == 0x04 && _data[3] == 0x00 && // size=0x0004
_data[4] == 0x00 && _data[5] == 0x00 && // unused
(_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) &&
_data[7] == 0x00) {
// BIFF2 raw stream
throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. " +
"HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
if (_data[0] == 0x09 && _data[1] == 0x02 && // sid=0x0209
_data[2] == 0x06 && _data[3] == 0x00 && // size=0x0006
_data[4] == 0x00 && _data[5] == 0x00 && // unused
(_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) &&
_data[7] == 0x00) {
// BIFF3 raw stream
throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. " +
"HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
if (_data[0] == 0x09 && _data[1] == 0x04 && // sid=0x0409
_data[2] == 0x06 && _data[3] == 0x00 && // size=0x0006
_data[4] == 0x00 && _data[5] == 0x00) { // unused
if (((_data[6] == 0x01 || _data[6] == 0x02 || _data[6] == 0x04) &&
_data[7] == 0x00) ||
(_data[6] == 0x00 && _data[7] == 0x01)) {
// BIFF4 raw stream
throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. " +
"HSSF only supports the BIFF8 format, try OldExcelExtractor");
}
}
// Give a generic error if the OLE2 signature isn't found
throw new IOException("Invalid header signature; read "
throw new NotOLE2FileException("Invalid header signature; read "
+ longToHex(signature) + ", expected "
+ longToHex(_signature) + " - Your file appears "
+ "not to be a valid OLE2 document");