60879 -- try to support old beta-created xlsb files, or throw an exception if we can't. For now, we assume there is only one difference from the regular format; if there are other problems, the existing bounds checking etc. should cause an exception to be thrown.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787832 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2017-03-20 18:50:25 +00:00
parent e592175afa
commit 206ca69e80
4 changed files with 62 additions and 8 deletions

View File

@ -32,11 +32,8 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public class XSSFBFileHandler extends AbstractFileHandler { public class XSSFBFileHandler extends AbstractFileHandler {
static { static {
//this is a "Beta" xlsb version and is not openable with Excel 2016 //add expected failures here:
//TODO: see if we can support this easily enough // AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.add("spreadsheet/Simple.xlsb");
AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.add(
"spreadsheet/Simple.xlsb"
);
} }
@Override @Override

View File

@ -73,6 +73,10 @@ public enum XSSFBRecordType {
BrtEndSst(160), //stored strings end sst BrtEndSst(160), //stored strings end sst
BrtBundleSh(156), //defines worksheet in wb part BrtBundleSh(156), //defines worksheet in wb part
//TODO -- implement these as needed
//BrtFileVersion(128), //file version
//BrtWbProp(153), //Workbook prop contains 1904/1900-date based bit
Unimplemented(-1); Unimplemented(-1);
private static final Map<Integer, XSSFBRecordType> TYPE_MAP = private static final Map<Integer, XSSFBRecordType> TYPE_MAP =

View File

@ -32,6 +32,8 @@ import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xssf.binary.XSSFBCommentsTable; import org.apache.poi.xssf.binary.XSSFBCommentsTable;
import org.apache.poi.xssf.binary.XSSFBParseException; import org.apache.poi.xssf.binary.XSSFBParseException;
import org.apache.poi.xssf.binary.XSSFBParser; import org.apache.poi.xssf.binary.XSSFBParser;
@ -48,6 +50,9 @@ import org.apache.poi.xssf.usermodel.XSSFRelation;
* @since 3.16-beta3 * @since 3.16-beta3
*/ */
public class XSSFBReader extends XSSFReader { public class XSSFBReader extends XSSFReader {
private final static POILogger log = POILogFactory.getLogger(XSSFBReader.class);
/** /**
* Creates a new XSSFReader, for the given package * Creates a new XSSFReader, for the given package
* *
@ -147,6 +152,23 @@ public class XSSFBReader extends XSSFReader {
} }
private void addWorksheet(byte[] data) { private void addWorksheet(byte[] data) {
//try to parse the BrtBundleSh
//if there's an exception, catch it and
//try to figure out if this is one of the old beta-created xlsb files
//or if this is a general exception
try {
tryToAddWorksheet(data);
} catch (XSSFBParseException e) {
if (tryOldFormat(data)) {
log.log(POILogger.WARN, "This file was written with a beta version of Excel. "+
"POI will try to parse the file as a regular xlsb.");
} else {
throw e;
}
}
}
private void tryToAddWorksheet(byte[] data) throws XSSFBParseException {
int offset = 0; int offset = 0;
//this is the sheet state #2.5.142 //this is the sheet state #2.5.142
long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
@ -158,15 +180,37 @@ public class XSSFBReader extends XSSFReader {
} }
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
offset += XSSFBUtils.readXLWideString(data, offset, sb); offset += XSSFBUtils.readXLWideString(data, offset, sb);
String relId = sb.toString(); String relId = sb.toString(); sb.setLength(0);
sb.setLength(0); offset += XSSFBUtils.readXLWideString(data, offset, sb);
XSSFBUtils.readXLWideString(data, offset, sb);
String name = sb.toString(); String name = sb.toString();
if (relId != null && relId.trim().length() > 0) { if (relId != null && relId.trim().length() > 0) {
sheets.add(new XSSFSheetRef(relId, name)); sheets.add(new XSSFSheetRef(relId, name));
} }
} }
/**
 * Fallback parser for a sheet record that the regular parser rejected.
 * It re-reads the record assuming the table id starts at byte 8 instead of
 * byte 4, which is the layout attributed to beta-created xlsb files.
 * <p>
 * The sheet reference is only registered in {@code sheets} once the whole
 * record has been consumed exactly, so a failed attempt leaves
 * {@code sheets} untouched.
 *
 * @param data raw bytes of the sheet record
 * @return {@code true} if the two strings end exactly at the end of
 *         {@code data}, i.e. the old layout matched; {@code false} otherwise
 * @throws XSSFBParseException if the table id read at the old-format offset
 *         is outside the range 1..0xFFFF
 */
private boolean tryOldFormat(byte[] data) throws XSSFBParseException {
    //undocumented what is contained in these 8 bytes.
    //for the non-beta xlsb files, this would be 4, not 8.
    int offset = 8;
    long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
    if (iTabID < 1 || iTabID > 0x0000FFFFL) {
        throw new XSSFBParseException("table id out of range: "+iTabID);
    }
    StringBuilder sb = new StringBuilder();
    offset += XSSFBUtils.readXLWideString(data, offset, sb);
    String relId = sb.toString();
    sb.setLength(0);
    offset += XSSFBUtils.readXLWideString(data, offset, sb);
    String name = sb.toString();

    //only accept the old layout if it accounts for every byte of the record;
    //check this before touching sheets so a mismatch has no side effects
    if (offset != data.length) {
        return false;
    }
    //sb.toString() never returns null, so only the blank check is needed
    if (relId.trim().length() > 0) {
        sheets.add(new XSSFSheetRef(relId, name));
    }
    return true;
}
List<XSSFSheetRef> getSheets() { List<XSSFSheetRef> getSheets() {
return sheets; return sheets;
} }

View File

@ -99,4 +99,13 @@ public class TestXSSFBEventBasedExcelExtractor {
} }
} }
/**
 * Extracts text (with cell comments enabled) from Simple.xlsb and checks
 * that the known sentence from that file is present in the output.
 */
@Test
public void testBeta() throws Exception {
    final String expectedSnippet =
            "This is an example spreadsheet created with Microsoft Excel 2007 Beta 2.";

    XSSFEventBasedExcelExtractor betaExtractor = getExtractor("Simple.xlsb");
    //comments are switched on before extraction, as in the other extractor tests
    betaExtractor.setIncludeCellComments(true);

    String extracted = betaExtractor.getText();
    assertContains(extracted, expectedSnippet);
}
} }