59021 -- fix content extraction from namespaced elements in XSSFEventBasedExcelExtractor

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1730992 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2016-02-18 01:49:59 +00:00
parent 9ee05a1452
commit 0b0911db6b
3 changed files with 47 additions and 27 deletions

View File

@ -41,7 +41,7 @@ import org.xml.sax.helpers.DefaultHandler;
*/ */
public class XSSFSheetXMLHandler extends DefaultHandler { public class XSSFSheetXMLHandler extends DefaultHandler {
private static final POILogger logger = POILogFactory.getLogger(XSSFSheetXMLHandler.class); private static final POILogger logger = POILogFactory.getLogger(XSSFSheetXMLHandler.class);
static final String SPREADSHEETML_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
/** /**
* These are the different kinds of cells we support. * These are the different kinds of cells we support.
* We keep track of the current one between * We keep track of the current one between
@ -186,17 +186,21 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
@Override @Override
@SuppressWarnings("unused") @SuppressWarnings("unused")
public void startElement(String uri, String localName, String name, public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException { Attributes attributes) throws SAXException {
if (isTextTag(name)) { if (uri != null && ! uri.equals(SPREADSHEETML_NS)) {
return;
}
if (isTextTag(localName)) {
vIsOpen = true; vIsOpen = true;
// Clear contents cache // Clear contents cache
value.setLength(0); value.setLength(0);
} else if ("is".equals(name)) { } else if ("is".equals(localName)) {
// Inline string outer tag // Inline string outer tag
isIsOpen = true; isIsOpen = true;
} else if ("f".equals(name)) { } else if ("f".equals(localName)) {
// Clear contents cache // Clear contents cache
formula.setLength(0); formula.setLength(0);
@ -231,14 +235,14 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
fIsOpen = true; fIsOpen = true;
} }
} }
else if("oddHeader".equals(name) || "evenHeader".equals(name) || else if("oddHeader".equals(localName) || "evenHeader".equals(localName) ||
"firstHeader".equals(name) || "firstFooter".equals(name) || "firstHeader".equals(localName) || "firstFooter".equals(localName) ||
"oddFooter".equals(name) || "evenFooter".equals(name)) { "oddFooter".equals(localName) || "evenFooter".equals(localName)) {
hfIsOpen = true; hfIsOpen = true;
// Clear contents cache // Clear contents cache
headerFooter.setLength(0); headerFooter.setLength(0);
} }
else if("row".equals(name)) { else if("row".equals(localName)) {
String rowNumStr = attributes.getValue("r"); String rowNumStr = attributes.getValue("r");
if(rowNumStr != null) { if(rowNumStr != null) {
rowNum = Integer.parseInt(rowNumStr) - 1; rowNum = Integer.parseInt(rowNumStr) - 1;
@ -248,7 +252,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
output.startRow(rowNum); output.startRow(rowNum);
} }
// c => cell // c => cell
else if ("c".equals(name)) { else if ("c".equals(localName)) {
// Set up defaults. // Set up defaults.
this.nextDataType = xssfDataType.NUMBER; this.nextDataType = xssfDataType.NUMBER;
this.formatIndex = -1; this.formatIndex = -1;
@ -269,11 +273,13 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
else { else {
// Number, but almost certainly with a special style or format // Number, but almost certainly with a special style or format
XSSFCellStyle style = null; XSSFCellStyle style = null;
if (cellStyleStr != null) { if (stylesTable != null) {
int styleIndex = Integer.parseInt(cellStyleStr); if (cellStyleStr != null) {
style = stylesTable.getStyleAt(styleIndex); int styleIndex = Integer.parseInt(cellStyleStr);
} else if (stylesTable.getNumCellStyles() > 0) { style = stylesTable.getStyleAt(styleIndex);
style = stylesTable.getStyleAt(0); } else if (stylesTable.getNumCellStyles() > 0) {
style = stylesTable.getStyleAt(0);
}
} }
if (style != null) { if (style != null) {
this.formatIndex = style.getDataFormat(); this.formatIndex = style.getDataFormat();
@ -286,12 +292,17 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
} }
@Override @Override
public void endElement(String uri, String localName, String name) public void endElement(String uri, String localName, String qName)
throws SAXException { throws SAXException {
if (uri != null && ! uri.equals(SPREADSHEETML_NS)) {
return;
}
String thisStr = null; String thisStr = null;
// v => contents of a cell // v => contents of a cell
if (isTextTag(name)) { if (isTextTag(localName)) {
vIsOpen = false; vIsOpen = false;
// Process the value contents as required, now we have it all // Process the value contents as required, now we have it all
@ -364,11 +375,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// Output // Output
output.cell(cellRef, thisStr, comment); output.cell(cellRef, thisStr, comment);
} else if ("f".equals(name)) { } else if ("f".equals(localName)) {
fIsOpen = false; fIsOpen = false;
} else if ("is".equals(name)) { } else if ("is".equals(localName)) {
isIsOpen = false; isIsOpen = false;
} else if ("row".equals(name)) { } else if ("row".equals(localName)) {
// Handle any "missing" cells which had comments attached // Handle any "missing" cells which had comments attached
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
@ -377,19 +388,19 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well
nextRowNum = rowNum + 1; nextRowNum = rowNum + 1;
} else if ("sheetData".equals(name)) { } else if ("sheetData".equals(localName)) {
// Handle any "missing" cells which had comments attached // Handle any "missing" cells which had comments attached
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
} }
else if("oddHeader".equals(name) || "evenHeader".equals(name) || else if("oddHeader".equals(localName) || "evenHeader".equals(localName) ||
"firstHeader".equals(name)) { "firstHeader".equals(localName)) {
hfIsOpen = false; hfIsOpen = false;
output.headerFooter(headerFooter.toString(), true, name); output.headerFooter(headerFooter.toString(), true, localName);
} }
else if("oddFooter".equals(name) || "evenFooter".equals(name) || else if("oddFooter".equals(localName) || "evenFooter".equals(localName) ||
"firstFooter".equals(name)) { "firstFooter".equals(localName)) {
hfIsOpen = false; hfIsOpen = false;
output.headerFooter(headerFooter.toString(), false, name); output.headerFooter(headerFooter.toString(), false, localName);
} }
} }

View File

@ -337,4 +337,13 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
extractor.close(); extractor.close();
} }
} }
/**
 * Regression test for bug 59021: text must still be extracted by
 * {@link XSSFEventBasedExcelExtractor} from the sample workbook
 * {@code 59021.xlsx}.
 *
 * @throws Exception if the sample package cannot be opened or read
 */
public void test59021() throws Exception {
    XSSFEventBasedExcelExtractor ex =
            new XSSFEventBasedExcelExtractor(
                    XSSFTestDataSamples.openSamplePackage("59021.xlsx"));
    try {
        String text = ex.getText();
        assertTrue("can't find Abkhazia", text.contains("Abkhazia - Fixed"));
        assertTrue("can't find 10/02/2016", text.contains("10/02/2016"));
    } finally {
        // Close the extractor even when an assertion fails, matching the
        // other tests in this class that call close() when done.
        ex.close();
    }
}
} }

Binary file not shown.