Added XSSF EventModel support for inline strings. Also added a unit test covering this for the event model extractor, and another for the usermodel extractor, which already supported inline strings.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1045020 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-13 05:07:19 +00:00
parent 9f8011aa3d
commit 5ff8131955
4 changed files with 72 additions and 3 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.8-beta1" date="2010-??-??"> <release version="3.8-beta1" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="add">Added inline string support to XSSF EventModel</action>
<action dev="POI-DEVELOPERS" type="fix">50246 - Properly position GutsRecord when reading HSSF workbooks</action> <action dev="POI-DEVELOPERS" type="fix">50246 - Properly position GutsRecord when reading HSSF workbooks</action>
<action dev="POI-DEVELOPERS" type="add">48539 - Added implementation for MROUND(), VAR() and VARP()</action> <action dev="POI-DEVELOPERS" type="add">48539 - Added implementation for MROUND(), VAR() and VARP()</action>
<action dev="POI-DEVELOPERS" type="add">50446 - Code cleanup and optimizations to keep some IDE quiet</action> <action dev="POI-DEVELOPERS" type="add">50446 - Code cleanup and optimizations to keep some IDE quiet</action>

View File

@ -61,6 +61,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private boolean vIsOpen; private boolean vIsOpen;
// Set when F start element is seen // Set when F start element is seen
private boolean fIsOpen; private boolean fIsOpen;
// Set when an Inline String "is" is seen
private boolean isIsOpen;
// Set when a header/footer element is seen // Set when a header/footer element is seen
private boolean hfIsOpen; private boolean hfIsOpen;
@ -113,13 +115,33 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
} }
/**
 * Decides whether the given element name denotes an element whose
 * character content should be treated as cell text.
 *
 * @param name the element name to classify
 * @return true for "v", for "inlineStr", and for "t" while the
 *         {@code isIsOpen} flag is set; false for anything else
 */
private boolean isTextTag(String name) {
    // "v" and "inlineStr" count as text tags regardless of any other state
    if ("v".equals(name) || "inlineStr".equals(name)) {
        return true;
    }
    // A "t" only counts inside an inline string, i.e. <is><t>...</t></is>
    return isIsOpen && "t".equals(name);
}
public void startElement(String uri, String localName, String name, public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException { Attributes attributes) throws SAXException {
if ("inlineStr".equals(name) || "v".equals(name)) { if (isTextTag(name)) {
vIsOpen = true; vIsOpen = true;
// Clear contents cache // Clear contents cache
value.setLength(0); value.setLength(0);
} else if ("is".equals(name)) {
// Inline string outer tag
isIsOpen = true;
} else if ("f".equals(name)) { } else if ("f".equals(name)) {
// Clear contents cache // Clear contents cache
formula.setLength(0); formula.setLength(0);
@ -202,7 +224,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
String thisStr = null; String thisStr = null;
// v => contents of a cell // v => contents of a cell
if ("v".equals(name)) { if (isTextTag(name)) {
vIsOpen = false; vIsOpen = false;
// Process the value contents as required, now we have it all // Process the value contents as required, now we have it all
@ -225,7 +247,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
break; break;
case INLINE_STRING: case INLINE_STRING:
// TODO: have seen an example of this, so it's untested. // TODO: Can these ever have formatting on them?
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString(); thisStr = rtsi.toString();
break; break;
@ -259,6 +281,8 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
output.cell(cellRef, thisStr); output.cell(cellRef, thisStr);
} else if ("f".equals(name)) { } else if ("f".equals(name)) {
fIsOpen = false; fIsOpen = false;
} else if ("is".equals(name)) {
isIsOpen = false;
} else if ("row".equals(name)) { } else if ("row".equals(name)) {
output.endRow(); output.endRow();
} }

View File

@ -114,6 +114,28 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
)); ));
} }
/**
 * Extracts the text of InlineStrings.xlsx with formulas (rather than
 * their results) requested, and checks that the expected numbers,
 * strings, inline strings and formulas all appear in it.
 */
public void testInlineStrings() throws Exception {
    XSSFEventBasedExcelExtractor inlineExtractor = getExtractor("InlineStrings.xlsx");
    inlineExtractor.setFormulasNotResults(true);
    final String text = inlineExtractor.getText();
    final String failureMessage = "Unable to find expected word in text\n" + text;

    // Checked in order, so the first missing fragment is the one reported
    final String[] expectedFragments = {
        // Numbers
        "43",
        "22",
        // Strings
        "ABCDE",
        "Long Text",
        // Inline Strings
        "1st Inline String",
        "And More",
        // Formulas
        "A2",
        "A5-A$2",
    };
    for (String fragment : expectedFragments) {
        assertTrue(failureMessage, text.contains(fragment));
    }
}
/** /**
* Test that we return pretty much the same as * Test that we return pretty much the same as
* ExcelExtractor does, when we're both passed * ExcelExtractor does, when we're both passed

View File

@ -174,4 +174,26 @@ public final class TestXSSFExcelExtractor extends TestCase {
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
} }
/**
 * Extracts the text of InlineStrings.xlsx with formulas (rather than
 * their results) requested, and checks that the expected numbers,
 * strings, inline strings and formulas all appear in it.
 */
public void testInlineStrings() {
    XSSFExcelExtractor inlineExtractor = getExtractor("InlineStrings.xlsx");
    inlineExtractor.setFormulasNotResults(true);
    final String text = inlineExtractor.getText();
    final String failureMessage = "Unable to find expected word in text\n" + text;

    // Checked in order, so the first missing fragment is the one reported
    final String[] expectedFragments = {
        // Numbers
        "43",
        "22",
        // Strings
        "ABCDE",
        "Long Text",
        // Inline Strings
        "1st Inline String",
        "And More",
        // Formulas
        "A2",
        "A5-A$2",
    };
    for (String fragment : expectedFragments) {
        assertTrue(failureMessage, text.contains(fragment));
    }
}
} }