diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
index f7ed872f45..302f22f89d 100644
--- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
+++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
@@ -204,7 +204,11 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
         if (isTextTag(localName)) {
             vIsOpen = true;
             // Clear contents cache
-            value.setLength(0);
+            // Inside an inline string the buffer is kept: the cell is only
+            // emitted once the enclosing "is" element closes
+            if (!isIsOpen) {
+                value.setLength(0);
+            }
         } else if ("is".equals(localName)) {
             // Inline string outer tag
             isIsOpen = true;
@@ -307,86 +311,22 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
             return;
         }
 
-        String thisStr = null;
-
         // v => contents of a cell
         if (isTextTag(localName)) {
             vIsOpen = false;
 
-            // Process the value contents as required, now we have it all
-            switch (nextDataType) {
-                case BOOLEAN:
-                    char first = value.charAt(0);
-                    thisStr = first == '0' ? "FALSE" : "TRUE";
-                    break;
-
-                case ERROR:
-                    thisStr = "ERROR:" + value;
-                    break;
-
-                case FORMULA:
-                    if(formulasNotResults) {
-                        thisStr = formula.toString();
-                    } else {
-                        String fv = value.toString();
-
-                        if (this.formatString != null) {
-                            try {
-                                // Try to use the value as a formattable number
-                                double d = Double.parseDouble(fv);
-                                thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString);
-                            } catch(NumberFormatException e) {
-                                // Formula is a String result not a Numeric one
-                                thisStr = fv;
-                            }
-                        } else {
-                            // No formatting applied, just do raw value in all cases
-                            thisStr = fv;
-                        }
-                    }
-                    break;
-
-                case INLINE_STRING:
-                    // TODO: Can these ever have formatting on them?
-                    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
-                    thisStr = rtsi.toString();
-                    break;
-
-                case SST_STRING:
-                    String sstIndex = value.toString();
-                    try {
-                        int idx = Integer.parseInt(sstIndex);
-                        RichTextString rtss = sharedStringsTable.getItemAt(idx);
-                        thisStr = rtss.toString();
-                    }
-                    catch (NumberFormatException ex) {
-                        LOG.log(POILogger.ERROR, "Failed to parse SST index '", sstIndex, ex);
-                    }
-                    break;
-
-                case NUMBER:
-                    String n = value.toString();
-                    if (this.formatString != null && n.length() > 0)
-                        thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
-                    else
-                        thisStr = n;
-                    break;
-
-                default:
-                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
-                    break;
+            if (!isIsOpen) {
+                outputCell();
+                // Drop the emitted text: text tags inside a later inline
+                // string do not reset the buffer and would inherit it
+                value.setLength(0);
             }
-
-            // Do we have a comment for this cell?
-            checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
-            XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null;
-
-            // Output
-            output.cell(cellRef, thisStr, comment);
         } else if ("f".equals(localName)) {
             fIsOpen = false;
         } else if ("is".equals(localName)) {
             isIsOpen = false;
+            outputCell();
+            value.setLength(0);
         } else if ("row".equals(localName)) {
             // Handle any "missing" cells which had comments attached
             checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW);
@@ -433,6 +373,88 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
         }
     }
 
+    /**
+     * Turns the buffered cell contents into the string to report, based
+     * on {@code nextDataType}, and passes it to {@code output.cell(...)}
+     * along with the comment found for {@code cellRef}, if any.
+     * The reported string is {@code null} when a shared string index
+     * cannot be parsed; that failure is logged instead of thrown.
+     */
+    private void outputCell() {
+        String thisStr = null;
+
+        // Process the value contents as required, now we have it all
+        switch (nextDataType) {
+            case BOOLEAN:
+                char first = value.charAt(0);
+                thisStr = first == '0' ? "FALSE" : "TRUE";
+                break;
+
+            case ERROR:
+                thisStr = "ERROR:" + value;
+                break;
+
+            case FORMULA:
+                if(formulasNotResults) {
+                    thisStr = formula.toString();
+                } else {
+                    String fv = value.toString();
+
+                    if (this.formatString != null) {
+                        try {
+                            // Try to use the value as a formattable number
+                            double d = Double.parseDouble(fv);
+                            thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString);
+                        } catch(NumberFormatException e) {
+                            // Formula is a String result not a Numeric one
+                            thisStr = fv;
+                        }
+                    } else {
+                        // No formatting applied, just do raw value in all cases
+                        thisStr = fv;
+                    }
+                }
+                break;
+
+            case INLINE_STRING:
+                // TODO: Can these ever have formatting on them?
+                XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
+                thisStr = rtsi.toString();
+                break;
+
+            case SST_STRING:
+                String sstIndex = value.toString();
+                try {
+                    int idx = Integer.parseInt(sstIndex);
+                    RichTextString rtss = sharedStringsTable.getItemAt(idx);
+                    thisStr = rtss.toString();
+                }
+                catch (NumberFormatException ex) {
+                    LOG.log(POILogger.ERROR, "Failed to parse SST index '", sstIndex, ex);
+                }
+                break;
+
+            case NUMBER:
+                String n = value.toString();
+                if (this.formatString != null && n.length() > 0)
+                    thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
+                else
+                    thisStr = n;
+                break;
+
+            default:
+                thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
+                break;
+        }
+
+        // Do we have a comment for this cell?
+        checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL);
+        XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null;
+
+        // Output
+        output.cell(cellRef, thisStr, comment);
+    }
+
     /**
      * Do a check for, and output, comments in otherwise empty cells.
*/ diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java new file mode 100644 index 0000000000..4e02aadf24 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java @@ -0,0 +1,55 @@ +package org.apache.poi.xssf.eventusermodel; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.util.XMLHelper; +import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; +import org.apache.poi.xssf.usermodel.XSSFComment; +import org.junit.jupiter.api.Test; +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +import java.io.InputStream; +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestXSSFSheetXMLHandler { + private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); + + @Test + public void testInlineString() throws Exception { + try (OPCPackage xlsxPackage = OPCPackage.open(_ssTests.openResourceAsStream("InlineString.xlsx"))) { + final XSSFReader reader = new XSSFReader(xlsxPackage); + + final Iterator iter = reader.getSheetsData(); + + try (InputStream stream = iter.next()) { + final XMLReader sheetParser = XMLHelper.getSaxParserFactory().newSAXParser().getXMLReader(); + + sheetParser.setContentHandler(new XSSFSheetXMLHandler(reader.getStylesTable(), + new ReadOnlySharedStringsTable(xlsxPackage), new SheetContentsHandler() { + + int cellCount = 0; + + @Override + public void startRow(final int rowNum) { + } + + @Override + public void endRow(final int rowNum) { + } + + @Override + public void cell(final String cellReference, final String formattedValue, + final XSSFComment comment) { + assertEquals("\uD83D\uDE1Cmore text", formattedValue); + assertEquals(cellCount++, 0); + } + }, false)); + + sheetParser.parse(new 
InputSource(stream)); + } + } + } +} diff --git a/test-data/spreadsheet/InlineString.xlsx b/test-data/spreadsheet/InlineString.xlsx new file mode 100644 index 0000000000..308d669ccb Binary files /dev/null and b/test-data/spreadsheet/InlineString.xlsx differ