From 4013ffa2203cf67a0a8138b641afffdf69a855b2 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 21 Jan 2021 21:04:24 +0000 Subject: [PATCH] [bug-65096] XLSX Streaming XML not correctly reading multiple inline Strings git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1885770 13f79535-47bb-0310-9956-ffa450edef68 --- .../eventusermodel/XSSFSheetXMLHandler.java | 154 ++++++++++-------- .../TestXSSFSheetXMLHandler.java | 55 +++++++ test-data/spreadsheet/InlineString.xlsx | Bin 0 -> 8401 bytes 3 files changed, 137 insertions(+), 72 deletions(-) create mode 100644 src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java create mode 100644 test-data/spreadsheet/InlineString.xlsx diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index f7ed872f45..302f22f89d 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -204,7 +204,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler { if (isTextTag(localName)) { vIsOpen = true; // Clear contents cache - value.setLength(0); + if (!isIsOpen) { + value.setLength(0); + } } else if ("is".equals(localName)) { // Inline string outer tag isIsOpen = true; @@ -307,86 +309,19 @@ public class XSSFSheetXMLHandler extends DefaultHandler { return; } - String thisStr = null; - // v => contents of a cell if (isTextTag(localName)) { vIsOpen = false; - // Process the value contents as required, now we have it all - switch (nextDataType) { - case BOOLEAN: - char first = value.charAt(0); - thisStr = first == '0' ? "FALSE" : "TRUE"; - break; - - case ERROR: - thisStr = "ERROR:" + value; - break; - - case FORMULA: - if(formulasNotResults) { - thisStr = formula.toString(); - } else { - String fv = value.toString(); - - if (this.formatString != null) { - try { - // Try to use the value as a formattable number - double d = Double.parseDouble(fv); - thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString); - } catch(NumberFormatException e) { - // Formula is a String result not a Numeric one - thisStr = fv; - } - } else { - // No formatting applied, just do raw value in all cases - thisStr = fv; - } - } - break; - - case INLINE_STRING: - // TODO: Can these ever have formatting on them? - XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); - thisStr = rtsi.toString(); - break; - - case SST_STRING: - String sstIndex = value.toString(); - try { - int idx = Integer.parseInt(sstIndex); - RichTextString rtss = sharedStringsTable.getItemAt(idx); - thisStr = rtss.toString(); - } - catch (NumberFormatException ex) { - LOG.log(POILogger.ERROR, "Failed to parse SST index '", sstIndex, ex); - } - break; - - case NUMBER: - String n = value.toString(); - if (this.formatString != null && n.length() > 0) - thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); - else - thisStr = n; - break; - - default: - thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; - break; + if (!isIsOpen) { + outputCell(); } - - // Do we have a comment for this cell? - checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); - XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null; - - // Output - output.cell(cellRef, thisStr, comment); } else if ("f".equals(localName)) { fIsOpen = false; } else if ("is".equals(localName)) { isIsOpen = false; + outputCell(); + value.setLength(0); } else if ("row".equals(localName)) { // Handle any "missing" cells which had comments attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); @@ -433,6 +368,81 @@ public class XSSFSheetXMLHandler extends DefaultHandler { } } + private void outputCell() { + String thisStr = null; + + // Process the value contents as required, now we have it all + switch (nextDataType) { + case BOOLEAN: + char first = value.charAt(0); + thisStr = first == '0' ? "FALSE" : "TRUE"; + break; + + case ERROR: + thisStr = "ERROR:" + value; + break; + + case FORMULA: + if(formulasNotResults) { + thisStr = formula.toString(); + } else { + String fv = value.toString(); + + if (this.formatString != null) { + try { + // Try to use the value as a formattable number + double d = Double.parseDouble(fv); + thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString); + } catch(NumberFormatException e) { + // Formula is a String result not a Numeric one + thisStr = fv; + } + } else { + // No formatting applied, just do raw value in all cases + thisStr = fv; + } + } + break; + + case INLINE_STRING: + // TODO: Can these ever have formatting on them? + XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); + thisStr = rtsi.toString(); + break; + + case SST_STRING: + String sstIndex = value.toString(); + try { + int idx = Integer.parseInt(sstIndex); + RichTextString rtss = sharedStringsTable.getItemAt(idx); + thisStr = rtss.toString(); + } + catch (NumberFormatException ex) { + LOG.log(POILogger.ERROR, "Failed to parse SST index '", sstIndex, ex); + } + break; + + case NUMBER: + String n = value.toString(); + if (this.formatString != null && n.length() > 0) + thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); + else + thisStr = n; + break; + + default: + thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; + break; + } + + // Do we have a comment for this cell? + checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); + XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null; + + // Output + output.cell(cellRef, thisStr, comment); + } + /** * Do a check for, and output, comments in otherwise empty cells. */ diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java new file mode 100644 index 0000000000..4e02aadf24 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFSheetXMLHandler.java @@ -0,0 +1,55 @@ +package org.apache.poi.xssf.eventusermodel; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.util.XMLHelper; +import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; +import org.apache.poi.xssf.usermodel.XSSFComment; +import org.junit.jupiter.api.Test; +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +import java.io.InputStream; +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestXSSFSheetXMLHandler { + private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); + + @Test + public void testInlineString() throws Exception { + try (OPCPackage xlsxPackage = OPCPackage.open(_ssTests.openResourceAsStream("InlineString.xlsx"))) { + final XSSFReader reader = new XSSFReader(xlsxPackage); + + final Iterator iter = reader.getSheetsData(); + + try (InputStream stream = iter.next()) { + final XMLReader sheetParser = XMLHelper.getSaxParserFactory().newSAXParser().getXMLReader(); + + sheetParser.setContentHandler(new XSSFSheetXMLHandler(reader.getStylesTable(), + new ReadOnlySharedStringsTable(xlsxPackage), new SheetContentsHandler() { + + int cellCount = 0; + + @Override + public void startRow(final int rowNum) { + } + + @Override + public void endRow(final int rowNum) { + } + + @Override + public void cell(final String cellReference, final String formattedValue, + final XSSFComment comment) { + assertEquals("\uD83D\uDE1Cmore text", formattedValue); + assertEquals(cellCount++, 0); + } + }, false)); + + sheetParser.parse(new InputSource(stream)); + } + } + } +} diff --git a/test-data/spreadsheet/InlineString.xlsx b/test-data/spreadsheet/InlineString.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..308d669ccbcdd82dda4e67d0a0f5b6a1afc841e5 GIT binary patch literal 8401 zcmeHM1y>wdw{2+Lf#kZ==hVHo&OZCxeU;^aaJT>j01^NIpa7WeXIdM=006OY000gE306-6 z?BHVI;9{ii>1Y8lVD+%KBYy`6OP>vZg+Bk^<3D%?N)iVYJJ~U%E~IZI)>vfcDxV?o z>;?8HQaM-ywY zy8*38w@{#WwtY!aa{-IL|HByO>Ilxbtqp^>;mInC3|u6o_Z+^PVV*LDwi<(}YJ>B2 zirgm%eLdHNu^H?Fdq`{x#{Absgr-k?PZYR`DnCS=7U_1!f|y!;^nJWq>0d{lDqlD9 z;#nK9=w&?%pa>kD0w{wbtMN-Y9fn?ZJ;i%3KXBMVK7T{Aj6>43NNnDi6%%W&NJweV zyp-3IC8;F9#~A2a(A!tf-=?%@M&jd;KZ3XhCw2LNdyci$c+w`0+1Exv{W5fKsdr1r zdv|zUFKGWLdCQOU9svM&cmM*F{|3t%4R)$iXspRYQHKhJrIE9R9fXbb$N9f-{13+9 zFGDYhS5WF?M+-TSz76fYn3#{n6qj=slWw9=_w|>VdsGwso|<^Rm5u~cohS(S!mq{m zwtseBIC{I6;(Ud>BoZ4}h^p4TG$iHP$rX{2);amPQ^|4{mfOVH#Ch5aSxmt`;$;%a-PA?0;^+1m+28Ge(= z`CI5u!vy4aN7L|nA*NQ-Wj?)j6lYgN8fw-8Hsz*S&VrPlM&@>HCz2VhICnlQif?*V zDY>yPIfq{K&}5(a=~i_a62IMv@gSKpEK_6RyN z9(HW*U}umi7!3NsTO}Iy;0!^+JKwxJ#4dM&j5>g=ARdzL$C(edRp)YfeeiVJFkhl< zR5fl6E;x|4W+xI|Be98RAN6cp3dBHeJ=L_jfeEApgX7dMrnt2eA_MsbK3v&n<+O1N zN@WCUzB5rfiF5tFk)ZTI@-bL{9<_oKkiC^}`9YmnM>8d*B!526S_jC4N&zY|rccqF zlzkpqW-dRGWMQ_e$Fm;YAQiSoL1TuVH%eT!M2-mkIv zPJf)`+1yD)aSzNm_qMR09-DP_s-3_F^#)$VL|GK-UKV8Vp&x~1^Wb&0h@;5;DLZAS}Ri`A_jtXStq_L-_TBTNw3+)#L?9yQbRur;oPbFC~NO8Z|J)#jy_WXpSXi4}0 zhc_DIEat7TuUf)UkYB8D<+c^*(2W=PK8s-J=IQc0;?}(aSJ~aT?3smI=X9etZ_pE> z47~+hQxTpx(pS8jo+Na0ZK2Cv+Mm{g8QYa8+%6zSght72X&xqpgKYML_O_z%3>B+U zm-Mv!Vh�*<)=o%E{dr$hTiIUVUxr^AfYI=MR`0CLT0C2fa+guFrA(I7E$lSbXfe3+6U;$L+T!Dh~Vaz^_h@t@y-tJ(-F*u8)O+K8Mq{E zLl#@^irYZSMis!Isth17;UmFGB+zWg>g1qq+Bo+?1B5<3ythOF`m)^T2Zh@|0U02g zm~sNW(+FiPA^;K$6p;T*T))EdPZEQH)|60K|7Ra1stR&l?3gX6_o3|W8E$x3(=Ke3 zyBa%>kou|_W@#z7d`}mN85<3>zsa!y!6BXpgFS8+JQyoLoU?YO!U$|QFC4oAA$aiM z$siEPbg%SD;0t&h?A`61;yqN1bO=s^XnZeo7B2tyOM05eq3`o4B~KehL%40fvb%EH zLWW8Ep-Ft8dd>g(mD+A;V0B#`c{+pT`a^@n>^Z{QKu@xflgG$1!swoWk19y1S| z9$2pR^-L^6zw}=r1AwkSb`-f(iHJ;&PyCU?A=VZaE)ce#4abjUUK%g= zqxcNDke+Mj#1`6qB?V6*!xx4Mhr#Y#5H58)>|sr%7?Uwft*Rx>`~bck8e4ZLZ{equ zS~Z|KQJ{qv+`H(mNxJp$^Y(~ux<1b*iAX3Inxd$b#wUo(i6@27Cq^_%pgOGr8*zm^ zD%)gKi%rR2-C0+)czvlXtu%fk&k&5sutVqt9}+GfdutQVH4?~VqYozGm2W*i1L92D z^>L5vwDC(}0j*2SZ8<4PRlZ%gOZZNGzs^(N8mzgB+A?C)QKBbAyqC!9`YuNB4yV3w zV4t?a&V9k%AD?*bBRMQwZnsZCSytYm`7p=fC3+woEjx3t;G$vaYomwX%ENX7%ukAl z06;hSk5${xS#zxOw??dXD0$xelXGiCdW+vfLK?J?iqDj%T1rS2 ze`y$Ks_gxelY(j&JxSKj`$MzRGDn;^dKCSlR#62_PQ_|xrFdBIrs_T^#muou7KhN2 z74OjtdTeZ8>nobO4T3s0{1HuB$#n!B35Vp)`we_`0jpsIniC?o|RNLfZqz=fD8L_9m0D({?=zaxFj8fGB)z~=*DTE#5;iylo7T8q zodI1|G=J;r0>V);Qf!M_23Z)j&$?!;#=w`N|v~+ozYu z$6E;mi!aYMlwwF{W=Tj>4=KVc4$GF!4rn7y0x)ilV4h}nv5R-bKf~H|E8HLGg*DV8 z-|lcN-|Ltq(e8$C7H$PiH`?bHIEn}mW^on3Oxi({dCHGx0^o&TX6$Nb-J0OJI4V#f)(6RLx*GmyqDehoqS%LN%ADZbI)mQHb`i3D>|z!-y88aT4jSYzb5E zSB6cE z=%+n6-po>Ju2|=yNZcQQ|J)?*{t~WvBnpwea8{*}uE7@OE9GT6Pb%|VA{EQ36YWzv z&Y75DG+hac`L;I^@@MKET(sF{xuLF1cyEVGZxl~FVxq?str|)i(?6iX$6t9c=toPW z$mVbAiX1GIj_>$xs=Mo?Aka^b5t_(b605ipsu*F2;~HAMGMrA+9&yngQQl6;(S7c; zj>2w_hv%_=z9iMM9x65?7Gg<~xM7A@P@UEsRfh64?YUP1*M~G1Wm7gT@)#Gj=|(3{ zTUVQ+7O7koswkJLX!cR|K!Rmvp~2)Eo$ki8&~Z+s2Ga&&XRht&M_3>!O1k<0T3WT? zE+L1j8e60~Mk;R570xDM=cETh1J$@5!8^5jx5qMB4wvpG$&U#tLnRXh^%E5P5jf&f zy%W1wj9C1V(78uQ-q|y0`D2ReuBJa@BFk{XbP?+H;B2R|37IEU$xSXL`@yf6B^ zNLoTYP14aq79G<96x`s$%GK6pV?ul#?jITCygkSfkWr2ow|?cmV%%>%*&Hz{kocNP zFjCp=U8;Fj2Typ}mk}iQH5$Z=>JQ<5EoB73&@X3C0D~9G|P^8 zFFNIidA;B7w#tlLJ1{z~yxE;ix?5~I5MF2)MQ!!9`~F7J&5%T}cEmSaabpZJG!Ic7 z%}_+CHtTjSl(CgjlAjwn_LX%)q5N2A&)EGit1u<#gS{?xbFDpxv-dR}kwg~DH% zw^MH;9iF0gj#FkY$ssk6t%f&Va4H?WBHE<7@;(-q8l#gTE|E!?oZ0;9DS3SQMlG@| z3-R`4I_mzp2>$a*{W)+VuM3O6ErEh{c%!wg>3Eu1{K5@o%v#=v=Hxz#V|W?JurkGE z!(7qY^EvF=B!v_*@@nLa(1~m=GKXDHix0mNsPv6go4kD38%-j))kmZ$}Q*ECNxhYSVWjzlPI)81rC-G)&*!zGwtH-3m zF$~qo6we!WsaGv_h~quVRCdr^HJH35P=?s}y53I`j5hzmiBs^|^()WDZ~1|Jq8F>1 z{c7#Dh?3}wc6Zw@z*f*6N#WZ9yaM9V!oxf?ayno?LmFm`?{(`0!IVL?Iuq*Felc37 z$gu){x^izj$ze!z57~S4DR79C!OQSRVN^2&xk_7clR}+u?_BZN1rU*{9U2_|ik`(&z2wvR3VOUYT%G*q@8H%so?Z z`@+1B`>iOcRk(%uvwSKY^*P%oREpn=L)V0Gv#q#duVrIT-iqB5|6YUlL2Gc^`;YKp z&@j%1{&D{ZXKNE@3v)FWXB!8rpHa;dEr!y;jtMvj^h5Hl`|w0E*XB_m+c4rQH+$GV zT1&ont$Zm@LQPUluDi2EwWsJf9e2Hyr6Ib)r3hIuH_NaQpGQq4D7g0*g>LacRDY2kjLLJ%G?ANtu5k#iDQBs zLXRH(T_$CLdlCB-3Q-*>7_p!WPIIuCvNPBb!e$0`w)kW3;=k)m=qHVe|3$lhY1*`| z!6ZZT%z!*LmL9gEYS)H{$BlV5_WZ!<3A&g?`zYk#O#_#6<6JT0HSA8mu2mQDR6s5%4y^<<;dV}iv(WFy zFAa;$jcYoQW!1KYM3S-rhL9RMP=r(FAak&0T-YQ2iKde2N_wsU26pxAXWpSU;zu!) zK^pr*A&P1I8e5NOce%SVEAuu#kF>b2Tid16+I%;Niha9sQP7j(Fi}6gR|=OcoD_(R zIycGdx70-WIYHdJIYQ_GcfXlIY~cyQkd26ATnUoqY~hy|F;~XxH5172X32J0h!P-kB5(*uLV*=43~5 zB%bkO|Gpu5Swz@=@};~w+Uar4w#`IPUMi}KpNGBHyGwgagg3IMv_d#x z6$rEAQ8h}^I!`~}MzH{L;_XsH%mgmJf-a=kUH}O;x+#odO_oWVHhO{gw!E}sLR|v_ zN4@Mviu|(yU_r=NKEpJnYdUYzCi9NCV70T(U@IT#CJzvj;iRXi`s_FJ!Yg>i9Z~dg z5isWsh53AXdFb`TNh`bX{pxAw*|C?3FYA?o6&KCoedRsspXnY3mIa#H|9#W<&(HN| z`!8F-%5r~q@b~ShKMjAhDNs!OvRU=J;qQ|DU#88_TIx3e|99iR%Zq=R0ssmqKaKw% zvGIqWr$3qP-!v!cKOB)mTYt3w>%>14C=|etCjd~fiV$$M+QKQb{|)BPc7JoZL>hYi zdyjwDX?{BRd8&ohjnLu#uG;+W;P*=NF9$q$KOOv4cm6ucf3o_oqlAMBZP0f^e&_gK zM@aqE6#d5t|7_)8hv0u)|K7pBGw@&C0{}ij-|`PW{@wiVsq|NKZfL3Uf98KAR%JPO TXp{f|D9~Rol=4qVe_Z_!a@A*} literal 0 HcmV?d00001