From 9649ca955ae98b28ed34d9a6ada01e32ea9309e7 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Tue, 24 Mar 2020 10:58:16 +0000 Subject: [PATCH] Bug 64244: Take the replacement of RichText strings into account when computing length of strings git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875571 13f79535-47bb-0310-9956-ffa450edef68 --- .../xssf/usermodel/XSSFRichTextString.java | 44 +++++++++++++++---- .../usermodel/TestXSSFRichTextString.java | 25 +++++++++++ 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java index 225b9eb860..7af9ee4cac 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRichTextString.java @@ -201,7 +201,7 @@ public class XSSFRichTextString implements RichTextString { CTRElt lt = st.addNewR(); lt.setT(text); preserveSpaces(lt.xgetT()); - + if (font != null) { CTRPrElt pr = lt.addNewRPr(); setRunAttributes(font.getCTFont(), pr); @@ -247,7 +247,7 @@ public class XSSFRichTextString implements RichTextString { } /** - * Does this string have any explicit formatting applied, or is + * Does this string have any explicit formatting applied, or is * it just text in the default style? */ public boolean hasFormatting() { @@ -495,6 +495,32 @@ public class XSSFRichTextString implements RichTextString { } } + /** + * Optimized counting of actual length of a string + * considering the replacement of _xHHHH_ that needs + * to be applied to rich-text strings. + * + * @param value The string + * @return The length of the string, 0 if the string is null. 
+ */ + static int utfLength(String value) { + if(value == null) { + return 0; + } + if (!value.contains("_x")) { + return value.length(); + } + + Matcher matcher = utfPtrn.matcher(value); + int count = 0; + while (matcher.find()) { + count++; + } + + // Length of pattern is 7 (_xHHHH_), and we replace it with one character + return value.length() - (count * 6); + } + /** * For all characters which cannot be represented in XML as defined by the XML 1.0 specification, * the characters are escaped using the Unicode numerical character representation escape character @@ -512,7 +538,7 @@ public class XSSFRichTextString implements RichTextString { if(value == null || !value.contains("_x")) { return value; } - + StringBuilder buf = new StringBuilder(); Matcher m = utfPtrn.matcher(value); int idx = 0; @@ -528,13 +554,13 @@ public class XSSFRichTextString implements RichTextString { idx = m.end(); } - - // small optimization: don't go via StringBuilder if not necessary, - // the encodings are very rare, so we should almost always go via this shortcut. + + // small optimization: don't go via StringBuilder if not necessary, + // the encodings are very rare, so we should almost always go via this shortcut. 
if(idx == 0) { return value; } - + buf.append(value.substring(idx)); return buf.toString(); } @@ -577,7 +603,7 @@ public class XSSFRichTextString implements RichTextString { String txt = r.getT(); CTRPrElt fmt = r.getRPr(); - length += txt.length(); + length += utfLength(txt); formats.put(length, fmt); } return formats; @@ -605,7 +631,7 @@ public class XSSFRichTextString implements RichTextString { } return stf; } - + private ThemesTable getThemesTable() { if(styles == null) return null; return styles.getTheme(); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java index db8dc231e8..8d4cb67a16 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFRichTextString.java @@ -563,4 +563,29 @@ public final class TestXSSFRichTextString { assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString()); wb.close(); } + + @Test + public void testUtfDecode_withApplyFont() { + XSSFFont font = new XSSFFont(); + font.setBold(true); + font.setFontHeightInPoints((short) 14); + + CTRst st = CTRst.Factory.newInstance(); + st.setT("abc_x000D_2ef_x000D_"); + XSSFRichTextString rt = new XSSFRichTextString(st); + rt.applyFont(font); + assertEquals("abc\r2ef\r", rt.getString()); + } + + @Test + public void testUtfLength() { + assertEquals(0, XSSFRichTextString.utfLength(null)); + assertEquals(0, XSSFRichTextString.utfLength("")); + + assertEquals(3, XSSFRichTextString.utfLength("abc")); + assertEquals(3, XSSFRichTextString.utfLength("ab_x0032_")); + assertEquals(3, XSSFRichTextString.utfLength("a_x0032__x0032_")); + assertEquals(3, XSSFRichTextString.utfLength("_x0032_a_x0032_")); + assertEquals(3, XSSFRichTextString.utfLength("_x0032__x0032_a")); + } }