Bug 64244: Take the replacement of RichText strings into account when computing length of strings

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875571 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2020-03-24 10:58:16 +00:00
parent 7a340230fd
commit 9649ca955a
2 changed files with 60 additions and 9 deletions

View File

@ -495,6 +495,32 @@ public class XSSFRichTextString implements RichTextString {
} }
} }
/**
 * Returns the number of characters a rich-text string occupies once every
 * escape sequence of the form _xHHHH_ has been collapsed to the single
 * character it stands for, without building the decoded string.
 *
 * @param value the raw (still escaped) text, may be {@code null}
 * @return the effective length of {@code value}; 0 when {@code value} is {@code null}
 */
static int utfLength(String value) {
    if (value == null) {
        return 0;
    }

    final int rawLength = value.length();

    // Fast path: without the "_x" prefix there can be no escape sequence,
    // so the pattern matcher is not needed at all.
    if (value.indexOf("_x") < 0) {
        return rawLength;
    }

    // Count the escape sequences found by the shared pattern
    // (utfPtrn is declared elsewhere in this class).
    int escapes = 0;
    for (Matcher m = utfPtrn.matcher(value); m.find(); ) {
        escapes++;
    }

    // Each escape is 7 characters long (_xHHHH_) and stands for one character,
    // so every occurrence shortens the effective length by 6.
    return rawLength - 6 * escapes;
}
/** /**
* For all characters which cannot be represented in XML as defined by the XML 1.0 specification, * For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
* the characters are escaped using the Unicode numerical character representation escape character * the characters are escaped using the Unicode numerical character representation escape character
@ -577,7 +603,7 @@ public class XSSFRichTextString implements RichTextString {
String txt = r.getT(); String txt = r.getT();
CTRPrElt fmt = r.getRPr(); CTRPrElt fmt = r.getRPr();
length += txt.length(); length += utfLength(txt);
formats.put(length, fmt); formats.put(length, fmt);
} }
return formats; return formats;

View File

@ -563,4 +563,29 @@ public final class TestXSSFRichTextString {
assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString()); assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString());
wb.close(); wb.close();
} }
/**
 * Applies a font to a rich-text string whose raw text contains two
 * _x000D_ escapes and checks that the resulting string still reads as the
 * decoded text (see bug 64244).
 */
@Test
public void testUtfDecode_withApplyFont() {
    // Raw text with two escaped carriage returns.
    CTRst rawText = CTRst.Factory.newInstance();
    rawText.setT("abc_x000D_2ef_x000D_");

    // Bold, 14pt font to apply to the string.
    XSSFFont boldFont = new XSSFFont();
    boldFont.setBold(true);
    boldFont.setFontHeightInPoints((short) 14);

    XSSFRichTextString richText = new XSSFRichTextString(rawText);
    richText.applyFont(boldFont);

    // The escapes must come back as plain '\r' characters.
    assertEquals("abc\r2ef\r", richText.getString());
}
/**
 * Checks {@code XSSFRichTextString.utfLength} for null/empty input, plain
 * text, and text containing one or two _xHHHH_ escapes at various positions.
 */
@Test
public void testUtfLength() {
    // Null and the empty string both have an effective length of zero.
    final String[] zeroLengthInputs = { null, "" };
    for (String input : zeroLengthInputs) {
        assertEquals(0, XSSFRichTextString.utfLength(input));
    }

    // Each of these has an effective length of three: plain text first,
    // then escapes at the end, doubled at the end, on both sides, and
    // doubled at the start.
    final String[] threeCharInputs = {
        "abc",
        "ab_x0032_",
        "a_x0032__x0032_",
        "_x0032_a_x0032_",
        "_x0032__x0032_a",
    };
    for (String input : threeCharInputs) {
        assertEquals(3, XSSFRichTextString.utfLength(input));
    }
}
} }