LANG-977: NumericEntityEscaper incorrectly encodes supplementary characters. Thanks to Chris Karcher.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1568612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Benedikt Ritter 2014-02-15 10:35:35 +00:00
parent 408462e80d
commit ab14240150
4 changed files with 37 additions and 26 deletions

View File

@ -22,6 +22,7 @@
<body>
<release version="3.3" date="TBA" description="Bugfix and Feature release">
<action issue="LANG-977" type="fix" dev="britter" due-to="Chris Karcher">NumericEntityEscaper incorrectly encodes supplementary characters</action>
<action issue="LANG-973" type="fix" dev="sebb">Make some private fields final</action>
<action issue="LANG-971" type="fix" dev="sebb">NumberUtils#isNumber(String) fails to reject invalid Octal numbers</action>
<action issue="LANG-972" type="fix" dev="sebb">NumberUtils#isNumber does not allow for hex 0XABCD</action>

View File

@ -185,7 +185,7 @@ public int translate(final CharSequence input, final int index, final Writer out
out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
out.write(CSV_QUOTE);
}
return input.length();
return Character.codePointCount(input, 0, input.length());
}
}
@ -314,7 +314,7 @@ public int translate(final CharSequence input, final int index, final Writer out
if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
out.write(input.toString());
return input.length();
return Character.codePointCount(input, 0, input.length());
}
// strip quotes
@ -326,7 +326,7 @@ public int translate(final CharSequence input, final int index, final Writer out
} else {
out.write(input.toString());
}
return input.length();
return Character.codePointCount(input, 0, input.length());
}
}

View File

@ -89,10 +89,10 @@ public final void translate(final CharSequence input, final Writer out) throws I
pos+= c.length;
continue;
}
// // contract with translators is that they have to understand codepoints
// // and they just took care of a surrogate pair
// contract with translators is that they have to understand codepoints
// and they just took care of a surrogate pair
for (int pt = 0; pt < consumed; pt++) {
pos += Character.charCount(Character.codePointAt(input, pt));
pos += Character.charCount(Character.codePointAt(input, pos));
}
}
}

View File

@ -348,6 +348,9 @@ public void testEscapeXmlSupplementaryCharacters() {
assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
escapeXml.translate("\uD84C\uDFB4"));
assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
escapeXml.translate("a b c \uD84C\uDFB4"));
}
@Test
@ -377,6 +380,9 @@ public void testEscapeXmlAllCharacters() {
// Checks StringEscapeUtils.unescapeXml on the decimal numeric entity 144308 (U+233B4), a code
// point above the BMP: the expected result is the corresponding surrogate pair, both when the
// entity is the whole input and when it follows basic characters.
public void testUnescapeXmlSupplementaryCharacters() {
// Entity on its own: expected to decode to exactly one supplementary character (two chars).
assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
StringEscapeUtils.unescapeXml("&#144308;") );
// Entity preceded by basic characters: the leading "a b c " must be preserved unchanged.
assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
StringEscapeUtils.unescapeXml("a b c &#144308;") );
}
// Tests issue #38569
@ -396,22 +402,24 @@ public void testLang313() {
/**
 * Exercises {@code StringEscapeUtils.escapeCsv(String)} against literal expected values:
 * plain text is expected back unchanged, while values containing a comma, line feed,
 * carriage return or double quote are expected wrapped in double quotes (with an embedded
 * double quote doubled).
 */
@Test
public void testEscapeCsvString() throws Exception {
// NOTE(review): the next five assertions appear twice in a row with identical text --
// this looks like the before/after of a whitespace-only change; confirm before deduplicating.
assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar"));
assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar"));
assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar"));
assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar"));
assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
// A supplementary character (surrogate pair) between plain text is expected back unchanged.
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
// Empty and null inputs are expected to come back as empty and null respectively.
assertEquals("", StringEscapeUtils.escapeCsv(""));
assertEquals(null, StringEscapeUtils.escapeCsv(null));
}
/**
 * Runs the CSV-escape cases through the {@code checkCsvEscapeWriter} helper; the first
 * argument of each call is the expected text and the second is the input value.
 * NOTE(review): the helper body is not visible here -- presumably it drives the
 * Writer-based escapeCsv overload; confirm against the helper.
 */
@Test
public void testEscapeCsvWriter() throws Exception {
// NOTE(review): the next five calls appear twice in a row with identical text --
// this looks like the before/after of a whitespace-only change; confirm before deduplicating.
checkCsvEscapeWriter("foo.bar", "foo.bar");
checkCsvEscapeWriter("\"foo,bar\"", "foo,bar");
checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
checkCsvEscapeWriter("foo.bar", "foo.bar");
checkCsvEscapeWriter("\"foo,bar\"", "foo,bar");
checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
// A supplementary character (surrogate pair) between plain text is expected back unchanged.
checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
// Both a null value and an empty value are expected to produce empty text.
checkCsvEscapeWriter("", null);
checkCsvEscapeWriter("", "");
}
@ -428,11 +436,12 @@ private void checkCsvEscapeWriter(final String expected, final String value) {
@Test
public void testUnescapeCsvString() throws Exception {
assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar"));
assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\""));
assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar"));
assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\""));
assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
assertEquals("", StringEscapeUtils.unescapeCsv(""));
assertEquals(null, StringEscapeUtils.unescapeCsv(null));
@ -441,11 +450,12 @@ public void testUnescapeCsvString() throws Exception {
@Test
public void testUnescapeCsvWriter() throws Exception {
checkCsvUnescapeWriter("foo.bar", "foo.bar");
checkCsvUnescapeWriter("foo,bar", "\"foo,bar\"");
checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
checkCsvUnescapeWriter("foo.bar", "foo.bar");
checkCsvUnescapeWriter("foo,bar", "\"foo,bar\"");
checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
checkCsvUnescapeWriter("", null);
checkCsvUnescapeWriter("", "");