LANG-977: NumericEntityEscaper incorrectly encodes supplementary characters. Thanks to Chris Karcher.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1568612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Benedikt Ritter 2014-02-15 10:35:35 +00:00
parent 408462e80d
commit ab14240150
4 changed files with 37 additions and 26 deletions

View File

@ -22,6 +22,7 @@
<body> <body>
<release version="3.3" date="TBA" description="Bugfix and Feature release"> <release version="3.3" date="TBA" description="Bugfix and Feature release">
<action issue="LANG-977" type="fix" dev="britter" due-to="Chris Karcher">NumericEntityEscaper incorrectly encodes supplementary characters</action>
<action issue="LANG-973" type="fix" dev="sebb">Make some private fields final</action> <action issue="LANG-973" type="fix" dev="sebb">Make some private fields final</action>
<action issue="LANG-971" type="fix" dev="sebb">NumberUtils#isNumber(String) fails to reject invalid Octal numbers</action> <action issue="LANG-971" type="fix" dev="sebb">NumberUtils#isNumber(String) fails to reject invalid Octal numbers</action>
<action issue="LANG-972" type="fix" dev="sebb">NumberUtils#isNumber does not allow for hex 0XABCD</action> <action issue="LANG-972" type="fix" dev="sebb">NumberUtils#isNumber does not allow for hex 0XABCD</action>

View File

@ -185,7 +185,7 @@ public int translate(final CharSequence input, final int index, final Writer out
out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
out.write(CSV_QUOTE); out.write(CSV_QUOTE);
} }
return input.length(); return Character.codePointCount(input, 0, input.length());
} }
} }
@ -314,7 +314,7 @@ public int translate(final CharSequence input, final int index, final Writer out
if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
out.write(input.toString()); out.write(input.toString());
return input.length(); return Character.codePointCount(input, 0, input.length());
} }
// strip quotes // strip quotes
@ -326,7 +326,7 @@ public int translate(final CharSequence input, final int index, final Writer out
} else { } else {
out.write(input.toString()); out.write(input.toString());
} }
return input.length(); return Character.codePointCount(input, 0, input.length());
} }
} }

View File

@ -89,10 +89,10 @@ public final void translate(final CharSequence input, final Writer out) throws I
pos+= c.length; pos+= c.length;
continue; continue;
} }
// // contract with translators is that they have to understand codepoints // contract with translators is that they have to understand codepoints
// // and they just took care of a surrogate pair // and they just took care of a surrogate pair
for (int pt = 0; pt < consumed; pt++) { for (int pt = 0; pt < consumed; pt++) {
pos += Character.charCount(Character.codePointAt(input, pt)); pos += Character.charCount(Character.codePointAt(input, pos));
} }
} }
} }

View File

@ -348,6 +348,9 @@ public void testEscapeXmlSupplementaryCharacters() {
assertEquals("Supplementary character must be represented using a single escape", "&#144308;", assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
escapeXml.translate("\uD84C\uDFB4")); escapeXml.translate("\uD84C\uDFB4"));
assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
escapeXml.translate("a b c \uD84C\uDFB4"));
} }
@Test @Test
@ -377,6 +380,9 @@ public void testEscapeXmlAllCharacters() {
public void testUnescapeXmlSupplementaryCharacters() { public void testUnescapeXmlSupplementaryCharacters() {
assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4", assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
StringEscapeUtils.unescapeXml("&#144308;") ); StringEscapeUtils.unescapeXml("&#144308;") );
assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
StringEscapeUtils.unescapeXml("a b c &#144308;") );
} }
// Tests issue #38569 // Tests issue #38569
@ -396,22 +402,24 @@ public void testLang313() {
@Test @Test
public void testEscapeCsvString() throws Exception { public void testEscapeCsvString() throws Exception {
assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar")); assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar"));
assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar")); assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar"));
assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar")); assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar")); assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar")); assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
assertEquals("", StringEscapeUtils.escapeCsv("")); assertEquals("", StringEscapeUtils.escapeCsv(""));
assertEquals(null, StringEscapeUtils.escapeCsv(null)); assertEquals(null, StringEscapeUtils.escapeCsv(null));
} }
@Test @Test
public void testEscapeCsvWriter() throws Exception { public void testEscapeCsvWriter() throws Exception {
checkCsvEscapeWriter("foo.bar", "foo.bar"); checkCsvEscapeWriter("foo.bar", "foo.bar");
checkCsvEscapeWriter("\"foo,bar\"", "foo,bar"); checkCsvEscapeWriter("\"foo,bar\"", "foo,bar");
checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar"); checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar"); checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar"); checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
checkCsvEscapeWriter("", null); checkCsvEscapeWriter("", null);
checkCsvEscapeWriter("", ""); checkCsvEscapeWriter("", "");
} }
@ -428,11 +436,12 @@ private void checkCsvEscapeWriter(final String expected, final String value) {
@Test @Test
public void testUnescapeCsvString() throws Exception { public void testUnescapeCsvString() throws Exception {
assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar")); assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar"));
assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\"")); assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\""));
assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\"")); assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\"")); assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\"")); assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
assertEquals("", StringEscapeUtils.unescapeCsv("")); assertEquals("", StringEscapeUtils.unescapeCsv(""));
assertEquals(null, StringEscapeUtils.unescapeCsv(null)); assertEquals(null, StringEscapeUtils.unescapeCsv(null));
@ -441,11 +450,12 @@ public void testUnescapeCsvString() throws Exception {
@Test @Test
public void testUnescapeCsvWriter() throws Exception { public void testUnescapeCsvWriter() throws Exception {
checkCsvUnescapeWriter("foo.bar", "foo.bar"); checkCsvUnescapeWriter("foo.bar", "foo.bar");
checkCsvUnescapeWriter("foo,bar", "\"foo,bar\""); checkCsvUnescapeWriter("foo,bar", "\"foo,bar\"");
checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\""); checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\""); checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\""); checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
checkCsvUnescapeWriter("", null); checkCsvUnescapeWriter("", null);
checkCsvUnescapeWriter("", ""); checkCsvUnescapeWriter("", "");