LANG-977: NumericEntityEscaper incorrectly encodes supplementary characters. Thanks to Chris Karcher.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1568612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
408462e80d
commit
ab14240150
|
@ -22,6 +22,7 @@
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
<release version="3.3" date="TBA" description="Bugfix and Feature release">
|
<release version="3.3" date="TBA" description="Bugfix and Feature release">
|
||||||
|
<action issue="LANG-977" type="fix" dev="britter" due-to="Chris Karcher">NumericEntityEscaper incorrectly encodes supplementary characters</action>
|
||||||
<action issue="LANG-973" type="fix" dev="sebb">Make some private fields final</action>
|
<action issue="LANG-973" type="fix" dev="sebb">Make some private fields final</action>
|
||||||
<action issue="LANG-971" type="fix" dev="sebb">NumberUtils#isNumber(String) fails to reject invalid Octal numbers</action>
|
<action issue="LANG-971" type="fix" dev="sebb">NumberUtils#isNumber(String) fails to reject invalid Octal numbers</action>
|
||||||
<action issue="LANG-972" type="fix" dev="sebb">NumberUtils#isNumber does not allow for hex 0XABCD</action>
|
<action issue="LANG-972" type="fix" dev="sebb">NumberUtils#isNumber does not allow for hex 0XABCD</action>
|
||||||
|
|
|
@ -185,7 +185,7 @@ public int translate(final CharSequence input, final int index, final Writer out
|
||||||
out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
|
out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
|
||||||
out.write(CSV_QUOTE);
|
out.write(CSV_QUOTE);
|
||||||
}
|
}
|
||||||
return input.length();
|
return Character.codePointCount(input, 0, input.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,7 +314,7 @@ public int translate(final CharSequence input, final int index, final Writer out
|
||||||
|
|
||||||
if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
|
if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
|
||||||
out.write(input.toString());
|
out.write(input.toString());
|
||||||
return input.length();
|
return Character.codePointCount(input, 0, input.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
// strip quotes
|
// strip quotes
|
||||||
|
@ -326,7 +326,7 @@ public int translate(final CharSequence input, final int index, final Writer out
|
||||||
} else {
|
} else {
|
||||||
out.write(input.toString());
|
out.write(input.toString());
|
||||||
}
|
}
|
||||||
return input.length();
|
return Character.codePointCount(input, 0, input.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,10 +89,10 @@ public final void translate(final CharSequence input, final Writer out) throws I
|
||||||
pos+= c.length;
|
pos+= c.length;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// // contract with translators is that they have to understand codepoints
|
// contract with translators is that they have to understand codepoints
|
||||||
// // and they just took care of a surrogate pair
|
// and they just took care of a surrogate pair
|
||||||
for (int pt = 0; pt < consumed; pt++) {
|
for (int pt = 0; pt < consumed; pt++) {
|
||||||
pos += Character.charCount(Character.codePointAt(input, pt));
|
pos += Character.charCount(Character.codePointAt(input, pos));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -348,6 +348,9 @@ public void testEscapeXmlSupplementaryCharacters() {
|
||||||
|
|
||||||
assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
|
assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
|
||||||
escapeXml.translate("\uD84C\uDFB4"));
|
escapeXml.translate("\uD84C\uDFB4"));
|
||||||
|
|
||||||
|
assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
|
||||||
|
escapeXml.translate("a b c \uD84C\uDFB4"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -377,6 +380,9 @@ public void testEscapeXmlAllCharacters() {
|
||||||
public void testUnescapeXmlSupplementaryCharacters() {
|
public void testUnescapeXmlSupplementaryCharacters() {
|
||||||
assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
|
assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4",
|
||||||
StringEscapeUtils.unescapeXml("&#144308;") );
|
StringEscapeUtils.unescapeXml("&#144308;") );
|
||||||
|
|
||||||
|
assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4",
|
||||||
|
StringEscapeUtils.unescapeXml("a b c &#144308;") );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests issue #38569
|
// Tests issue #38569
|
||||||
|
@ -401,6 +407,7 @@ public void testEscapeCsvString() throws Exception {
|
||||||
assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
|
assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar"));
|
||||||
assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
|
assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar"));
|
||||||
assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
|
assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar"));
|
||||||
|
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar"));
|
||||||
assertEquals("", StringEscapeUtils.escapeCsv(""));
|
assertEquals("", StringEscapeUtils.escapeCsv(""));
|
||||||
assertEquals(null, StringEscapeUtils.escapeCsv(null));
|
assertEquals(null, StringEscapeUtils.escapeCsv(null));
|
||||||
}
|
}
|
||||||
|
@ -412,6 +419,7 @@ public void testEscapeCsvWriter() throws Exception {
|
||||||
checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
|
checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar");
|
||||||
checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
|
checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar");
|
||||||
checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
|
checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar");
|
||||||
|
checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
|
||||||
checkCsvEscapeWriter("", null);
|
checkCsvEscapeWriter("", null);
|
||||||
checkCsvEscapeWriter("", "");
|
checkCsvEscapeWriter("", "");
|
||||||
}
|
}
|
||||||
|
@ -433,6 +441,7 @@ public void testUnescapeCsvString() throws Exception {
|
||||||
assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
|
assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\""));
|
||||||
assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
|
assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\""));
|
||||||
assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
|
assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\""));
|
||||||
|
assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar"));
|
||||||
assertEquals("", StringEscapeUtils.unescapeCsv(""));
|
assertEquals("", StringEscapeUtils.unescapeCsv(""));
|
||||||
assertEquals(null, StringEscapeUtils.unescapeCsv(null));
|
assertEquals(null, StringEscapeUtils.unescapeCsv(null));
|
||||||
|
|
||||||
|
@ -446,6 +455,7 @@ public void testUnescapeCsvWriter() throws Exception {
|
||||||
checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
|
checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\"");
|
||||||
checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
|
checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\"");
|
||||||
checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
|
checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\"");
|
||||||
|
checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar");
|
||||||
checkCsvUnescapeWriter("", null);
|
checkCsvUnescapeWriter("", null);
|
||||||
checkCsvUnescapeWriter("", "");
|
checkCsvUnescapeWriter("", "");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue