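Applying Alexander Kjall's patch from LANG-480, along with a unit test made from his example. Fixes escaping of Unicode code points above U+FFFF, which were previously written as two separate numeric entities (one per surrogate char) instead of a single entity for the whole code point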
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@749095 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
00fee20efc
commit
c068395793
|
@ -825,10 +825,15 @@ class Entities {
|
||||||
public void escape(Writer writer, String str) throws IOException {
|
public void escape(Writer writer, String str) throws IOException {
|
||||||
int len = str.length();
|
int len = str.length();
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
char c = str.charAt(i);
|
int c = Character.codePointAt(str, i);
|
||||||
String entityName = this.entityName(c);
|
String entityName = this.entityName(c);
|
||||||
if (entityName == null) {
|
if (entityName == null) {
|
||||||
if (c > 0x7F) {
|
if (c >= 0x010000 && i < len - 1) {
|
||||||
|
writer.write("&#");
|
||||||
|
writer.write(Integer.toString(c, 10));
|
||||||
|
writer.write(';');
|
||||||
|
i++;
|
||||||
|
} else if (c > 0x7F) {
|
||||||
writer.write("&#");
|
writer.write("&#");
|
||||||
writer.write(Integer.toString(c, 10));
|
writer.write(Integer.toString(c, 10));
|
||||||
writer.write(';');
|
writer.write(';');
|
||||||
|
|
|
@ -415,4 +415,18 @@ public class StringEscapeUtilsTest extends TestCase {
|
||||||
fail("Threw: " + e);
|
fail("Threw: " + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://issues.apache.org/jira/browse/LANG-480
|
||||||
|
public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingException {
|
||||||
|
// this is the utf8 representation of the character:
|
||||||
|
// COUNTING ROD UNIT DIGIT THREE
|
||||||
|
// in unicode
|
||||||
|
// codepoint: U+1D362
|
||||||
|
byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
|
||||||
|
|
||||||
|
String escaped = StringEscapeUtils.escapeHtml( new String(data, "UTF8") );
|
||||||
|
String unescaped = StringEscapeUtils.unescapeHtml( escaped );
|
||||||
|
|
||||||
|
assertEquals( "High unicode was not escaped correctly", "&#119650;", escaped);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue