[LANG-720] StringEscapeUtils.escapeXml(input) outputs wrong results when the input contains characters in the Supplementary Planes. Also rewrite the method to avoid modifying the counter variable inside the for loop.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1146844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8a3e860345
commit
2c1b5be146
|
@ -79,27 +79,20 @@ public abstract class CharSequenceTranslator {
|
|||
if (input == null) {
|
||||
return;
|
||||
}
|
||||
int sz = Character.codePointCount(input, 0, input.length());
|
||||
for (int i = 0; i < sz; i++) {
|
||||
|
||||
// consumed is the number of codepoints consumed
|
||||
int consumed = translate(input, i, out);
|
||||
|
||||
int pos = 0;
|
||||
int len = input.length();
|
||||
while (pos < len) {
|
||||
int consumed = translate(input, pos, out);
|
||||
if (consumed == 0) {
|
||||
out.write(Character.toChars(Character.codePointAt(input, i)));
|
||||
} else {
|
||||
// contract with translators is that they have to understand codepoints
|
||||
// and they just took care of a surrogate pair
|
||||
for (int j = 0; j < consumed; j++) {
|
||||
if (i < sz - 2) {
|
||||
i += Character.charCount(Character.codePointAt(input, i));
|
||||
} else {
|
||||
// If the String ends with a high surrogate, just add the 1 and don't worry about such things
|
||||
i++;
|
||||
}
|
||||
}
|
||||
// for loop will increment 1 anyway, so remove 1 to account for that
|
||||
i--;
|
||||
char[] c = Character.toChars(Character.codePointAt(input, pos));
|
||||
out.write(c);
|
||||
pos+= c.length;
|
||||
continue;
|
||||
}
|
||||
// // contract with translators is that they have to understand codepoints
|
||||
// // and they just took care of a surrogate pair
|
||||
for (int pt = 0; pt < consumed; pt++) {
|
||||
pos += Character.charCount(Character.codePointAt(input, pos));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -423,4 +423,11 @@ public class StringEscapeUtilsTest extends TestCase {
|
|||
|
||||
assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped);
|
||||
}
|
||||
|
||||
// https://issues.apache.org/jira/browse/LANG-720
// Regression test for LANG-720: builds a string made of one supplementary-plane
// code point (U+20BB7, written as the surrogate pair \ud842\udfb7) followed by "A",
// passes it through StringEscapeUtils.escapeXml, and asserts that the result is
// equal to the input, i.e. neither the surrogate pair nor the trailing "A" is altered.
|
||||
public void testLang720() {
|
||||
String input = new StringBuilder("\ud842\udfb7").append("A").toString();
|
||||
String escaped = StringEscapeUtils.escapeXml(input);
|
||||
assertEquals(input, escaped);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue