[LANG-720] StringEscapeUtils.escapeXml(input) outputs wrong results when an input contains characters in Supplementary Planes. ALSO rewrite method to avoid modification of counter variable in for loop

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1146844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Matthew Jason Benson 2011-07-14 18:49:51 +00:00
parent 8a3e860345
commit 2c1b5be146
2 changed files with 20 additions and 20 deletions

View File

@ -79,27 +79,20 @@ public abstract class CharSequenceTranslator {
if (input == null) {
return;
}
int sz = Character.codePointCount(input, 0, input.length());
for (int i = 0; i < sz; i++) {
// consumed is the number of codepoints consumed
int consumed = translate(input, i, out);
int pos = 0;
int len = input.length();
while (pos < len) {
int consumed = translate(input, pos, out);
if (consumed == 0) {
out.write(Character.toChars(Character.codePointAt(input, i)));
} else {
// contract with translators is that they have to understand codepoints
// and they just took care of a surrogate pair
for (int j = 0; j < consumed; j++) {
if (i < sz - 2) {
i += Character.charCount(Character.codePointAt(input, i));
} else {
// If the String ends with a high surrogate, just add the 1 and don't worry about such things
i++;
char[] c = Character.toChars(Character.codePointAt(input, pos));
out.write(c);
pos+= c.length;
continue;
}
}
// for loop will increment 1 anyway, so remove 1 to account for that
i--;
// // contract with translators is that they have to understand codepoints
// // and they just took care of a surrogate pair
for (int pt = 0; pt < consumed; pt++) {
pos += Character.charCount(Character.codePointAt(input, pos));
}
}
}

View File

@ -423,4 +423,11 @@ public class StringEscapeUtilsTest extends TestCase {
assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped);
}
/**
 * Regression test for LANG-720
 * (https://issues.apache.org/jira/browse/LANG-720).
 *
 * Feeds {@code escapeXml} a string made of one supplementary-plane character
 * (written as the surrogate pair {@code \ud842\udfb7}) followed by the letter
 * {@code A}, and checks that the result is equal to the input.
 */
public void testLang720() {
    // A single code point outside the BMP, encoded as a high/low surrogate pair.
    final String supplementary = "\ud842\udfb7";
    final String input = supplementary + "A";

    final String escaped = StringEscapeUtils.escapeXml(input);

    assertEquals(input, escaped);
}
}