Changing the standard escapeXml method to not escape high characters. It's easier to add that back in than to remove it. LANG-516 and LANG-517
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@836151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9adbbb0bbf
commit
000bac6b94
|
@ -66,8 +66,7 @@ public class EscapeUtils {
|
|||
public static final CharSequenceTranslator ESCAPE_XML =
|
||||
new AggregateTranslator(
|
||||
new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
|
||||
new LookupTranslator(EntityArrays.APOS_ESCAPE()),
|
||||
NumericEntityEscaper.above(0x7f)
|
||||
new LookupTranslator(EntityArrays.APOS_ESCAPE())
|
||||
);
|
||||
|
||||
public static final String escapeXml(String input) {
|
||||
|
@ -77,8 +76,7 @@ public class EscapeUtils {
|
|||
public static final CharSequenceTranslator ESCAPE_HTML3 =
|
||||
new AggregateTranslator(
|
||||
new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
|
||||
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
|
||||
NumericEntityEscaper.above(0x7f)
|
||||
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
|
||||
);
|
||||
|
||||
public static final String escapeHtml3(String input) {
|
||||
|
@ -89,8 +87,7 @@ public class EscapeUtils {
|
|||
new AggregateTranslator(
|
||||
new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
|
||||
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
|
||||
new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()),
|
||||
NumericEntityEscaper.above(0x7f)
|
||||
new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
|
||||
);
|
||||
|
||||
public static final String escapeHtml4(String input) {
|
||||
|
|
|
@ -218,9 +218,8 @@ public class StringEscapeUtilsTest extends TestCase {
|
|||
{"final character only", "greater than &gt;", "greater than >"},
|
||||
{"first character only", "&lt; less than", "< less than"},
|
||||
{"apostrophe", "Huntington's chorea", "Huntington's chorea"},
|
||||
{"languages", "English,Fran&ccedil;ais,&#26085;&#26412;&#35486; (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
|
||||
{"8-bit ascii doesn't number-escape", "~\u007F", "\u007E\u007F"},
|
||||
{"8-bit ascii does number-escape", "&#128;&#159;", "\u0080\u009F"},
|
||||
{"languages", "English,Fran&ccedil;ais,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"},
|
||||
{"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"},
|
||||
};
|
||||
|
||||
public void testEscapeHtml() {
|
||||
|
@ -298,9 +297,9 @@ public class StringEscapeUtilsTest extends TestCase {
|
|||
assertEquals("&lt;abc&gt;", StringEscapeUtils.escapeXml("<abc>"));
|
||||
assertEquals("<abc>", StringEscapeUtils.unescapeXml("&lt;abc&gt;"));
|
||||
|
||||
assertEquals("XML should use numbers, not names for HTML entities",
|
||||
"&#161;", StringEscapeUtils.escapeXml("\u00A1"));
|
||||
assertEquals("XML should use numbers, not names for HTML entities",
|
||||
assertEquals("XML should not escape >0x7f values",
|
||||
"\u00A1", StringEscapeUtils.escapeXml("\u00A1"));
|
||||
assertEquals("XML should be able to unescape >0x7f values",
|
||||
"\u00A0", StringEscapeUtils.unescapeXml("&#160;"));
|
||||
|
||||
assertEquals("ain't", StringEscapeUtils.unescapeXml("ain&apos;t"));
|
||||
|
@ -413,21 +412,29 @@ public class StringEscapeUtilsTest extends TestCase {
|
|||
// codepoint: U+1D362
|
||||
byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 };
|
||||
|
||||
String escaped = StringEscapeUtils.escapeHtml( new String(data, "UTF8") );
|
||||
String unescaped = StringEscapeUtils.unescapeHtml( escaped );
|
||||
String original = new String(data, "UTF8");
|
||||
|
||||
assertEquals( "High unicode was not escaped correctly", "&#119650;", escaped);
|
||||
String escaped = StringEscapeUtils.escapeHtml( original );
|
||||
assertEquals( "High unicode should not have been escaped", original, escaped);
|
||||
|
||||
String unescaped = StringEscapeUtils.unescapeHtml( escaped );
|
||||
assertEquals( "High unicode should have been unchanged", original, unescaped);
|
||||
|
||||
// TODO: I think this should hold, needs further investigation
|
||||
// String unescapedFromEntity = StringEscapeUtils.unescapeHtml( "&#119650;" );
|
||||
// assertEquals( "High unicode should have been unescaped", original, unescapedFromEntity);
|
||||
}
|
||||
|
||||
// https://issues.apache.org/jira/browse/LANG-339
|
||||
public void testEscapeHiragana() throws java.io.UnsupportedEncodingException {
|
||||
// Some random Japanese unicode characters
|
||||
String escaped = StringEscapeUtils.escapeHtml( "\u304B\u304C\u3068" );
|
||||
assertEquals( "Hiragana character unicode behaviour has changed from their being escaped",
|
||||
"&#12363;&#12364;&#12392;", escaped);
|
||||
String original = "\u304B\u304C\u3068";
|
||||
String escaped = StringEscapeUtils.escapeHtml(original);
|
||||
assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml",
|
||||
original, escaped);
|
||||
|
||||
String unescaped = StringEscapeUtils.unescapeHtml( escaped );
|
||||
|
||||
assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, escaped);
|
||||
assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue