LANG-877: Performance improvements for StringEscapeUtils. This fixes #49 from GitHub. Thanks to Fabian Lange.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1666535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a234f033b6
commit
1f75a8f7cc
|
@ -22,6 +22,7 @@
|
|||
<body>
|
||||
|
||||
<release version="3.4" date="tba" description="tba">
|
||||
<action issue="LANG-877" type="add" dev="britter" due-to="Fabian Lange">Performance improvements for StringEscapeUtils</action>
|
||||
<action issue="LANG-1093" type="add" dev="britter" due-to="Fabian Lange">Add ClassUtils.getAbbreviatedName()</action>
|
||||
<action issue="LANG-1090" type="fix" dev="sebb">FastDateParser does not set error indication in ParsePosition</action>
|
||||
<action issue="LANG-1089" type="fix" dev="sebb">FastDateParser does not handle excess hours as per SimpleDateFormat</action>
|
||||
|
|
|
@ -31,6 +31,8 @@ public class CharUtils {
|
|||
|
||||
private static final String[] CHAR_STRING_ARRAY = new String[128];
|
||||
|
||||
private static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
|
||||
|
||||
/**
|
||||
* {@code \u000a} linefeed LF ('\n').
|
||||
*
|
||||
|
@ -350,14 +352,13 @@ public class CharUtils {
|
|||
* @return the escaped Unicode string
|
||||
*/
|
||||
public static String unicodeEscaped(final char ch) {
|
||||
if (ch < 0x10) {
|
||||
return "\\u000" + Integer.toHexString(ch);
|
||||
} else if (ch < 0x100) {
|
||||
return "\\u00" + Integer.toHexString(ch);
|
||||
} else if (ch < 0x1000) {
|
||||
return "\\u0" + Integer.toHexString(ch);
|
||||
}
|
||||
return "\\u" + Integer.toHexString(ch);
|
||||
StringBuilder sb = new StringBuilder(6);
|
||||
sb.append("\\u");
|
||||
sb.append(HEX_DIGITS[(ch >> 12) & 15]);
|
||||
sb.append(HEX_DIGITS[(ch >> 8) & 15]);
|
||||
sb.append(HEX_DIGITS[(ch >> 4) & 15]);
|
||||
sb.append(HEX_DIGITS[(ch) & 15]);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -31,6 +31,8 @@ import java.util.Locale;
|
|||
*/
|
||||
public abstract class CharSequenceTranslator {
|
||||
|
||||
static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
|
||||
|
||||
/**
|
||||
* Translate a set of codepoints, represented by an int index into a CharSequence,
|
||||
* into another set of codepoints. The number of codepoints consumed must be returned,
|
||||
|
@ -84,9 +86,18 @@ public abstract class CharSequenceTranslator {
|
|||
while (pos < len) {
|
||||
final int consumed = translate(input, pos, out);
|
||||
if (consumed == 0) {
|
||||
final char[] c = Character.toChars(Character.codePointAt(input, pos));
|
||||
out.write(c);
|
||||
pos+= c.length;
|
||||
// inlined implementation of Character.toChars(Character.codePointAt(input, pos))
|
||||
// avoids allocating temp char arrays and duplicate checks
|
||||
char c1 = input.charAt(pos);
|
||||
out.write(c1);
|
||||
pos++;
|
||||
if (Character.isHighSurrogate(c1) && pos < len) {
|
||||
char c2 = input.charAt(pos);
|
||||
if (Character.isLowSurrogate(c2)) {
|
||||
out.write(c2);
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// contract with translators is that they have to understand codepoints
|
||||
|
|
|
@ -114,14 +114,12 @@ public class UnicodeEscaper extends CodePointTranslator {
|
|||
// TODO: Handle potential + sign per various Unicode escape implementations
|
||||
if (codepoint > 0xffff) {
|
||||
out.write(toUtf16Escape(codepoint));
|
||||
} else if (codepoint > 0xfff) {
|
||||
out.write("\\u" + hex(codepoint));
|
||||
} else if (codepoint > 0xff) {
|
||||
out.write("\\u0" + hex(codepoint));
|
||||
} else if (codepoint > 0xf) {
|
||||
out.write("\\u00" + hex(codepoint));
|
||||
} else {
|
||||
out.write("\\u000" + hex(codepoint));
|
||||
out.write("\\u");
|
||||
out.write(HEX_DIGITS[(codepoint >> 12) & 15]);
|
||||
out.write(HEX_DIGITS[(codepoint >> 8) & 15]);
|
||||
out.write(HEX_DIGITS[(codepoint >> 4) & 15]);
|
||||
out.write(HEX_DIGITS[(codepoint) & 15]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -194,6 +194,7 @@ public class CharUtilsTest {
|
|||
@Test
|
||||
public void testToUnicodeEscaped_char() {
|
||||
assertEquals("\\u0041", CharUtils.unicodeEscaped('A'));
|
||||
assertEquals("\\u004c", CharUtils.unicodeEscaped('L'));
|
||||
|
||||
for (int i = 0; i < 196; i++) {
|
||||
final String str = CharUtils.unicodeEscaped((char) i);
|
||||
|
|
Loading…
Reference in New Issue