diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 960c04e9d..dff9fccc9 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -22,6 +22,7 @@ + Performance improvements for StringEscapeUtils Add ClassUtils.getAbbreviatedName() FastDateParser does not set error indication in ParsePosition FastDateParser does not handle excess hours as per SimpleDateFormat diff --git a/src/main/java/org/apache/commons/lang3/CharUtils.java b/src/main/java/org/apache/commons/lang3/CharUtils.java index 3f19a214e..eb0c2c684 100644 --- a/src/main/java/org/apache/commons/lang3/CharUtils.java +++ b/src/main/java/org/apache/commons/lang3/CharUtils.java @@ -31,6 +31,8 @@ public class CharUtils { private static final String[] CHAR_STRING_ARRAY = new String[128]; + private static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + /** * {@code \u000a} linefeed LF ('\n'). * @@ -350,14 +352,13 @@ public class CharUtils { * @return the escaped Unicode string */ public static String unicodeEscaped(final char ch) { - if (ch < 0x10) { - return "\\u000" + Integer.toHexString(ch); - } else if (ch < 0x100) { - return "\\u00" + Integer.toHexString(ch); - } else if (ch < 0x1000) { - return "\\u0" + Integer.toHexString(ch); - } - return "\\u" + Integer.toHexString(ch); + StringBuilder sb = new StringBuilder(6); + sb.append("\\u"); + sb.append(HEX_DIGITS[(ch >> 12) & 15]); + sb.append(HEX_DIGITS[(ch >> 8) & 15]); + sb.append(HEX_DIGITS[(ch >> 4) & 15]); + sb.append(HEX_DIGITS[(ch) & 15]); + return sb.toString(); } /** diff --git a/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java b/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java index ab20e5b0e..17e73b640 100644 --- a/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java +++ b/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java @@ -31,6 +31,8 @@ import java.util.Locale; */ public abstract class CharSequenceTranslator { + static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + /** * Translate a set of codepoints, represented by an int index into a CharSequence, * into another set of codepoints. The number of codepoints consumed must be returned, @@ -84,9 +86,18 @@ public abstract class CharSequenceTranslator { while (pos < len) { final int consumed = translate(input, pos, out); if (consumed == 0) { - final char[] c = Character.toChars(Character.codePointAt(input, pos)); - out.write(c); - pos+= c.length; + // inlined implementation of Character.toChars(Character.codePointAt(input, pos)) + // avoids allocating temp char arrays and duplicate checks + char c1 = input.charAt(pos); + out.write(c1); + pos++; + if (Character.isHighSurrogate(c1) && pos < len) { + char c2 = input.charAt(pos); + if (Character.isLowSurrogate(c2)) { + out.write(c2); + pos++; + } + } continue; } // contract with translators is that they have to understand codepoints diff --git a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java index b1799bf99..3a63b721b 100644 --- a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java +++ b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java @@ -114,14 +114,12 @@ public class UnicodeEscaper extends CodePointTranslator { // TODO: Handle potential + sign per various Unicode escape implementations if (codepoint > 0xffff) { out.write(toUtf16Escape(codepoint)); - } else if (codepoint > 0xfff) { - out.write("\\u" + hex(codepoint)); - } else if (codepoint > 0xff) { - out.write("\\u0" + hex(codepoint)); - } else if (codepoint > 0xf) { - out.write("\\u00" + hex(codepoint)); } else { - out.write("\\u000" + hex(codepoint)); + out.write("\\u"); + out.write(HEX_DIGITS[(codepoint >> 12) & 15]); + out.write(HEX_DIGITS[(codepoint >> 8) & 15]); + out.write(HEX_DIGITS[(codepoint >> 4) & 15]); + out.write(HEX_DIGITS[(codepoint) & 15]); } return true; } diff --git a/src/test/java/org/apache/commons/lang3/CharUtilsTest.java b/src/test/java/org/apache/commons/lang3/CharUtilsTest.java index 94acbcdf4..f6297c60c 100644 --- a/src/test/java/org/apache/commons/lang3/CharUtilsTest.java +++ b/src/test/java/org/apache/commons/lang3/CharUtilsTest.java @@ -194,6 +194,7 @@ public class CharUtilsTest { @Test public void testToUnicodeEscaped_char() { assertEquals("\\u0041", CharUtils.unicodeEscaped('A')); + assertEquals("\\u004c", CharUtils.unicodeEscaped('L')); for (int i = 0; i < 196; i++) { final String str = CharUtils.unicodeEscaped((char) i);