diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 960c04e9d..dff9fccc9 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
+ Performance improvements for StringEscapeUtils
Add ClassUtils.getAbbreviatedName()
FastDateParser does not set error indication in ParsePosition
FastDateParser does not handle excess hours as per SimpleDateFormat
diff --git a/src/main/java/org/apache/commons/lang3/CharUtils.java b/src/main/java/org/apache/commons/lang3/CharUtils.java
index 3f19a214e..eb0c2c684 100644
--- a/src/main/java/org/apache/commons/lang3/CharUtils.java
+++ b/src/main/java/org/apache/commons/lang3/CharUtils.java
@@ -31,6 +31,8 @@ public class CharUtils {
private static final String[] CHAR_STRING_ARRAY = new String[128];
+ private static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
+
/**
* {@code \u000a} linefeed LF ('\n').
*
@@ -350,14 +352,13 @@ public class CharUtils {
* @return the escaped Unicode string
*/
public static String unicodeEscaped(final char ch) {
- if (ch < 0x10) {
- return "\\u000" + Integer.toHexString(ch);
- } else if (ch < 0x100) {
- return "\\u00" + Integer.toHexString(ch);
- } else if (ch < 0x1000) {
- return "\\u0" + Integer.toHexString(ch);
- }
- return "\\u" + Integer.toHexString(ch);
+ return new StringBuilder(6)
+ .append("\\u")
+ .append(HEX_DIGITS[(ch >> 12) & 0xF]) // most significant nibble first
+ .append(HEX_DIGITS[(ch >> 8) & 0xF])
+ .append(HEX_DIGITS[(ch >> 4) & 0xF])
+ .append(HEX_DIGITS[ch & 0xF]) // least significant nibble last
+ .toString();
}
/**
diff --git a/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java b/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
index ab20e5b0e..17e73b640 100644
--- a/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
+++ b/src/main/java/org/apache/commons/lang3/text/translate/CharSequenceTranslator.java
@@ -31,6 +31,8 @@ import java.util.Locale;
*/
public abstract class CharSequenceTranslator {
+ static final char[] HEX_DIGITS = new char[] {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
+
/**
* Translate a set of codepoints, represented by an int index into a CharSequence,
* into another set of codepoints. The number of codepoints consumed must be returned,
@@ -84,9 +86,18 @@ public abstract class CharSequenceTranslator {
while (pos < len) {
final int consumed = translate(input, pos, out);
if (consumed == 0) {
- final char[] c = Character.toChars(Character.codePointAt(input, pos));
- out.write(c);
- pos+= c.length;
+ // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
+ // avoids allocating temp char arrays and duplicate checks
+ char c1 = input.charAt(pos);
+ out.write(c1);
+ pos++;
+ if (Character.isHighSurrogate(c1) && pos < len) {
+ char c2 = input.charAt(pos);
+ if (Character.isLowSurrogate(c2)) {
+ out.write(c2);
+ pos++;
+ }
+ }
continue;
}
// contract with translators is that they have to understand codepoints
diff --git a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java
index b1799bf99..3a63b721b 100644
--- a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java
+++ b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeEscaper.java
@@ -114,14 +114,12 @@ public class UnicodeEscaper extends CodePointTranslator {
// TODO: Handle potential + sign per various Unicode escape implementations
if (codepoint > 0xffff) {
out.write(toUtf16Escape(codepoint));
- } else if (codepoint > 0xfff) {
- out.write("\\u" + hex(codepoint));
- } else if (codepoint > 0xff) {
- out.write("\\u0" + hex(codepoint));
- } else if (codepoint > 0xf) {
- out.write("\\u00" + hex(codepoint));
} else {
- out.write("\\u000" + hex(codepoint));
+ out.write("\\u");
+ out.write(HEX_DIGITS[(codepoint >> 12) & 15]);
+ out.write(HEX_DIGITS[(codepoint >> 8) & 15]);
+ out.write(HEX_DIGITS[(codepoint >> 4) & 15]);
+ out.write(HEX_DIGITS[(codepoint) & 15]);
}
return true;
}
diff --git a/src/test/java/org/apache/commons/lang3/CharUtilsTest.java b/src/test/java/org/apache/commons/lang3/CharUtilsTest.java
index 94acbcdf4..f6297c60c 100644
--- a/src/test/java/org/apache/commons/lang3/CharUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/CharUtilsTest.java
@@ -194,6 +194,7 @@ public class CharUtilsTest {
@Test
public void testToUnicodeEscaped_char() {
assertEquals("\\u0041", CharUtils.unicodeEscaped('A'));
+ assertEquals("\\u004c", CharUtils.unicodeEscaped('L'));
for (int i = 0; i < 196; i++) {
final String str = CharUtils.unicodeEscaped((char) i);