diff --git a/src/java/org/apache/commons/lang/RandomStringUtils.java b/src/java/org/apache/commons/lang/RandomStringUtils.java index c83464cac..b5edc0b02 100644 --- a/src/java/org/apache/commons/lang/RandomStringUtils.java +++ b/src/java/org/apache/commons/lang/RandomStringUtils.java @@ -18,6 +18,14 @@ import java.util.Random; /** *

Operations for random Strings.

+ *

Currently, private-use high surrogate characters are ignored. + * These are Unicode characters in the range 56192 (db80) to + * 56319 (dbff); they are skipped because we don't know how to handle them. + * Other high and low surrogates are handled correctly: if a + * high surrogate, 55296 (d800) to 56191 (db7f), is randomly chosen, + * it is followed by a low surrogate. If a low surrogate, + * 56320 (dc00) to 57343 (dfff), is chosen, it is placed after a randomly + * chosen high surrogate.

* * @author GenerationJava Core library * @author Henri Yandell @@ -243,8 +251,32 @@ public static String random(int count, int start, int end, boolean letters, bool } if ((letters && Character.isLetter(ch)) || (numbers && Character.isDigit(ch)) - || (!letters && !numbers)) { - buffer[count] = ch; + || (!letters && !numbers)) + { + if(ch >= 56320 && ch <= 57343) { + if(count == 0) { + count++; + } else { + // low surrogate: store it, then put a random high surrogate in the slot before it + buffer[count] = ch; + count--; + buffer[count] = (char) (55296 + random.nextInt(128)); + } + } else if(ch >= 55296 && ch <= 56191) { + if(count == 0) { + count++; + } else { + // high surrogate: store a random low surrogate, then put the high surrogate in the slot before it + buffer[count] = (char) (56320 + random.nextInt(128)); + count--; + buffer[count] = ch; + } + } else if(ch >= 56192 && ch <= 56319) { + // private-use high surrogate (db80-dbff): no handling defined, so skip it + count++; + } else { + buffer[count] = ch; + } } else { count++; } diff --git a/src/test/org/apache/commons/lang/RandomStringUtilsTest.java b/src/test/org/apache/commons/lang/RandomStringUtilsTest.java index 473d974d0..84467703a 100644 --- a/src/test/org/apache/commons/lang/RandomStringUtilsTest.java +++ b/src/test/org/apache/commons/lang/RandomStringUtilsTest.java @@ -315,7 +315,33 @@ private double chiSquare(int[] expected, int[] observed) { } return sumSq; } - + + /** + * Checks that the string returned by {@link RandomStringUtils#random(int)} + * can be converted to UTF-8 and back without loss. 
+ * + * @author stefanhoehne@fastmail.fm + * @throws Exception + */ + public void testLang100() throws Exception { + int size = 5000; + String encoding = "UTF-8"; + String orig = RandomStringUtils.random(size); + byte[] bytes = orig.getBytes(encoding); + String copy = new String(bytes, encoding); + + // compare char by char so a failure reports the first differing index and both chars in hex + for (int i=0; i < orig.length() && i < copy.length(); i++) { + char o = orig.charAt(i); + char c = copy.charAt(i); + assertEquals("differs at " + i + "(" + Integer.toHexString((new Character(o)).hashCode()) + "," + + Integer.toHexString((new Character(c)).hashCode()) + ")", o, c); + } + // the loop stops at the shorter string, so compare the lengths as well + assertEquals(orig.length(), copy.length()); + // finally, compare the two strings as a whole + assertEquals(orig, copy); + } public static void main(String args[]) { TestRunner.run(suite());