diff --git a/src/java/org/apache/commons/lang/StringUtils.java b/src/java/org/apache/commons/lang/StringUtils.java index c9c0092ee..bd72bb859 100644 --- a/src/java/org/apache/commons/lang/StringUtils.java +++ b/src/java/org/apache/commons/lang/StringUtils.java @@ -2607,6 +2607,85 @@ private static String[] splitWorker(String str, String separatorChars, int max, return (String[]) list.toArray(new String[list.size()]); } + /** + *
<p>Splits a String by Character type as returned by
+ * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
+ * characters of the same type are returned as complete tokens.</p>
+ * <pre>
+ * StringUtils.splitByCharacterType(null) = null
+ * StringUtils.splitByCharacterType("") = []
+ * StringUtils.splitByCharacterType("ab de fg") = ["ab", " ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
+ * StringUtils.splitByCharacterType("ab:cd:ef") = ["ab", ":", "cd", ":", "ef"]
+ * StringUtils.splitByCharacterType("fooBar") = ["foo", "B", "ar"]
+ * StringUtils.splitByCharacterType("foo200Bar") = ["foo", "200", "B", "ar"]
+ * StringUtils.splitByCharacterType("ASFRules") = ["ASFR", "ules"]
+ * </pre>
+ * @param str the String to split, may be <code>null</code>
+ * @return an array of parsed Strings, <code>null</code> if null String input
+ * @since 2.4
+ */
+    public static String[] splitByCharacterType(String str) {
+        // Plain character-type splitting: the camel-case exception is switched off.
+        final boolean camelCase = false;
+        return splitByCharacterType(str, camelCase);
+    }
+
+    /**
+     * <p>Splits a String into tokens according to the character type reported by
+     * <code>java.lang.Character.getType(char)</code>. Every run of adjacent
+     * characters that share one type becomes a single token, with one exception:
+     * when <code>camelCase</code> is <code>true</code>, a character of type
+     * <code>Character.UPPERCASE_LETTER</code> that immediately precedes a token of
+     * type <code>Character.LOWERCASE_LETTER</code> is attached to that following
+     * token instead of to the preceding <code>Character.UPPERCASE_LETTER</code>
+     * token, if any.</p>
+     * <pre>
+     * StringUtils.splitByCharacterType(null, true)         = null
+     * StringUtils.splitByCharacterType("", true)           = []
+     * StringUtils.splitByCharacterType("ab de fg", true)   = ["ab", " ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab   de fg", true) = ["ab", "   ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab:cd:ef", true)   = ["ab", ":", "cd", ":", "ef"]
+     * StringUtils.splitByCharacterType("fooBar", true)     = ["foo", "Bar"]
+     * StringUtils.splitByCharacterType("foo200Bar", true)  = ["foo", "200", "Bar"]
+     * StringUtils.splitByCharacterType("ASFRules", true)   = ["ASF", "Rules"]
+     * </pre>
+     * @param str the String to split, may be <code>null</code>
+     * @param camelCase whether to use so-called "camel-case" for letter types
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     * @since 2.4
+     */
+    public static String[] splitByCharacterType(String str, boolean camelCase) {
+        if (str == null) {
+            return null;
+        }
+        final int length = str.length();
+        if (length == 0) {
+            return ArrayUtils.EMPTY_STRING_ARRAY;
+        }
+        List tokens = new ArrayList();
+        int start = 0;
+        int runType = Character.getType(str.charAt(0));
+        for (int i = 1; i < length; i++) {
+            int nextType = Character.getType(str.charAt(i));
+            if (nextType != runType) {
+                boolean upperThenLower = camelCase
+                        && runType == Character.UPPERCASE_LETTER
+                        && nextType == Character.LOWERCASE_LETTER;
+                // In camel-case mode the last upper-case letter of the run opens
+                // the lower-case token, so the cut falls one character earlier.
+                int boundary = upperThenLower ? i - 1 : i;
+                // boundary == start only for a single upper-case letter followed by
+                // lower case; nothing is emitted and the token simply keeps growing.
+                if (boundary != start) {
+                    tokens.add(str.substring(start, boundary));
+                    start = boundary;
+                }
+                runType = nextType;
+            }
+        }
+        // Whatever remains after the last type change is the final token.
+        tokens.add(str.substring(start));
+        return (String[]) tokens.toArray(new String[tokens.size()]);
+    }
+
// Joining
//-----------------------------------------------------------------------
/**
diff --git a/src/test/org/apache/commons/lang/StringUtilsTest.java b/src/test/org/apache/commons/lang/StringUtilsTest.java
index dedfc98f5..f87e60403 100644
--- a/src/test/org/apache/commons/lang/StringUtilsTest.java
+++ b/src/test/org/apache/commons/lang/StringUtilsTest.java
@@ -847,7 +847,53 @@ private void innerTestSplitPreserveAllTokens(char separator, String sepStr, char
assertEquals(msg, "a", res[0]);
assertEquals(msg, str.substring(2), res[1]);
}
-
+
+    /**
+     * Checks both overloads of StringUtils.splitByCharacterType: null and empty
+     * input, whitespace and punctuation separators, a run of several equal-type
+     * characters, and upper/lower-case transitions with and without the
+     * camel-case exception.
+     */
+    public void testSplitByCharacterType() {
+        assertNull(StringUtils.splitByCharacterType(null));
+        assertEquals(0, StringUtils.splitByCharacterType("").length);
+        assertNull(StringUtils.splitByCharacterType(null, true));
+        assertEquals(0, StringUtils.splitByCharacterType("", true).length);
+
+        final boolean camelCase = true;
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ",
+                "fg" }, StringUtils.splitByCharacterType("ab de fg")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ",
+                "fg" }, StringUtils.splitByCharacterType("ab de fg", camelCase)));
+
+        // A run of several spaces must come back as one multi-character token.
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", "   ", "de", " ",
+                "fg" }, StringUtils.splitByCharacterType("ab   de fg")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", "   ", "de", " ",
+                "fg" }, StringUtils.splitByCharacterType("ab   de fg", camelCase)));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":",
+                "ef" }, StringUtils.splitByCharacterType("ab:cd:ef")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":",
+                "ef" }, StringUtils.splitByCharacterType("ab:cd:ef", camelCase)));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "foo", "B", "ar" },
+                StringUtils.splitByCharacterType("fooBar")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "foo", "Bar" },
+                StringUtils.splitByCharacterType("fooBar", camelCase)));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "B", "ar" },
+                StringUtils.splitByCharacterType("foo200Bar")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "Bar" },
+                StringUtils.splitByCharacterType("foo200Bar", camelCase)));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ASFR", "ules" },
+                StringUtils.splitByCharacterType("ASFRules")));
+
+        assertTrue(ArrayUtils.isEquals(new String[] { "ASF", "Rules" },
+                StringUtils.splitByCharacterType("ASFRules", camelCase)));
+
+    }
+
public void testDeprecatedDeleteSpace_String() {
assertEquals(null, StringUtils.deleteSpaces(null));
assertEquals("", StringUtils.deleteSpaces(""));