From ea810cbbbde4de3a2b93a38aed53d8994b3b44af Mon Sep 17 00:00:00 2001 From: Matthew Jason Benson Date: Fri, 11 Jan 2008 20:13:02 +0000 Subject: [PATCH] [LANG-192] addSplitByCharacterType and camelCase variant git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@611288 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/lang/StringUtils.java | 79 +++++++++++++++++++ .../apache/commons/lang/StringUtilsTest.java | 48 ++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/commons/lang/StringUtils.java b/src/java/org/apache/commons/lang/StringUtils.java index c9c0092ee..bd72bb859 100644 --- a/src/java/org/apache/commons/lang/StringUtils.java +++ b/src/java/org/apache/commons/lang/StringUtils.java @@ -2607,6 +2607,85 @@ private static String[] splitWorker(String str, String separatorChars, int max, return (String[]) list.toArray(new String[list.size()]); } + /** + *

Splits a String by Character type as returned by + * java.lang.Character.getType(char). Groups of contiguous + * characters of the same type are returned as complete tokens. + *

+     * StringUtils.splitByCharacterType(null)         = null
+     * StringUtils.splitByCharacterType("")           = []
+     * StringUtils.splitByCharacterType("ab de fg")   = ["ab", " ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
+     * StringUtils.splitByCharacterType("fooBar")     = ["foo", "B", "ar"]
+     * StringUtils.splitByCharacterType("foo200Bar")  = ["foo", "200", "B", "ar"]
+     * StringUtils.splitByCharacterType("ASFRules")   = ["ASFR", "ules"]
+     * 
+ * @param str the String to split, may be null + * @return an array of parsed Strings, null if null String input + * @since 2.4 + */ + public static String[] splitByCharacterType(String str) { + return splitByCharacterType(str, false); + } + + /** + *

Splits a String by Character type as returned by + * java.lang.Character.getType(char). Groups of contiguous + * characters of the same type are returned as complete tokens, with the + * following exception: if camelCase is true, + * the character of type Character.UPPERCASE_LETTER, if any, + * immediately preceding a token of type Character.LOWERCASE_LETTER + * will belong to the following token rather than to the preceding, if any, + * Character.UPPERCASE_LETTER token. + *

+     * StringUtils.splitByCharacterType(null, true)         = null
+     * StringUtils.splitByCharacterType("", true)           = []
+     * StringUtils.splitByCharacterType("ab de fg", true)   = ["ab", " ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab   de fg", true) = ["ab", "   ", "de", " ", "fg"]
+     * StringUtils.splitByCharacterType("ab:cd:ef", true)   = ["ab", ":", "cd", ":", "ef"]
+     * StringUtils.splitByCharacterType("fooBar", true)     = ["foo", "Bar"]
+     * StringUtils.splitByCharacterType("foo200Bar", true)  = ["foo", "200", "Bar"]
+     * StringUtils.splitByCharacterType("ASFRules", true)   = ["ASF", "Rules"]
+     * 
+ * @param str the String to split, may be null + * @param camelCase whether to use so-called "camel-case" for letter types + * @return an array of parsed Strings, null if null String input + * @since 2.4 + */ + public static String[] splitByCharacterType(String str, boolean camelCase) { + if (str == null) { + return null; + } + if (str.length() == 0) { + return ArrayUtils.EMPTY_STRING_ARRAY; + } + char[] c = str.toCharArray(); + List list = new ArrayList(); + int tokenStart = 0; + int currentType = Character.getType(c[tokenStart]); + for (int pos = tokenStart + 1; pos < c.length; pos++) { + int type = Character.getType(c[pos]); + if (type == currentType) { + continue; + } + if (camelCase && type == Character.LOWERCASE_LETTER + && currentType == Character.UPPERCASE_LETTER) { + int newTokenStart = pos - 1; + if (newTokenStart != tokenStart) { + list.add(new String(c, tokenStart, newTokenStart - tokenStart)); + tokenStart = newTokenStart; + } + } else { + list.add(new String(c, tokenStart, pos - tokenStart)); + tokenStart = pos; + } + currentType = type; + } + list.add(new String(c, tokenStart, c.length - tokenStart)); + return (String[]) list.toArray(new String[list.size()]); + } + // Joining //----------------------------------------------------------------------- /** diff --git a/src/test/org/apache/commons/lang/StringUtilsTest.java b/src/test/org/apache/commons/lang/StringUtilsTest.java index dedfc98f5..f87e60403 100644 --- a/src/test/org/apache/commons/lang/StringUtilsTest.java +++ b/src/test/org/apache/commons/lang/StringUtilsTest.java @@ -847,7 +847,53 @@ private void innerTestSplitPreserveAllTokens(char separator, String sepStr, char assertEquals(msg, "a", res[0]); assertEquals(msg, str.substring(2), res[1]); } - + + public void testSplitByCharacterType() { + assertNull(StringUtils.splitByCharacterType(null)); + assertEquals(0, StringUtils.splitByCharacterType("").length); + assertNull(StringUtils.splitByCharacterType(null, true)); + assertEquals(0, 
StringUtils.splitByCharacterType("", true).length); + + final boolean camelCase = true; + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", + "fg" }, StringUtils.splitByCharacterType("ab de fg"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", + "fg" }, StringUtils.splitByCharacterType("ab de fg", camelCase))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", + "fg" }, StringUtils.splitByCharacterType("ab de fg"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", + "fg" }, StringUtils.splitByCharacterType("ab de fg", camelCase))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":", + "ef" }, StringUtils.splitByCharacterType("ab:cd:ef"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":", + "ef" }, StringUtils.splitByCharacterType("ab:cd:ef", camelCase))); + + assertTrue(ArrayUtils.isEquals(new String[] { "foo", "B", "ar" }, + StringUtils.splitByCharacterType("fooBar"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "foo", "Bar" }, + StringUtils.splitByCharacterType("fooBar", camelCase))); + + assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "B", "ar" }, + StringUtils.splitByCharacterType("foo200Bar"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "Bar" }, + StringUtils.splitByCharacterType("foo200Bar", camelCase))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ASFR", "ules" }, + StringUtils.splitByCharacterType("ASFRules"))); + + assertTrue(ArrayUtils.isEquals(new String[] { "ASF", "Rules" }, + StringUtils.splitByCharacterType("ASFRules", camelCase))); + + } + public void testDeprecatedDeleteSpace_String() { assertEquals(null, StringUtils.deleteSpaces(null)); assertEquals("", StringUtils.deleteSpaces(""));