[LANG-192] add splitByCharacterType and camelCase variant

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@611288 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Matthew Jason Benson 2008-01-11 20:13:02 +00:00
parent 400f92899e
commit ea810cbbbd
2 changed files with 126 additions and 1 deletions

View File

@ -2607,6 +2607,85 @@ private static String[] splitWorker(String str, String separatorChars, int max,
return (String[]) list.toArray(new String[list.size()]);
}
/**
 * <p>Splits a String by Character type as returned by
 * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
 * characters of the same type are returned as complete tokens.</p>
 *
 * <p>This is equivalent to calling
 * <code>splitByCharacterType(str, false)</code>, i.e. no camel-case
 * handling is applied: an upper-case letter followed by lower-case
 * letters forms a token of its own.</p>
 * <pre>
 * StringUtils.splitByCharacterType(null)         = null
 * StringUtils.splitByCharacterType("")           = []
 * StringUtils.splitByCharacterType("ab de fg")   = ["ab", " ", "de", " ", "fg"]
 * StringUtils.splitByCharacterType("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
 * StringUtils.splitByCharacterType("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
 * StringUtils.splitByCharacterType("fooBar")     = ["foo", "B", "ar"]
 * StringUtils.splitByCharacterType("foo200Bar")  = ["foo", "200", "B", "ar"]
 * StringUtils.splitByCharacterType("ASFRules")   = ["ASFR", "ules"]
 * </pre>
 * @param str the String to split, may be <code>null</code>
 * @return an array of parsed Strings, <code>null</code> if null String input
 * @since 2.4
 */
public static String[] splitByCharacterType(String str) {
return splitByCharacterType(str, false);
}
/**
 * <p>Breaks a String into tokens wherever the character type reported by
 * <code>java.lang.Character.getType(char)</code> changes. Each run of
 * adjacent characters sharing one type becomes a single token.</p>
 *
 * <p>When <code>camelCase</code> is <code>true</code> one exception applies:
 * at a transition from <code>Character.UPPERCASE_LETTER</code> to
 * <code>Character.LOWERCASE_LETTER</code>, the last upper-case character is
 * moved to the front of the lower-case token instead of staying at the end
 * of the preceding upper-case token (which is dropped if that leaves it
 * empty).</p>
 * <pre>
 * StringUtils.splitByCharacterType(null, true)         = null
 * StringUtils.splitByCharacterType("", true)           = []
 * StringUtils.splitByCharacterType("ab de fg", true)   = ["ab", " ", "de", " ", "fg"]
 * StringUtils.splitByCharacterType("ab   de fg", true) = ["ab", "   ", "de", " ", "fg"]
 * StringUtils.splitByCharacterType("ab:cd:ef", true)   = ["ab", ":", "cd", ":", "ef"]
 * StringUtils.splitByCharacterType("fooBar", true)     = ["foo", "Bar"]
 * StringUtils.splitByCharacterType("foo200Bar", true)  = ["foo", "200", "Bar"]
 * StringUtils.splitByCharacterType("ASFRules", true)   = ["ASF", "Rules"]
 * StringUtils.splitByCharacterType("ASFRules", false)  = ["ASFR", "ules"]
 * </pre>
 * @param str the String to split, may be <code>null</code>
 * @param camelCase whether an upper-case letter directly before a run of
 *  lower-case letters should start that run's token
 * @return an array of parsed Strings, <code>null</code> if null String input
 * @since 2.4
 */
public static String[] splitByCharacterType(String str, boolean camelCase) {
    if (str == null) {
        return null;
    }
    if (str.length() == 0) {
        return ArrayUtils.EMPTY_STRING_ARRAY;
    }
    final char[] chars = str.toCharArray();
    final List tokens = new ArrayList();
    int start = 0;
    int runType = Character.getType(chars[0]);
    for (int i = 1; i < chars.length; i++) {
        final int type = Character.getType(chars[i]);
        if (type != runType) {
            final boolean upperToLower = camelCase
                    && runType == Character.UPPERCASE_LETTER
                    && type == Character.LOWERCASE_LETTER;
            // In camel-case mode the trailing upper-case character opens the
            // lower-case token, so the cut is placed one position earlier.
            final int boundary = upperToLower ? i - 1 : i;
            // boundary == start only when a single upper-case character
            // precedes the lower-case run; nothing is emitted in that case.
            if (boundary != start) {
                tokens.add(new String(chars, start, boundary - start));
                start = boundary;
            }
            runType = type;
        }
    }
    // Flush the final run, which always holds at least one character.
    tokens.add(new String(chars, start, chars.length - start));
    return (String[]) tokens.toArray(new String[tokens.size()]);
}
// Joining
//-----------------------------------------------------------------------
/**

View File

@ -848,6 +848,52 @@ private void innerTestSplitPreserveAllTokens(char separator, String sepStr, char
assertEquals(msg, str.substring(2), res[1]);
}
/**
 * Exercises both overloads of <code>StringUtils.splitByCharacterType</code>:
 * null and empty input, separators of one and of several characters,
 * digits between letters, and upper/lower-case transitions with and
 * without camel-case handling.
 */
public void testSplitByCharacterType() {
    final boolean camelCase = true;

    // null in, null out; empty in, empty array out (both overloads)
    assertNull(StringUtils.splitByCharacterType(null));
    assertEquals(0, StringUtils.splitByCharacterType("").length);
    assertNull(StringUtils.splitByCharacterType(null, camelCase));
    assertEquals(0, StringUtils.splitByCharacterType("", camelCase).length);

    // single-character separators
    assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", "fg" },
            StringUtils.splitByCharacterType("ab de fg")));
    assertTrue(ArrayUtils.isEquals(new String[] { "ab", " ", "de", " ", "fg" },
            StringUtils.splitByCharacterType("ab de fg", camelCase)));

    // a run of several separators of the same type must stay one token
    assertTrue(ArrayUtils.isEquals(new String[] { "ab", "   ", "de", " ", "fg" },
            StringUtils.splitByCharacterType("ab   de fg")));
    assertTrue(ArrayUtils.isEquals(new String[] { "ab", "   ", "de", " ", "fg" },
            StringUtils.splitByCharacterType("ab   de fg", camelCase)));

    assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":", "ef" },
            StringUtils.splitByCharacterType("ab:cd:ef")));
    assertTrue(ArrayUtils.isEquals(new String[] { "ab", ":", "cd", ":", "ef" },
            StringUtils.splitByCharacterType("ab:cd:ef", camelCase)));

    // upper/lower-case transitions: plain mode isolates the upper-case run,
    // camel-case mode attaches its last character to the lower-case run
    assertTrue(ArrayUtils.isEquals(new String[] { "foo", "B", "ar" },
            StringUtils.splitByCharacterType("fooBar")));
    assertTrue(ArrayUtils.isEquals(new String[] { "foo", "Bar" },
            StringUtils.splitByCharacterType("fooBar", camelCase)));

    assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "B", "ar" },
            StringUtils.splitByCharacterType("foo200Bar")));
    assertTrue(ArrayUtils.isEquals(new String[] { "foo", "200", "Bar" },
            StringUtils.splitByCharacterType("foo200Bar", camelCase)));

    assertTrue(ArrayUtils.isEquals(new String[] { "ASFR", "ules" },
            StringUtils.splitByCharacterType("ASFRules")));
    assertTrue(ArrayUtils.isEquals(new String[] { "ASF", "Rules" },
            StringUtils.splitByCharacterType("ASFRules", camelCase)));
}
public void testDeprecatedDeleteSpace_String() {
assertEquals(null, StringUtils.deleteSpaces(null));
assertEquals("", StringUtils.deleteSpaces(""));