diff --git a/src/java/org/apache/commons/lang/StringUtils.java b/src/java/org/apache/commons/lang/StringUtils.java index ecd5d5860..2746e060c 100644 --- a/src/java/org/apache/commons/lang/StringUtils.java +++ b/src/java/org/apache/commons/lang/StringUtils.java @@ -2228,7 +2228,7 @@ public class StringUtils { * @return an array of parsed Strings, null if null String was input */ public static String[] splitByWholeSeparator(String str, String separator) { - return splitByWholeSeparator( str, separator, -1 ) ; + return splitByWholeSeparatorWorker( str, separator, -1, false ) ; } /** @@ -2259,6 +2259,88 @@ public class StringUtils { * @return an array of parsed Strings, null if null String was input */ public static String[] splitByWholeSeparator( String str, String separator, int max ) { + return splitByWholeSeparatorWorker(str, separator, max, false); + } + + /** + *

Splits the provided text into an array, separator string specified.

+ * + *

The separator is not included in the returned String array. + * Adjacent separators are treated as separators for empty tokens. + * For more control over the split use the StrTokenizer class.

+ * + *

A null input String returns null. + * A null separator splits on whitespace.

+ * + *
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *)               = null
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *)                 = []
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null)      = ["ab", "de", "fg"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab   de fg", null)    = ["ab", "", "", "de", "fg"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":")       = ["ab", "cd", "ef"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-") = ["ab", "cd", "ef"]
+     * 
+ * + * @param str the String to parse, may be null + * @param separator String containing the String to be used as a delimiter, + * null splits on whitespace + * @return an array of parsed Strings, null if null String was input + * @since 2.4 + */ + public static String[] splitByWholeSeparatorPreserveAllTokens(String str, String separator) { + return splitByWholeSeparatorWorker( str, separator, -1, true ) ; + } + + /** + *

Splits the provided text into an array, separator string specified. + * Returns a maximum of max substrings.

+ * + *

The separator is not included in the returned String array. + * Adjacent separators are treated as separators for empty tokens. + * For more control over the split use the StrTokenizer class.

+ * + *

A null input String returns null. + * A null separator splits on whitespace.

+ * + *
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *, *)               = null
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *, *)                 = []
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null, 0)      = ["ab", "de", "fg"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab   de fg", null, 0)    = ["ab", "", "", "de", "fg"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":", 2)       = ["ab", "cd:ef"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
+     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
+     * 
+ * + * @param str the String to parse, may be null + * @param separator String containing the String to be used as a delimiter, + * null splits on whitespace + * @param max the maximum number of elements to include in the returned + * array. A zero or negative value implies no limit. + * @return an array of parsed Strings, null if null String was input + * @since 2.4 + */ + public static String[] splitByWholeSeparatorPreserveAllTokens( String str, String separator, int max ) { + return splitByWholeSeparatorWorker(str, separator, max, true); + } + + /** + * Performs the logic for the splitByWholeSeparator and splitByWholeSeparatorPreserveAllTokens methods. + * + * @param str the String to parse, may be null + * @param separator String containing the String to be used as a delimiter, + * null splits on whitespace + * @param max the maximum number of elements to include in the returned + * array. A zero or negative value implies no limit. + * @param preserveAllTokens if true, adjacent separators are + * treated as empty token separators; if false, adjacent + * separators are treated as one separator. + * @return an array of parsed Strings, null if null String was input + * @since 2.4 + */ + private static String[] splitByWholeSeparatorWorker( String str, String separator, + int max, boolean preserveAllTokens ) + { if (str == null) { return null; } @@ -2271,7 +2353,7 @@ public class StringUtils { if ( ( separator == null ) || ( "".equals( separator ) ) ) { // Split on whitespace. - return split( str, null, max ) ; + return splitWorker( str, null, max, preserveAllTokens ) ; } @@ -2303,6 +2385,15 @@ public class StringUtils { } } else { // We found a consecutive occurrence of the separator, so skip it. 
+ if( preserveAllTokens ) { + numberOfSubstrings += 1 ; + if ( numberOfSubstrings == max ) { + end = len ; + substrings.add( str.substring( beg ) ) ; + } else { + substrings.add( "" ); + } + } beg = end + separatorLength ; } } else { diff --git a/src/test/org/apache/commons/lang/StringUtilsTest.java b/src/test/org/apache/commons/lang/StringUtilsTest.java index 1629ed782..bf44c8dd7 100644 --- a/src/test/org/apache/commons/lang/StringUtilsTest.java +++ b/src/test/org/apache/commons/lang/StringUtilsTest.java @@ -486,6 +486,51 @@ public class StringUtilsTest extends TestCase { } } + public void testSplitByWholeSeparatorPreserveAllTokens_StringStringInt() { + assertEquals( null, StringUtils.splitByWholeSeparatorPreserveAllTokens( null, ".", -1 ) ) ; + + assertEquals( 0, StringUtils.splitByWholeSeparatorPreserveAllTokens( "", ".", -1 ).length ) ; + + // test whitespace + String input = "ab de fg" ; + String[] expected = new String[] { "ab", "", "", "de", "fg" } ; + + String[] actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, null, -1 ) ; + assertEquals( expected.length, actual.length ) ; + for ( int i = 0 ; i < actual.length ; i+= 1 ) { + assertEquals( expected[i], actual[i] ); + } + + // test delimiter singlechar + input = "1::2:::3::::4"; + expected = new String[] { "1", "", "2", "", "", "3", "", "", "", "4" }; + + actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, ":", -1 ) ; + assertEquals( expected.length, actual.length ) ; + for ( int i = 0 ; i < actual.length ; i+= 1 ) { + assertEquals( expected[i], actual[i] ); + } + + // test delimiter multichar + input = "1::2:::3::::4"; + expected = new String[] { "1", "2", ":3", "", "4" }; + + actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, "::", -1 ) ; + assertEquals( expected.length, actual.length ) ; + for ( int i = 0 ; i < actual.length ; i+= 1 ) { + assertEquals( expected[i], actual[i] ); + } + + // test delimiter char with max + input = "1::2::3:4"; + expected = new 
String[] { "1", "", "2", ":3:4" }; + + actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, ":", 4 ) ; + assertEquals( expected.length, actual.length ) ; + for ( int i = 0 ; i < actual.length ; i+= 1 ) { + assertEquals( expected[i], actual[i] ); + } + } public void testSplitPreserveAllTokens_String() { assertEquals(null, StringUtils.splitPreserveAllTokens(null));