Applying my patch from LANG-257, adding Cameron Whitehead's desired splitByWholeSeparatorPreserveAllTokens methods. In addition to the patch, I've added @since 2.4 tags to the javadoc

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@612367 13f79535-47bb-0310-9956-ffa450edef68
2008-01-16 06:43:30 +00:00 · 2008-01-16 06:43:30 +00:00 · 2a94f969ed
parent 77e9314410
commit 2a94f969ed
2 changed files with 138 additions and 2 deletions
--- a/src/java/org/apache/commons/lang/StringUtils.java
+++ b/src/java/org/apache/commons/lang/StringUtils.java
@ -2228,7 +2228,7 @@ public class StringUtils {
     * @return an array of parsed Strings, <code>null</code> if null String was input
     */
    public static String[] splitByWholeSeparator(String str, String separator) {
-        return splitByWholeSeparator( str, separator, -1 ) ;
+        return splitByWholeSeparatorWorker( str, separator, -1, false ) ;
    }
    /**
@ -2259,6 +2259,88 @@ public class StringUtils {
     * @return an array of parsed Strings, <code>null</code> if null String was input
     */
    public static String[] splitByWholeSeparator( String str, String separator, int max ) {
        return splitByWholeSeparatorWorker(str, separator, max, false);
    }
    /**
     * <p>Splits the provided text into an array, separator string specified. </p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.
     * For more control over the split use the StrTokenizer class.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separator splits on whitespace.</p>
     *
     * <pre>
     * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *)               = null
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *)                 = []
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null)      = ["ab", "de", "fg"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab   de fg", null)    = ["ab", "", "", "de", "fg"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":")       = ["ab", "cd", "ef"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-") = ["ab", "cd", "ef"]
     * </pre>
     *
     * @param str  the String to parse, may be null
     * @param separator  String containing the String to be used as a delimiter,
     *  <code>null</code> splits on whitespace
     * @return an array of parsed Strings, <code>null</code> if null String was input
     * @since 2.4
     */
    public static String[] splitByWholeSeparatorPreserveAllTokens(String str, String separator) {
        return splitByWholeSeparatorWorker( str, separator, -1, true ) ;
    }
    /**
     * <p>Splits the provided text into an array, separator string specified.
     * Returns a maximum of <code>max</code> substrings.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.
     * For more control over the split use the StrTokenizer class.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separator splits on whitespace.</p>
     *
     * <pre>
     * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *, *)               = null
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *, *)                 = []
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null, 0)      = ["ab", "de", "fg"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab   de fg", null, 0)    = ["ab", "", "", "de", "fg"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":", 2)       = ["ab", "cd:ef"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
     * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
     * </pre>
     *
     * @param str  the String to parse, may be null
     * @param separator  String containing the String to be used as a delimiter,
     *  <code>null</code> splits on whitespace
     * @param max  the maximum number of elements to include in the returned
     *  array. A zero or negative value implies no limit.
     * @return an array of parsed Strings, <code>null</code> if null String was input
     * @since 2.4
     */
    public static String[] splitByWholeSeparatorPreserveAllTokens( String str, String separator, int max ) {
        return splitByWholeSeparatorWorker(str, separator, max, true);
    }
    /**
     * Performs the logic for the <code>splitByWholeSeparatorPreserveAllTokens</code> methods.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separator  String containing the String to be used as a delimiter,
     *  <code>null</code> splits on whitespace
     * @param max  the maximum number of elements to include in the returned
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     * treated as empty token separators; if <code>false</code>, adjacent
     * separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.4
     */
    private static String[] splitByWholeSeparatorWorker( String str, String separator, 
                                                         int max, boolean preserveAllTokens ) 
    {
        if (str == null) {
            return null;
        }
@ -2271,7 +2353,7 @@ public class StringUtils {
        if ( ( separator == null ) || ( "".equals( separator ) ) ) {
            // Split on whitespace.
-            return split( str, null, max ) ;
+            return splitWorker( str, null, max, preserveAllTokens ) ;
        }
@ -2303,6 +2385,15 @@ public class StringUtils {
                    }
                } else {
                    // We found a consecutive occurrence of the separator, so skip it.
                    if( preserveAllTokens ) {
                        numberOfSubstrings += 1 ;
                        if ( numberOfSubstrings == max ) {
                            end = len ;
                            substrings.add( str.substring( beg ) ) ;
                        } else {
                            substrings.add( "" );
                        }
                    }
                    beg = end + separatorLength ;
                }
            } else {
--- a/src/test/org/apache/commons/lang/StringUtilsTest.java
+++ b/src/test/org/apache/commons/lang/StringUtilsTest.java
@ -486,6 +486,51 @@ public class StringUtilsTest extends TestCase {
        }
    }
    public void testSplitByWholeSeparatorPreserveAllTokens_StringStringInt() {
        assertEquals( null, StringUtils.splitByWholeSeparatorPreserveAllTokens( null, ".", -1 ) ) ;
        assertEquals( 0, StringUtils.splitByWholeSeparatorPreserveAllTokens( "", ".", -1 ).length ) ;
        // test whitespace
        String input = "ab   de fg" ;
        String[] expected = new String[] { "ab", "", "", "de", "fg" } ;
        String[] actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, null, -1 ) ;
        assertEquals( expected.length, actual.length ) ;
        for ( int i = 0 ; i < actual.length ; i+= 1 ) {
            assertEquals( expected[i], actual[i] );
        }
        // test delimiter singlechar
        input = "1::2:::3::::4";
        expected = new String[] { "1", "", "2", "", "", "3", "", "", "", "4" };
        actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, ":", -1 ) ;
        assertEquals( expected.length, actual.length ) ;
        for ( int i = 0 ; i < actual.length ; i+= 1 ) {
            assertEquals( expected[i], actual[i] );
        }
        // test delimiter multichar
        input = "1::2:::3::::4";
        expected = new String[] { "1", "2", ":3", "", "4" };
        actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, "::", -1 ) ;
        assertEquals( expected.length, actual.length ) ;
        for ( int i = 0 ; i < actual.length ; i+= 1 ) {
            assertEquals( expected[i], actual[i] );
        }
        // test delimiter char with max
        input = "1::2::3:4";
        expected = new String[] { "1", "", "2", ":3:4" };
        actual = StringUtils.splitByWholeSeparatorPreserveAllTokens( input, ":", 4 ) ;
        assertEquals( expected.length, actual.length ) ;
        for ( int i = 0 ; i < actual.length ; i+= 1 ) {
            assertEquals( expected[i], actual[i] );
        }
    }
    public void testSplitPreserveAllTokens_String() {
        assertEquals(null, StringUtils.splitPreserveAllTokens(null));