http://issues.apache.org/bugzilla/show_bug.cgi?id=22692 :

- added new splitPreserveAllTokens methods to mirror the split functionality, preserving empty tokens indicated by adjacent separators; - refactored logic of existing split methods into splitWorker for sharing by the new splitPreserveAllTokens methods git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@137875 13f79535-47bb-0310-9956-ffa450edef68
2025-02-12 21:15:05 +00:00 · 2004-07-11 16:48:31 +00:00 · 2004-07-11 16:48:31 +00:00 · f919d93040
commit f919d93040
parent 3787473c50
1 changed files with 225 additions and 34 deletions
--- a/src/java/org/apache/commons/lang/StringUtils.java
+++ b/src/java/org/apache/commons/lang/StringUtils.java
@@ -111,7 +111,7 @@
 * @author Al Chou
 * @author Michael Davey
 * @since 1.0
- * @version $Id: StringUtils.java,v 1.130 2004/05/24 20:15:44 fredrik Exp $
+ * @version $Id: StringUtils.java,v 1.131 2004/07/11 16:48:31 stevencaswell Exp $
 */
 public class StringUtils {
    // Performance testing notes (JDK 1.4, Jul03, scolebourne)
@@ -1994,34 +1994,7 @@ public static String[] split(String str) {
     * @since 2.0
     */
    public static String[] split(String str, char separatorChar) {
-        // Performance tuned for 2.0 (JDK1.4)
-
-        if (str == null) {
-            return null;
-        }
-        int len = str.length();
-        if (len == 0) {
-            return ArrayUtils.EMPTY_STRING_ARRAY;
-        }
-        List list = new ArrayList();
-        int i = 0, start = 0;
-        boolean match = false;
-        while (i < len) {
-            if (str.charAt(i) == separatorChar) {
-                if (match) {
-                    list.add(str.substring(start, i));
-                    match = false;
-                }
-                start = ++i;
-                continue;
-            }
-            match = true;
-            i++;
-        }
-        if (match) {
-            list.add(str.substring(start, i));
-        }
-        return (String[]) list.toArray(new String[list.size()]);
+        return splitWorker(str, separatorChar, false);
    }

    /**
@@ -2050,7 +2023,7 @@ public static String[] split(String str, char separatorChar) {
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] split(String str, String separatorChars) {
-        return split(str, separatorChars, -1);
+        return splitWorker(str, separatorChars, -1, false);
    }

    /**
@@ -2084,6 +2057,211 @@ public static String[] split(String str, String separatorChars) {
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] split(String str, String separatorChars, int max) {
+        return splitWorker(str, separatorChars, max, false);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Splits the provided text into an array, using whitespace as the
+     * separator, preserving all tokens, including empty tokens created by 
+     * adjacent separators. This is an alternative to using StringTokenizer.
+     * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
+     *
+     * <p>The separator is not included in the returned String array.
+     * Adjacent separators are treated as separators for empty tokens.
+     * For more control over the split use the Tokenizer class.</p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.</p>
+     *
+     * <pre>
+     * StringUtils.splitPreserveAllTokens(null)       = null
+     * StringUtils.splitPreserveAllTokens("")         = []
+     * StringUtils.splitPreserveAllTokens("abc def")  = ["abc", "def"]
+     * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
+     * StringUtils.splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
+     * </pre>
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     * @since 2.1
+     */
+    public static String[] splitPreserveAllTokens(String str) {
+        return splitWorker(str, null, -1, true);
+    }
+
+    /**
+     * <p>Splits the provided text into an array, separator specified,
+     * preserving all tokens, including empty tokens created by adjacent
+     * separators. This is an alternative to using StringTokenizer.</p>
+     *
+     * <p>The separator is not included in the returned String array.
+     * Adjacent separators are treated as separators for empty tokens.
+     * For more control over the split use the Tokenizer class.</p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.</p>
+     *
+     * <pre>
+     * StringUtils.splitPreserveAllTokens(null, *)         = null
+     * StringUtils.splitPreserveAllTokens("", *)           = []
+     * StringUtils.splitPreserveAllTokens("a.b.c", '.')    = ["a", "b", "c"]
+     * StringUtils.splitPreserveAllTokens("a..b.c", '.')   = ["a", "", "b", "c"]
+     * StringUtils.splitPreserveAllTokens("a:b:c", '.')    = ["a:b:c"]
+     * StringUtils.splitPreserveAllTokens("a\tb\nc", '\t') = ["a", "b\nc"]
+     * StringUtils.splitPreserveAllTokens("a b c", ' ')    = ["a", "b", "c"]
+     * StringUtils.splitPreserveAllTokens("a b c ", ' ')   = ["a", "b", "c", ""]
+     * StringUtils.splitPreserveAllTokens("a b c  ", ' ')  = ["a", "b", "c", "", ""]
+     * StringUtils.splitPreserveAllTokens(" a b c", ' ')   = ["", "a", "b", "c"]
+     * StringUtils.splitPreserveAllTokens("  a b c", ' ')  = ["", "", "a", "b", "c"]
+     * StringUtils.splitPreserveAllTokens(" a b c ", ' ')  = ["", "a", "b", "c", ""]
+     * </pre>
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @param separatorChar  the character used as the delimiter
+     *  (to split on whitespace use {@link #splitPreserveAllTokens(String)})
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     * @since 2.1
+     */
+    public static String[] splitPreserveAllTokens(String str, char separatorChar) {
+        return splitWorker(str, separatorChar, true);
+    }
+
+    /**
+     * Performs the logic for the <code>split</code> and 
+     * <code>splitPreserveAllTokens</code> methods that do not limit the
+     * length of the returned array.
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @param separatorChar the separator character
+     * @param preserveAllTokens if <code>true</code>, adjacent separators are
+     * treated as empty token separators; if <code>false</code>, adjacent
+     * separators are treated as one separator.
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     */
+    private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) {
+        // Performance tuned for 2.0 (JDK1.4)
+
+        if (str == null) {
+            return null;
+        }
+        int len = str.length();
+        if (len == 0) {
+            return ArrayUtils.EMPTY_STRING_ARRAY;
+        }
+        List list = new ArrayList();
+        int i = 0, start = 0;
+        boolean match = false;
+        boolean lastMatch = false;
+        while (i < len) {
+            if (str.charAt(i) == separatorChar) {
+                if (match || preserveAllTokens) {
+                    list.add(str.substring(start, i));
+                    match = false;
+                    lastMatch = true;
+                }
+                start = ++i;
+                continue;
+            } else {
+                lastMatch = false;
+            }
+            match = true;
+            i++;
+        }
+        if (match || (preserveAllTokens && lastMatch)) {
+            list.add(str.substring(start, i));
+        }
+        return (String[]) list.toArray(new String[list.size()]);
+    }
+
+    /**
+     * <p>Splits the provided text into an array, separators specified, 
+     * preserving all tokens, including empty tokens created by adjacent
+     * separators. This is an alternative to using StringTokenizer.</p>
+     *
+     * <p>The separator is not included in the returned String array.
+     * Adjacent separators are treated as separators for empty tokens.
+     * For more control over the split use the Tokenizer class.</p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.
+     * A <code>null</code> separatorChars splits on whitespace.</p>
+     *
+     * <pre>
+     * StringUtils.splitPreserveAllTokens(null, *)           = null
+     * StringUtils.splitPreserveAllTokens("", *)             = []
+     * StringUtils.splitPreserveAllTokens("abc def", null)   = ["abc", "def"]
+     * StringUtils.splitPreserveAllTokens("abc def", " ")    = ["abc", "def"]
+     * StringUtils.splitPreserveAllTokens("abc  def", " ")   = ["abc", "", "def"]
+     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
+     * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":")  = ["ab", "cd", "ef", ""]
+     * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
+     * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":")  = ["ab", "", "cd", "ef"]
+     * StringUtils.splitPreserveAllTokens(":cd:ef", ":")     = ["", "cd", "ef"]
+     * StringUtils.splitPreserveAllTokens("::cd:ef", ":")    = ["", "", "cd", "ef"]
+     * StringUtils.splitPreserveAllTokens(":cd:ef:", ":")    = ["", "cd", "ef", ""]
+     * </pre>
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @param separatorChars  the characters used as the delimiters,
+     *  <code>null</code> splits on whitespace
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     */
+    public static String[] splitPreserveAllTokens(String str, String separatorChars) {
+        return splitWorker(str, separatorChars, -1, true);
+    }
+
+    /**
+     * <p>Splits the provided text into an array with a maximum length,
+     * separators specified, preserving all tokens, including empty tokens 
+     * created by adjacent separators.</p>
+     *
+     * <p>The separator is not included in the returned String array.
+     * Adjacent separators are treated as separators for empty tokens;
+     * they are not collapsed into a single separator.</p>
+     *
+     * <p>A <code>null</code> input String returns <code>null</code>.
+     * A <code>null</code> separatorChars splits on whitespace.</p>
+     *
+     * <p>If more than <code>max</code> delimited substrings are found, the last
+     * returned string includes all characters after the first <code>max - 1</code>
+     * returned strings (including separator characters).</p>
+     *
+     * <pre>
+     * StringUtils.splitPreserveAllTokens(null, *, *)            = null
+     * StringUtils.splitPreserveAllTokens("", *, *)              = []
+     * StringUtils.splitPreserveAllTokens("ab de fg", null, 0)   = ["ab", "de", "fg"]
+     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", "de", "fg"]
+     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
+     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
+     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"]
+     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de fg"]
+     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", "de fg"]
+     * </pre>
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @param separatorChars  the characters used as the delimiters,
+     *  <code>null</code> splits on whitespace
+     * @param max  the maximum number of elements to include in the
+     *  array. A zero or negative value implies no limit
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     */
+    public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) {
+        return splitWorker(str, separatorChars, max, true);
+    }
+
+    /**
+     * Performs the logic for the <code>split</code> and 
+     * <code>splitPreserveAllTokens</code> methods that return a maximum array 
+     * length.
+     *
+     * @param str  the String to parse, may be <code>null</code>
+     * @param separatorChars the separator characters; <code>null</code> means whitespace
+     * @param max  the maximum number of elements to include in the
+     *  array. A zero or negative value implies no limit.
+     * @param preserveAllTokens if <code>true</code>, adjacent separators are
+     * treated as empty token separators; if <code>false</code>, adjacent
+     * separators are treated as one separator.
+     * @return an array of parsed Strings, <code>null</code> if null String input
+     */
+    private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
        // Performance tuned for 2.0 (JDK1.4)
        // Direct code is quicker than StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()
@@ -2099,19 +2277,24 @@ public static String[] split(String str, String separatorChars, int max) {
        int sizePlus1 = 1;
        int i = 0, start = 0;
        boolean match = false;
+        boolean lastMatch = false;
        if (separatorChars == null) {
            // Null separator means use whitespace
            while (i < len) {
                if (Character.isWhitespace(str.charAt(i))) {
-                    if (match) {
+                    if (match || preserveAllTokens) {
+                        lastMatch = true;
                        if (sizePlus1++ == max) {
                            i = len;
+                            lastMatch = false;
                        }
                        list.add(str.substring(start, i));
                        match = false;
                    }
                    start = ++i;
                    continue;
+                } else {
+                    lastMatch = false;
                }
                match = true;
                i++;
@@ -2121,15 +2304,19 @@ public static String[] split(String str, String separatorChars, int max) {
            char sep = separatorChars.charAt(0);
            while (i < len) {
                if (str.charAt(i) == sep) {
-                    if (match) {
+                    if (match || preserveAllTokens) {
+                        lastMatch = true;
                        if (sizePlus1++ == max) {
                            i = len;
+                            lastMatch = false;
                        }
                        list.add(str.substring(start, i));
                        match = false;
                    }
                    start = ++i;
                    continue;
+                } else {
+                    lastMatch = false;
                }
                match = true;
                i++;
@@ -2138,21 +2325,25 @@ public static String[] split(String str, String separatorChars, int max) {
            // standard case
            while (i < len) {
                if (separatorChars.indexOf(str.charAt(i)) >= 0) {
-                    if (match) {
+                    if (match || preserveAllTokens) {
+                        lastMatch = true;
                        if (sizePlus1++ == max) {
                            i = len;
+                            lastMatch = false;
                        }
                        list.add(str.substring(start, i));
                        match = false;
                    }
                    start = ++i;
                    continue;
+                } else {
+                    lastMatch = false;
                }
                match = true;
                i++;
            }
        }
-        if (match) {
+        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return (String[]) list.toArray(new String[list.size()]);