mirror of
https://github.com/apache/commons-lang.git
synced 2025-02-12 21:15:05 +00:00
- added new splitPreserveAllTokens methods to mirror the split functionality, preserving empty tokens indicated by adjacent tokens; - refactored logic of existing split method into splitWorker for sharing by new splitPreserveAllTokens methods git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@137875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3787473c50
commit
f919d93040
@ -111,7 +111,7 @@
|
||||
* @author Al Chou
|
||||
* @author Michael Davey
|
||||
* @since 1.0
|
||||
* @version $Id: StringUtils.java,v 1.130 2004/05/24 20:15:44 fredrik Exp $
|
||||
* @version $Id: StringUtils.java,v 1.131 2004/07/11 16:48:31 stevencaswell Exp $
|
||||
*/
|
||||
public class StringUtils {
|
||||
// Performance testing notes (JDK 1.4, Jul03, scolebourne)
|
||||
@ -1994,34 +1994,7 @@ public static String[] split(String str) {
|
||||
* @since 2.0
|
||||
*/
|
||||
public static String[] split(String str, char separatorChar) {
|
||||
// Performance tuned for 2.0 (JDK1.4)
|
||||
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
int len = str.length();
|
||||
if (len == 0) {
|
||||
return ArrayUtils.EMPTY_STRING_ARRAY;
|
||||
}
|
||||
List list = new ArrayList();
|
||||
int i = 0, start = 0;
|
||||
boolean match = false;
|
||||
while (i < len) {
|
||||
if (str.charAt(i) == separatorChar) {
|
||||
if (match) {
|
||||
list.add(str.substring(start, i));
|
||||
match = false;
|
||||
}
|
||||
start = ++i;
|
||||
continue;
|
||||
}
|
||||
match = true;
|
||||
i++;
|
||||
}
|
||||
if (match) {
|
||||
list.add(str.substring(start, i));
|
||||
}
|
||||
return (String[]) list.toArray(new String[list.size()]);
|
||||
return splitWorker(str, separatorChar, false);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2050,7 +2023,7 @@ public static String[] split(String str, char separatorChar) {
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
public static String[] split(String str, String separatorChars) {
|
||||
return split(str, separatorChars, -1);
|
||||
return splitWorker(str, separatorChars, -1, false);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2084,6 +2057,211 @@ public static String[] split(String str, String separatorChars) {
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
public static String[] split(String str, String separatorChars, int max) {
|
||||
return splitWorker(str, separatorChars, max, false);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
/**
|
||||
* <p>Splits the provided text into an array, using whitespace as the
|
||||
* separator, preserving all tokens, including empty tokens created by
|
||||
* adjacent separators. This is an alternative to using StringTokenizer.
|
||||
* Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
|
||||
*
|
||||
* <p>The separator is not included in the returned String array.
|
||||
* Adjacent separators are treated as separators for empty tokens.
|
||||
* For more control over the split use the Tokenizer class.</p>
|
||||
*
|
||||
* <p>A <code>null</code> input String returns <code>null</code>.</p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.splitPreserveAllTokens(null) = null
|
||||
* StringUtils.splitPreserveAllTokens("") = []
|
||||
* StringUtils.splitPreserveAllTokens("abc def") = ["abc", "def"]
|
||||
* StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
|
||||
* StringUtils.splitPreserveAllTokens(" abc ") = ["", "abc", ""]
|
||||
* </pre>
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
* @since 2.1
|
||||
*/
|
||||
public static String[] splitPreserveAllTokens(String str) {
|
||||
return splitWorker(str, null, -1, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Splits the provided text into an array, separator specified,
|
||||
* preserving all tokens, including empty tokens created by adjacent
|
||||
* separators. This is an alternative to using StringTokenizer.</p>
|
||||
*
|
||||
* <p>The separator is not included in the returned String array.
|
||||
* Adjacent separators are treated as separators for empty tokens.
|
||||
* For more control over the split use the Tokenizer class.</p>
|
||||
*
|
||||
* <p>A <code>null</code> input String returns <code>null</code>.</p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.splitPreserveAllTokens(null, *) = null
|
||||
* StringUtils.splitPreserveAllTokens("", *) = []
|
||||
* StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"]
|
||||
* StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""]
|
||||
* StringUtils.splitPreserveAllTokens("a b c  ", ' ') = ["a", "b", "c", "", ""]
|
||||
* StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "a", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens("  a b c", ' ') = ["", "", "a", "b", "c"]
|
||||
* StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", "a", "b", "c", ""]
|
||||
* </pre>
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @param separatorChar the character used as the delimiter,
|
||||
* (to split on whitespace, use a <code>String</code> overload with <code>null</code> separators)
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
* @since 2.1
|
||||
*/
|
||||
public static String[] splitPreserveAllTokens(String str, char separatorChar) {
|
||||
return splitWorker(str, separatorChar, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the logic for the <code>split</code> and
|
||||
* <code>splitPreserveAllTokens</code> methods that do not limit the
|
||||
* length of the returned array.
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @param separatorChar the separator character
|
||||
* @param preserveAllTokens if <code>true</code>, adjacent separators are
|
||||
* treated as empty token separators; if <code>false</code>, adjacent
|
||||
* separators are treated as one separator.
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) {
|
||||
// Performance tuned for 2.0 (JDK1.4)
|
||||
|
||||
if (str == null) {
|
||||
return null;
|
||||
}
|
||||
int len = str.length();
|
||||
if (len == 0) {
|
||||
return ArrayUtils.EMPTY_STRING_ARRAY;
|
||||
}
|
||||
List list = new ArrayList();
|
||||
int i = 0, start = 0;
|
||||
boolean match = false;
|
||||
boolean lastMatch = false;
|
||||
while (i < len) {
|
||||
if (str.charAt(i) == separatorChar) {
|
||||
if (match || preserveAllTokens) {
|
||||
list.add(str.substring(start, i));
|
||||
match = false;
|
||||
lastMatch = true;
|
||||
}
|
||||
start = ++i;
|
||||
continue;
|
||||
} else {
|
||||
lastMatch = false;
|
||||
}
|
||||
match = true;
|
||||
i++;
|
||||
}
|
||||
if (match || (preserveAllTokens && lastMatch)) {
|
||||
list.add(str.substring(start, i));
|
||||
}
|
||||
return (String[]) list.toArray(new String[list.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Splits the provided text into an array, separators specified,
|
||||
* preserving all tokens, including empty tokens created by adjacent
|
||||
* separators. This is an alternative to using StringTokenizer.</p>
|
||||
*
|
||||
* <p>The separator is not included in the returned String array.
|
||||
* Adjacent separators are treated as separators for empty tokens.
|
||||
* For more control over the split use the Tokenizer class.</p>
|
||||
*
|
||||
* <p>A <code>null</code> input String returns <code>null</code>.
|
||||
* A <code>null</code> separatorChars splits on whitespace.</p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.splitPreserveAllTokens(null, *) = null
|
||||
* StringUtils.splitPreserveAllTokens("", *) = []
|
||||
* StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"]
|
||||
* StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"]
|
||||
* StringUtils.splitPreserveAllTokens("abc  def", " ") = ["abc", "", "def"]
|
||||
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"]
|
||||
* StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""]
|
||||
* StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
|
||||
* StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", "cd", "ef"]
|
||||
* StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", "cd", "ef"]
|
||||
* StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", "cd", "ef"]
|
||||
* StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", "cd", "ef", ""]
|
||||
* </pre>
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @param separatorChars the characters used as the delimiters,
|
||||
* <code>null</code> splits on whitespace
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
public static String[] splitPreserveAllTokens(String str, String separatorChars) {
|
||||
return splitWorker(str, separatorChars, -1, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Splits the provided text into an array with a maximum length,
|
||||
* separators specified, preserving all tokens, including empty tokens
|
||||
* created by adjacent separators.</p>
|
||||
*
|
||||
* <p>The separator is not included in the returned String array.
|
||||
* Adjacent separators are treated as separators for empty tokens.
|
||||
* They are never collapsed into a single separator.</p>
|
||||
*
|
||||
* <p>A <code>null</code> input String returns <code>null</code>.
|
||||
* A <code>null</code> separatorChars splits on whitespace.</p>
|
||||
*
|
||||
* <p>If more than <code>max</code> delimited substrings are found, the last
|
||||
* returned string includes all characters after the first <code>max - 1</code>
|
||||
* returned strings (including separator characters).</p>
|
||||
*
|
||||
* <pre>
|
||||
* StringUtils.splitPreserveAllTokens(null, *, *) = null
|
||||
* StringUtils.splitPreserveAllTokens("", *, *) = []
|
||||
* StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "de", "fg"]
|
||||
* StringUtils.splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", "de", "fg"]
|
||||
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
|
||||
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
|
||||
* StringUtils.splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"]
|
||||
* StringUtils.splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de fg"]
|
||||
* StringUtils.splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", "de fg"]
|
||||
* </pre>
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @param separatorChars the characters used as the delimiters,
|
||||
* <code>null</code> splits on whitespace
|
||||
* @param max the maximum number of elements to include in the
|
||||
* array. A zero or negative value implies no limit
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) {
|
||||
return splitWorker(str, separatorChars, max, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the logic for the <code>split</code> and
|
||||
* <code>splitPreserveAllTokens</code> methods that limit the length of the
|
||||
* returned array.
|
||||
*
|
||||
* @param str the String to parse, may be <code>null</code>
|
||||
* @param separatorChars the separator characters, <code>null</code> splits on whitespace
|
||||
* @param max the maximum number of elements to include in the
|
||||
* array. A zero or negative value implies no limit.
|
||||
* @param preserveAllTokens if <code>true</code>, adjacent separators are
|
||||
* treated as empty token separators; if <code>false</code>, adjacent
|
||||
* separators are treated as one separator.
|
||||
* @return an array of parsed Strings, <code>null</code> if null String input
|
||||
*/
|
||||
private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
|
||||
// Performance tuned for 2.0 (JDK1.4)
|
||||
// Direct code is quicker than StringTokenizer.
|
||||
// Also, StringTokenizer uses isSpace() not isWhitespace()
|
||||
@ -2099,19 +2277,24 @@ public static String[] split(String str, String separatorChars, int max) {
|
||||
int sizePlus1 = 1;
|
||||
int i = 0, start = 0;
|
||||
boolean match = false;
|
||||
boolean lastMatch = false;
|
||||
if (separatorChars == null) {
|
||||
// Null separator means use whitespace
|
||||
while (i < len) {
|
||||
if (Character.isWhitespace(str.charAt(i))) {
|
||||
if (match) {
|
||||
if (match || preserveAllTokens) {
|
||||
lastMatch = true;
|
||||
if (sizePlus1++ == max) {
|
||||
i = len;
|
||||
lastMatch = false;
|
||||
}
|
||||
list.add(str.substring(start, i));
|
||||
match = false;
|
||||
}
|
||||
start = ++i;
|
||||
continue;
|
||||
} else {
|
||||
lastMatch = false;
|
||||
}
|
||||
match = true;
|
||||
i++;
|
||||
@ -2121,15 +2304,19 @@ public static String[] split(String str, String separatorChars, int max) {
|
||||
char sep = separatorChars.charAt(0);
|
||||
while (i < len) {
|
||||
if (str.charAt(i) == sep) {
|
||||
if (match) {
|
||||
if (match || preserveAllTokens) {
|
||||
lastMatch = true;
|
||||
if (sizePlus1++ == max) {
|
||||
i = len;
|
||||
lastMatch = false;
|
||||
}
|
||||
list.add(str.substring(start, i));
|
||||
match = false;
|
||||
}
|
||||
start = ++i;
|
||||
continue;
|
||||
} else {
|
||||
lastMatch = false;
|
||||
}
|
||||
match = true;
|
||||
i++;
|
||||
@ -2138,21 +2325,25 @@ public static String[] split(String str, String separatorChars, int max) {
|
||||
// standard case
|
||||
while (i < len) {
|
||||
if (separatorChars.indexOf(str.charAt(i)) >= 0) {
|
||||
if (match) {
|
||||
if (match || preserveAllTokens) {
|
||||
lastMatch = true;
|
||||
if (sizePlus1++ == max) {
|
||||
i = len;
|
||||
lastMatch = false;
|
||||
}
|
||||
list.add(str.substring(start, i));
|
||||
match = false;
|
||||
}
|
||||
start = ++i;
|
||||
continue;
|
||||
} else {
|
||||
lastMatch = false;
|
||||
}
|
||||
match = true;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
if (match || (preserveAllTokens && lastMatch)) {
|
||||
list.add(str.substring(start, i));
|
||||
}
|
||||
return (String[]) list.toArray(new String[list.size()]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user