HADOOP-6623. Add StringUtils.split for non-escaped single-character separator. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@941508 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
06a13750a5
commit
fe34ccdda8
|
@ -2,6 +2,11 @@ Hadoop Change Log
|
|||
|
||||
Trunk (unreleased changes)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HADOOP-6623. Add StringUtils.split for non-escaped single-character
|
||||
separator. (Todd Lipcon via tomwhite)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-6730. Bug in FileContext#copy and provide base class for FileContext
|
||||
|
|
|
@ -383,6 +383,35 @@ public class StringUtils {
|
|||
}
|
||||
return strList.toArray(new String[strList.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split a string using the given separator, with no escaping performed.
|
||||
* @param str a string to be split. Note that this may not be null.
|
||||
* @param separator a separator char
|
||||
* @return an array of strings
|
||||
*/
|
||||
public static String[] split(
|
||||
String str, char separator) {
|
||||
// String.split returns a single empty result for splitting the empty
|
||||
// string.
|
||||
if ("".equals(str)) {
|
||||
return new String[]{""};
|
||||
}
|
||||
ArrayList<String> strList = new ArrayList<String>();
|
||||
int startIndex = 0;
|
||||
int nextIndex = 0;
|
||||
while ((nextIndex = str.indexOf((int)separator, startIndex)) != -1) {
|
||||
strList.add(str.substring(startIndex, nextIndex));
|
||||
startIndex = nextIndex + 1;
|
||||
}
|
||||
strList.add(str.substring(startIndex));
|
||||
// remove trailing empty split(s)
|
||||
int last = strList.size(); // last split
|
||||
while (--last>=0 && "".equals(strList.get(last))) {
|
||||
strList.remove(last);
|
||||
}
|
||||
return strList.toArray(new String[strList.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the first occurrence of the separator character ignoring the escaped
|
||||
|
|
|
@ -78,6 +78,21 @@ public class TestStringUtils extends TestCase {
|
|||
assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);
|
||||
}
|
||||
|
||||
public void testSimpleSplit() throws Exception {
|
||||
final String[] TO_TEST = {
|
||||
"a/b/c",
|
||||
"a/b/c////",
|
||||
"///a/b/c",
|
||||
"",
|
||||
"/",
|
||||
"////"};
|
||||
for (String testSubject : TO_TEST) {
|
||||
assertArrayEquals("Testing '" + testSubject + "'",
|
||||
testSubject.split("/"),
|
||||
StringUtils.split(testSubject, '/'));
|
||||
}
|
||||
}
|
||||
|
||||
public void testUnescapeString() throws Exception {
|
||||
assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
|
||||
assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
|
||||
|
@ -188,4 +203,32 @@ public class TestStringUtils extends TestCase {
|
|||
assertEquals("Yy", StringUtils.camelize("yY"));
|
||||
assertEquals("Zz", StringUtils.camelize("zZ"));
|
||||
}
|
||||
|
||||
// Benchmark for StringUtils split
|
||||
public static void main(String []args) {
|
||||
final String TO_SPLIT = "foo,bar,baz,blah,blah";
|
||||
for (boolean useOurs : new boolean[] { false, true }) {
|
||||
for (int outer=0; outer < 10; outer++) {
|
||||
long st = System.nanoTime();
|
||||
int components = 0;
|
||||
for (int inner=0; inner < 1000000; inner++) {
|
||||
String[] res;
|
||||
if (useOurs) {
|
||||
res = StringUtils.split(TO_SPLIT, ',');
|
||||
} else {
|
||||
res = TO_SPLIT.split(",");
|
||||
}
|
||||
// be sure to use res, otherwise might be optimized out
|
||||
components += res.length;
|
||||
}
|
||||
long et = System.nanoTime();
|
||||
if (outer > 3) {
|
||||
System.out.println(
|
||||
(useOurs ? "StringUtils impl" : "Java impl") +
|
||||
" #" + outer + ":" +
|
||||
(et - st)/1000000 + "ms");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue