HADOOP-6623. Add StringUtils.split for non-escaped single-character separator. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@941508 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Thomas White 2010-05-05 21:46:31 +00:00
parent 06a13750a5
commit fe34ccdda8
3 changed files with 77 additions and 0 deletions

View File

@ -2,6 +2,11 @@ Hadoop Change Log
Trunk (unreleased changes)
IMPROVEMENTS
HADOOP-6623. Add StringUtils.split for non-escaped single-character
separator. (Todd Lipcon via tomwhite)
BUG FIXES
HADOOP-6730. Bug in FileContext#copy and provide base class for FileContext

View File

@ -383,6 +383,35 @@ public class StringUtils {
}
return strList.toArray(new String[strList.size()]);
}
/**
 * Splits a string around every occurrence of a single separator character.
 * No escape processing is performed: each occurrence of the separator ends
 * a token. Trailing empty tokens are dropped; leading and interior empty
 * tokens are kept. Splitting the empty string yields a one-element array
 * holding the empty string.
 *
 * @param str the string to split; must not be null
 * @param separator the separator character
 * @return the tokens, in order of appearance
 */
public static String[] split(
    String str, char separator) {
  // Splitting the empty string yields exactly one empty token.
  if (str.length() == 0) {
    return new String[] {""};
  }

  ArrayList<String> tokens = new ArrayList<String>();
  int tokenStart = 0;
  for (int sepAt = str.indexOf(separator, tokenStart);
       sepAt >= 0;
       sepAt = str.indexOf(separator, tokenStart)) {
    tokens.add(str.substring(tokenStart, sepAt));
    tokenStart = sepAt + 1;
  }
  // Whatever follows the final separator (possibly "") is the last token.
  tokens.add(str.substring(tokenStart));

  // Work out how many tokens remain once trailing empty ones are ignored.
  int kept = tokens.size();
  while (kept > 0 && tokens.get(kept - 1).length() == 0) {
    kept--;
  }
  return tokens.subList(0, kept).toArray(new String[kept]);
}
/**
* Finds the first occurrence of the separator character ignoring the escaped

View File

@ -78,6 +78,21 @@ public class TestStringUtils extends TestCase {
assertEquals(ESCAPED_STR_WITH_BOTH2, splits[0]);
}
/**
 * Checks that StringUtils.split(String, char) produces the same tokens as
 * String.split(String) for a '/' separator, covering interior, trailing and
 * leading separators, the empty string, and separator-only strings.
 */
public void testSimpleSplit() throws Exception {
  final String[] inputs = {
    "a/b/c",
    "a/b/c////",
    "///a/b/c",
    "",
    "/",
    "////"};
  for (int i = 0; i < inputs.length; i++) {
    final String input = inputs[i];
    final String label = "Testing '" + input + "'";
    // The JDK's own split is the reference result.
    final String[] expected = input.split("/");
    final String[] actual = StringUtils.split(input, '/');
    assertArrayEquals(label, expected, actual);
  }
}
public void testUnescapeString() throws Exception {
assertEquals(NULL_STR, StringUtils.unEscapeString(NULL_STR));
assertEquals(EMPTY_STR, StringUtils.unEscapeString(EMPTY_STR));
@ -188,4 +203,32 @@ public class TestStringUtils extends TestCase {
assertEquals("Yy", StringUtils.camelize("yY"));
assertEquals("Zz", StringUtils.camelize("zZ"));
}
/**
 * Micro-benchmark comparing String.split(String) with
 * StringUtils.split(String, char) on a short comma-separated string.
 * Each implementation is timed for 10 rounds of 1,000,000 splits; the first
 * four rounds of each implementation are treated as warm-up and not printed.
 *
 * @param args ignored
 */
public static void main(String []args) {
  final String TO_SPLIT = "foo,bar,baz,blah,blah";
  final int INNER_ITERATIONS = 1000000;
  // TO_SPLIT has five components, so every round must count exactly this many.
  final int EXPECTED_COMPONENTS = 5 * INNER_ITERATIONS;
  for (boolean useOurs : new boolean[] { false, true }) {
    for (int outer=0; outer < 10; outer++) {
      long st = System.nanoTime();
      int components = 0;
      for (int inner=0; inner < INNER_ITERATIONS; inner++) {
        String[] res;
        if (useOurs) {
          res = StringUtils.split(TO_SPLIT, ',');
        } else {
          res = TO_SPLIT.split(",");
        }
        // be sure to use res, otherwise might be optimized out
        components += res.length;
      }
      long et = System.nanoTime();
      // Actually consume the accumulated count: without a read of
      // 'components' the accumulation above is dead code and does not
      // keep the split results observable. This also sanity-checks that
      // both implementations return the expected number of tokens.
      if (components != EXPECTED_COMPONENTS) {
        throw new AssertionError(
          "Unexpected component count " + components +
          ", expected " + EXPECTED_COMPONENTS);
      }
      // Skip the first rounds so warm-up time is not reported.
      if (outer > 3) {
        System.out.println(
          (useOurs ? "StringUtils impl" : "Java impl") +
          " #" + outer + ":" +
          (et - st)/1000000 + "ms");
      }
    }
  }
}
}