Clean up split strings by comma method

We have some methods Strings#splitStringByCommaToArray and
Strings#splitStringByCommaToSet. It is not obvious that the former
leaves whitespace and the latter trims it. We also have
Strings#tokenizeToStringArray which tokenizes a string to an array, and
trims whitespace. It seems the right thing to do here is to rename
Strings#splitStringByCommaToSet to Strings#tokenizeByCommaToSet so that
its name is aligned with another method that tokenizes by a delimiter
and trims whitespace. We also clean up the code here, removing an
unneeded split-by-delimiter-to-set method.

Relates #27715
This commit is contained in:
Jason Tedor 2017-12-08 12:17:12 -05:00 committed by GitHub
parent 8b49b3f8af
commit 5c9415a4d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 52 additions and 114 deletions

View File

@ -41,6 +41,7 @@ import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.function.Supplier;
import static java.util.Collections.unmodifiableSet;
import static org.elasticsearch.common.util.set.Sets.newHashSet;
@ -410,62 +411,27 @@ public class Strings {
return collection.toArray(new String[collection.size()]);
}
public static Set<String> splitStringByCommaToSet(final String s) {
return splitStringToSet(s, ',');
}
public static String[] splitStringByCommaToArray(final String s) {
if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
else return s.split(",");
/**
 * Tokenizes a comma-separated string into a set. Whitespace surrounding each token is trimmed and tokens that are empty after
 * trimming are dropped.
 *
 * @param s the string to tokenize; may be {@code null}
 * @return the set of tokens, or an empty set when {@code s} is {@code null}
 */
public static Set<String> tokenizeByCommaToSet(final String s) {
    // handle null here so callers always receive a set rather than null
    if (s == null) {
        return Collections.emptySet();
    }
    return tokenizeToCollection(s, ",", HashSet::new);
}
/**
* A convenience method for splitting a delimited string into
* a set and trimming leading and trailing whitespace from all
* split strings.
* Split the specified string by commas to an array.
*
* @param s the string to split
* @param c the delimiter to split on
* @return the set of split strings
* @return the array of split values
* @see String#split(String)
*/
public static Set<String> splitStringToSet(final String s, final char c) {
if (s == null || s.isEmpty()) {
return Collections.emptySet();
}
final char[] chars = s.toCharArray();
int count = 1;
for (final char x : chars) {
if (x == c) {
count++;
}
}
final Set<String> result = new HashSet<>(count);
final int len = chars.length;
int start = 0; // starting index in chars of the current substring.
int pos = 0; // current index in chars.
int end = 0; // the position of the end of the current token
for (; pos < len; pos++) {
if (chars[pos] == c) {
int size = end - start;
if (size > 0) { // only add non empty strings
result.add(new String(chars, start, size));
}
start = pos + 1;
end = start;
} else if (Character.isWhitespace(chars[pos])) {
if (start == pos) {
// skip over preceding whitespace
start++;
}
} else {
end = pos + 1;
}
}
int size = end - start;
if (size > 0) {
result.add(new String(chars, start, size));
}
return result;
public static String[] splitStringByCommaToArray(final String s) {
    // null and empty input both map to the shared empty array; everything else is split verbatim (no trimming)
    final boolean nothingToSplit = s == null || s.isEmpty();
    return nothingToSplit ? Strings.EMPTY_ARRAY : s.split(",");
}
/**
@ -499,7 +465,7 @@ public class Strings {
* tokens. A delimiter is always a single character; for multi-character
* delimiters, consider using <code>delimitedListToStringArray</code>
*
* @param str the String to tokenize
* @param s the String to tokenize
* @param delimiters the delimiter characters, assembled as String
* (each of those characters is individually considered as delimiter).
* @return an array of the tokens
@ -507,48 +473,35 @@ public class Strings {
* @see java.lang.String#trim()
* @see #delimitedListToStringArray
*/
public static String[] tokenizeToStringArray(String str, String delimiters) {
return tokenizeToStringArray(str, delimiters, true, true);
public static String[] tokenizeToStringArray(final String s, final String delimiters) {
    // collect the trimmed, non-empty tokens in encounter order, then convert them to an array
    final List<String> tokens = tokenizeToCollection(s, delimiters, ArrayList::new);
    return toStringArray(tokens);
}
/**
* Tokenize the given String into a String array via a StringTokenizer.
* <p>The given delimiters string is supposed to consist of any number of
* delimiter characters. Each of those characters can be used to separate
* tokens. A delimiter is always a single character; for multi-character
* delimiters, consider using <code>delimitedListToStringArray</code>
* Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
* from tokens and ignores empty tokens.
*
* @param str the String to tokenize
* @param delimiters the delimiter characters, assembled as String
* (each of those characters is individually considered as delimiter)
* @param trimTokens trim the tokens via String's <code>trim</code>
* @param ignoreEmptyTokens omit empty tokens from the result array
* (only applies to tokens that are empty after trimming; StringTokenizer
* will not consider subsequent delimiters as token in the first place).
* @return an array of the tokens (<code>null</code> if the input String
* was <code>null</code>)
* @param s the string to tokenize.
* @param delimiters the token delimiters
* @param supplier a collection supplier
* @param <T> the type of the collection
* @return the tokens
* @see java.util.StringTokenizer
* @see java.lang.String#trim()
* @see #delimitedListToStringArray
*/
public static String[] tokenizeToStringArray(
String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {
if (str == null) {
private static <T extends Collection<String>> T tokenizeToCollection(
final String s, final String delimiters, final Supplier<T> supplier) {
if (s == null) {
return null;
}
StringTokenizer st = new StringTokenizer(str, delimiters);
List<String> tokens = new ArrayList<>();
while (st.hasMoreTokens()) {
String token = st.nextToken();
if (trimTokens) {
token = token.trim();
}
if (!ignoreEmptyTokens || token.length() > 0) {
final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
final T tokens = supplier.get();
while (tokenizer.hasMoreTokens()) {
final String token = tokenizer.nextToken().trim();
if (token.length() > 0) {
tokens.add(token);
}
}
return toStringArray(tokens);
return tokens;
}
/**

View File

@ -94,7 +94,7 @@ public abstract class AbstractRestChannel implements RestChannel {
Set<String> includes = Collections.emptySet();
Set<String> excludes = Collections.emptySet();
if (useFiltering) {
Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
}

View File

@ -76,7 +76,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
// still, /_nodes/_local (or any other node id) should work and be treated as usual
// this means one must differentiate between allowed metrics and arbitrary node ids in the same place
if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
if (isMetricsOnly) {
nodeIds = new String[]{"_all"};
@ -87,7 +87,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
}
} else {
nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
}
final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);

View File

@ -92,7 +92,7 @@ public class RestNodesStatsAction extends BaseRestHandler {
@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
nodesStatsRequest.timeout(request.param("timeout"));
@ -134,7 +134,7 @@ public class RestNodesStatsAction extends BaseRestHandler {
// check for index specific metrics
if (metrics.contains("indices")) {
Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
nodesStatsRequest.indices(CommonStatsFlags.ALL);
} else {

View File

@ -56,7 +56,7 @@ public class RestNodesUsageAction extends BaseRestHandler {
@Override
protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
nodesUsageRequest.timeout(request.param("timeout"));

View File

@ -91,7 +91,7 @@ public class RestIndicesStatsAction extends BaseRestHandler {
indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));
Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
// short cut, if no metrics have been specified in URI
if (metrics.size() == 1 && metrics.contains("_all")) {
indicesStatsRequest.all();

View File

@ -90,30 +90,15 @@ public class StringsTests extends ESTestCase {
}
public void testSplitStringToSet() {
assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.splitStringByCommaToSet(" aa "), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringByCommaToSet(" "), Sets.newHashSet());
assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet(" a b c ", ' '), Sets.newHashSet("a","b","c"));
assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet("a ", ' '), Sets.newHashSet("a"));
assertEquals(Strings.splitStringToSet(" aa ", ' '), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringToSet("aa ", ' '), Sets.newHashSet("aa"));
assertEquals(Strings.splitStringToSet(" ", ' '), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet(" a , b, c "), Sets.newHashSet("a","b","c"));
assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
assertEquals(Strings.tokenizeByCommaToSet(" aa "), Sets.newHashSet("aa"));
assertEquals(Strings.tokenizeByCommaToSet(" "), Sets.newHashSet());
}
}