Clean up the split-string-by-comma methods

We have two methods, Strings#splitStringByCommaToArray and Strings#splitStringByCommaToSet. It is not obvious from their names that the former leaves whitespace intact while the latter trims it. We also have Strings#tokenizeToStringArray, which tokenizes a string to an array and trims whitespace. It seems the right thing to do here is to rename Strings#splitStringByCommaToSet to Strings#tokenizeByCommaToSet so that its name is aligned with the other method that tokenizes by a delimiter and trims whitespace. We also clean up the code here, removing the unneeded Strings#splitStringToSet method, which split a string by an arbitrary delimiter into a set. Relates #27715
2017-12-08 12:17:12 -05:00 · 2017-12-08 12:17:12 -05:00 · 5c9415a4d3
parent 8b49b3f8af
commit 5c9415a4d3
7 changed files with 52 additions and 114 deletions
--- a/core/src/main/java/org/elasticsearch/common/Strings.java
+++ b/core/src/main/java/org/elasticsearch/common/Strings.java
@ -41,6 +41,7 @@ import java.util.List;
 import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.TreeSet;
 import java.util.function.Supplier;
 import static java.util.Collections.unmodifiableSet;
 import static org.elasticsearch.common.util.set.Sets.newHashSet;
@ -410,62 +411,27 @@ public class Strings {
        return collection.toArray(new String[collection.size()]);
    }
-    public static Set<String> splitStringByCommaToSet(final String s) {
+    /**
-        return splitStringToSet(s, ',');
+     * Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens.
-    }
+     *
-
+     * @param s the string to tokenize
-    public static String[] splitStringByCommaToArray(final String s) {
+     * @return the set of tokens
-        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
+     */
-        else return s.split(",");
+    public static Set<String> tokenizeByCommaToSet(final String s) {
        if (s == null) return Collections.emptySet();
        return tokenizeToCollection(s, ",", HashSet::new);
    }
    /**
-     * A convenience method for splitting a delimited string into
+     * Split the specified string by commas to an array.
     * a set and trimming leading and trailing whitespace from all
     * split strings.
     *
     * @param s the string to split
-     * @param c the delimiter to split on
+     * @return the array of split values
-     * @return the set of split strings
+     * @see String#split(String)
     */
-    public static Set<String> splitStringToSet(final String s, final char c) {
+    public static String[] splitStringByCommaToArray(final String s) {
-        if (s == null || s.isEmpty()) {
+        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
-            return Collections.emptySet();
+        else return s.split(",");
        }
        final char[] chars = s.toCharArray();
        int count = 1;
        for (final char x : chars) {
            if (x == c) {
                count++;
            }
        }
        final Set<String> result = new HashSet<>(count);
        final int len = chars.length;
        int start = 0;  // starting index in chars of the current substring.
        int pos = 0;    // current index in chars.
        int end = 0; // the position of the end of the current token
        for (; pos < len; pos++) {
            if (chars[pos] == c) {
                int size = end - start;
                if (size > 0) { // only add non empty strings
                    result.add(new String(chars, start, size));
                }
                start = pos + 1;
                end = start;
            } else if (Character.isWhitespace(chars[pos])) {
                if (start == pos) {
                    // skip over preceding whitespace
                    start++;
                }
            } else {
                end = pos + 1;
            }
        }
        int size = end - start;
        if (size > 0) {
            result.add(new String(chars, start, size));
        }
        return result;
    }
    /**
@ -499,7 +465,7 @@ public class Strings {
     * tokens. A delimiter is always a single character; for multi-character
     * delimiters, consider using <code>delimitedListToStringArray</code>
     *
-     * @param str        the String to tokenize
+     * @param s        the String to tokenize
     * @param delimiters the delimiter characters, assembled as String
     *                   (each of those characters is individually considered as delimiter).
     * @return an array of the tokens
@ -507,48 +473,35 @@ public class Strings {
     * @see java.lang.String#trim()
     * @see #delimitedListToStringArray
     */
-    public static String[] tokenizeToStringArray(String str, String delimiters) {
+    public static String[] tokenizeToStringArray(final String s, final String delimiters) {
-        return tokenizeToStringArray(str, delimiters, true, true);
+        return toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new));
    }
    /**
-     * Tokenize the given String into a String array via a StringTokenizer.
+     * Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
-     * <p>The given delimiters string is supposed to consist of any number of
+     * from tokens and ignores empty tokens.
     * delimiter characters. Each of those characters can be used to separate
     * tokens. A delimiter is always a single character; for multi-character
     * delimiters, consider using <code>delimitedListToStringArray</code>
     *
-     * @param str               the String to tokenize
+     * @param s          the string to tokenize.
-     * @param delimiters        the delimiter characters, assembled as String
+     * @param delimiters the token delimiters
-     *                          (each of those characters is individually considered as delimiter)
+     * @param supplier   a collection supplier
-     * @param trimTokens        trim the tokens via String's <code>trim</code>
+     * @param <T>        the type of the collection
-     * @param ignoreEmptyTokens omit empty tokens from the result array
+     * @return the tokens
     *                          (only applies to tokens that are empty after trimming; StringTokenizer
     *                          will not consider subsequent delimiters as token in the first place).
     * @return an array of the tokens (<code>null</code> if the input String
     *         was <code>null</code>)
     * @see java.util.StringTokenizer
     * @see java.lang.String#trim()
     * @see #delimitedListToStringArray
     */
-    public static String[] tokenizeToStringArray(
+    private static <T extends Collection<String>> T tokenizeToCollection(
-            String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {
+            final String s, final String delimiters, final Supplier<T> supplier) {
-
+        if (s == null) {
        if (str == null) {
            return null;
        }
-        StringTokenizer st = new StringTokenizer(str, delimiters);
+        final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
-        List<String> tokens = new ArrayList<>();
+        final T tokens = supplier.get();
-        while (st.hasMoreTokens()) {
+        while (tokenizer.hasMoreTokens()) {
-            String token = st.nextToken();
+            final String token = tokenizer.nextToken().trim();
-            if (trimTokens) {
+            if (token.length() > 0) {
                token = token.trim();
            }
            if (!ignoreEmptyTokens || token.length() > 0) {
                tokens.add(token);
            }
        }
-        return toStringArray(tokens);
+        return tokens;
    }
    /**
--- a/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java
+++ b/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java
@ -94,7 +94,7 @@ public abstract class AbstractRestChannel implements RestChannel {
        Set<String> includes = Collections.emptySet();
        Set<String> excludes = Collections.emptySet();
        if (useFiltering) {
-            Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
+            Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
            includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
            excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
        }
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java
@ -76,7 +76,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
        // still, /_nodes/_local (or any other node id) should work and be treated as usual
        // this means one must differentiate between allowed metrics and arbitrary node ids in the same place
        if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
-            Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
+            Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
            boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
            if (isMetricsOnly) {
                nodeIds = new String[]{"_all"};
@ -87,7 +87,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
            }
        } else {
            nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
-            metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
+            metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
        }
        final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java
@ -92,7 +92,7 @@ public class RestNodesStatsAction extends BaseRestHandler {
    @Override
    public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
        String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
        NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
        nodesStatsRequest.timeout(request.param("timeout"));
@ -134,7 +134,7 @@ public class RestNodesStatsAction extends BaseRestHandler {
            // check for index specific metrics
            if (metrics.contains("indices")) {
-                Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
+                Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
                if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
                    nodesStatsRequest.indices(CommonStatsFlags.ALL);
                } else {
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java
@ -56,7 +56,7 @@ public class RestNodesUsageAction extends BaseRestHandler {
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
        String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
        NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
        nodesUsageRequest.timeout(request.param("timeout"));
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java
@ -91,7 +91,7 @@ public class RestIndicesStatsAction extends BaseRestHandler {
        indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
        indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
        // short cut, if no metrics have been specified in URI
        if (metrics.size() == 1 && metrics.contains("_all")) {
            indicesStatsRequest.all();
--- a/core/src/test/java/org/elasticsearch/common/StringsTests.java
+++ b/core/src/test/java/org/elasticsearch/common/StringsTests.java
@ -90,30 +90,15 @@ public class StringsTests extends ESTestCase {
    }
    public void testSplitStringToSet() {
-        assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
+        assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
-        assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   a   "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet("   a   "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   aa   "), Sets.newHashSet("aa"));
+        assertEquals(Strings.tokenizeByCommaToSet("   aa   "), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringByCommaToSet("   "), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet("   "), Sets.newHashSet());
        assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
        assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
        assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
        assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
        assertEquals(Strings.splitStringToSet(" a   b c  ", ' '), Sets.newHashSet("a","b","c"));
        assertEquals(Strings.splitStringToSet("  a   b   c  ", ' '), Sets.newHashSet("a","b","c"));
        assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
        assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
        assertEquals(Strings.splitStringToSet("    a    ", ' '), Sets.newHashSet("a"));
        assertEquals(Strings.splitStringToSet(" a   ", ' '), Sets.newHashSet("a"));
        assertEquals(Strings.splitStringToSet("a   ", ' '), Sets.newHashSet("a"));
        assertEquals(Strings.splitStringToSet("   aa   ", ' '), Sets.newHashSet("aa"));
        assertEquals(Strings.splitStringToSet("aa   ", ' '), Sets.newHashSet("aa"));
        assertEquals(Strings.splitStringToSet("   ", ' '), Sets.newHashSet());
    }
 }