Clean up split strings by comma method

We have the methods Strings#splitStringByCommaToArray and Strings#splitStringByCommaToSet. It is not obvious that the former leaves whitespace and the latter trims it. We also have Strings#tokenizeToStringArray, which tokenizes a string to an array and trims whitespace. It seems the right thing to do here is to rename Strings#splitStringByCommaToSet to Strings#tokenizeByCommaToSet so that its name is aligned with the other method that tokenizes by a delimiter and trims whitespace. We also clean up the code here, removing the unneeded Strings#splitStringToSet method, which split a string by an arbitrary delimiter into a set. Relates #27715
2025-03-09 14:34:43 +00:00 · 2017-12-08 12:17:12 -05:00 · 2017-12-08 12:17:12 -05:00 · 5c9415a4d3
commit 5c9415a4d3
parent 8b49b3f8af
7 changed files with 52 additions and 114 deletions
--- a/core/src/main/java/org/elasticsearch/common/Strings.java
+++ b/core/src/main/java/org/elasticsearch/common/Strings.java
@ -41,6 +41,7 @@ import java.util.List;
 import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.TreeSet;
+import java.util.function.Supplier;

 import static java.util.Collections.unmodifiableSet;
 import static org.elasticsearch.common.util.set.Sets.newHashSet;
@ -410,62 +411,27 @@ public class Strings {
        return collection.toArray(new String[collection.size()]);
    }

-    public static Set<String> splitStringByCommaToSet(final String s) {
-        return splitStringToSet(s, ',');
-    }
-
-    public static String[] splitStringByCommaToArray(final String s) {
-        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
-        else return s.split(",");
+    /**
+     * Tokenize the specified string by commas to a set, trimming whitespace and ignoring empty tokens.
+     *
+     * @param s the string to tokenize
+     * @return the set of tokens
+     */
+    public static Set<String> tokenizeByCommaToSet(final String s) {
+        if (s == null) return Collections.emptySet();
+        return tokenizeToCollection(s, ",", HashSet::new);
    }

    /**
-     * A convenience method for splitting a delimited string into
-     * a set and trimming leading and trailing whitespace from all
-     * split strings.
+     * Split the specified string by commas to an array.
     *
     * @param s the string to split
-     * @param c the delimiter to split on
-     * @return the set of split strings
+     * @return the array of split values
+     * @see String#split(String)
     */
-    public static Set<String> splitStringToSet(final String s, final char c) {
-        if (s == null || s.isEmpty()) {
-            return Collections.emptySet();
-        }
-        final char[] chars = s.toCharArray();
-        int count = 1;
-        for (final char x : chars) {
-            if (x == c) {
-                count++;
-            }
-        }
-        final Set<String> result = new HashSet<>(count);
-        final int len = chars.length;
-        int start = 0;  // starting index in chars of the current substring.
-        int pos = 0;    // current index in chars.
-        int end = 0; // the position of the end of the current token
-        for (; pos < len; pos++) {
-            if (chars[pos] == c) {
-                int size = end - start;
-                if (size > 0) { // only add non empty strings
-                    result.add(new String(chars, start, size));
-                }
-                start = pos + 1;
-                end = start;
-            } else if (Character.isWhitespace(chars[pos])) {
-                if (start == pos) {
-                    // skip over preceding whitespace
-                    start++;
-                }
-            } else {
-                end = pos + 1;
-            }
-        }
-        int size = end - start;
-        if (size > 0) {
-            result.add(new String(chars, start, size));
-        }
-        return result;
+    public static String[] splitStringByCommaToArray(final String s) {
+        if (s == null || s.isEmpty()) return Strings.EMPTY_ARRAY;
+        else return s.split(",");
    }

    /**
@ -499,7 +465,7 @@ public class Strings {
     * tokens. A delimiter is always a single character; for multi-character
     * delimiters, consider using <code>delimitedListToStringArray</code>
     *
-     * @param str        the String to tokenize
+     * @param s          the String to tokenize
     * @param delimiters the delimiter characters, assembled as String
     *                   (each of those characters is individually considered as delimiter).
     * @return an array of the tokens
@ -507,48 +473,35 @@ public class Strings {
     * @see java.lang.String#trim()
     * @see #delimitedListToStringArray
     */
-    public static String[] tokenizeToStringArray(String str, String delimiters) {
-        return tokenizeToStringArray(str, delimiters, true, true);
+    public static String[] tokenizeToStringArray(final String s, final String delimiters) {
+        return toStringArray(tokenizeToCollection(s, delimiters, ArrayList::new));
    }

    /**
-     * Tokenize the given String into a String array via a StringTokenizer.
-     * <p>The given delimiters string is supposed to consist of any number of
-     * delimiter characters. Each of those characters can be used to separate
-     * tokens. A delimiter is always a single character; for multi-character
-     * delimiters, consider using <code>delimitedListToStringArray</code>
+     * Tokenizes the specified string to a collection using the specified delimiters as the token delimiters. This method trims whitespace
+     * from tokens and ignores empty tokens.
     *
-     * @param str               the String to tokenize
-     * @param delimiters        the delimiter characters, assembled as String
-     *                          (each of those characters is individually considered as delimiter)
-     * @param trimTokens        trim the tokens via String's <code>trim</code>
-     * @param ignoreEmptyTokens omit empty tokens from the result array
-     *                          (only applies to tokens that are empty after trimming; StringTokenizer
-     *                          will not consider subsequent delimiters as token in the first place).
-     * @return an array of the tokens (<code>null</code> if the input String
-     *         was <code>null</code>)
+     * @param s          the string to tokenize
+     * @param delimiters the token delimiters
+     * @param supplier   a collection supplier
+     * @param <T>        the type of the collection
+     * @return the tokens, or {@code null} if the specified string is {@code null}
     * @see java.util.StringTokenizer
-     * @see java.lang.String#trim()
-     * @see #delimitedListToStringArray
     */
-    public static String[] tokenizeToStringArray(
-            String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {
-
-        if (str == null) {
+    private static <T extends Collection<String>> T tokenizeToCollection(
+            final String s, final String delimiters, final Supplier<T> supplier) {
+        if (s == null) {
            return null;
        }
-        StringTokenizer st = new StringTokenizer(str, delimiters);
-        List<String> tokens = new ArrayList<>();
-        while (st.hasMoreTokens()) {
-            String token = st.nextToken();
-            if (trimTokens) {
-                token = token.trim();
-            }
-            if (!ignoreEmptyTokens || token.length() > 0) {
+        final StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
+        final T tokens = supplier.get();
+        while (tokenizer.hasMoreTokens()) {
+            final String token = tokenizer.nextToken().trim();
+            if (token.length() > 0) {
                tokens.add(token);
            }
        }
-        return toStringArray(tokens);
+        return tokens;
    }

    /**
--- a/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java
+++ b/core/src/main/java/org/elasticsearch/rest/AbstractRestChannel.java
@ -94,7 +94,7 @@ public abstract class AbstractRestChannel implements RestChannel {
        Set<String> includes = Collections.emptySet();
        Set<String> excludes = Collections.emptySet();
        if (useFiltering) {
-            Set<String> filters = Strings.splitStringByCommaToSet(filterPath);
+            Set<String> filters = Strings.tokenizeByCommaToSet(filterPath);
            includes = filters.stream().filter(INCLUDE_FILTER).collect(toSet());
            excludes = filters.stream().filter(EXCLUDE_FILTER).map(f -> f.substring(1)).collect(toSet());
        }
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesInfoAction.java
@ -76,7 +76,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
        // still, /_nodes/_local (or any other node id) should work and be treated as usual
        // this means one must differentiate between allowed metrics and arbitrary node ids in the same place
        if (request.hasParam("nodeId") && !request.hasParam("metrics")) {
-            Set<String> metricsOrNodeIds = Strings.splitStringByCommaToSet(request.param("nodeId", "_all"));
+            Set<String> metricsOrNodeIds = Strings.tokenizeByCommaToSet(request.param("nodeId", "_all"));
            boolean isMetricsOnly = ALLOWED_METRICS.containsAll(metricsOrNodeIds);
            if (isMetricsOnly) {
                nodeIds = new String[]{"_all"};
@ -87,7 +87,7 @@ public class RestNodesInfoAction extends BaseRestHandler {
            }
        } else {
            nodeIds = Strings.splitStringByCommaToArray(request.param("nodeId", "_all"));
-            metrics = Strings.splitStringByCommaToSet(request.param("metrics", "_all"));
+            metrics = Strings.tokenizeByCommaToSet(request.param("metrics", "_all"));
        }

        final NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(nodeIds);
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java
@ -92,7 +92,7 @@ public class RestNodesStatsAction extends BaseRestHandler {
    @Override
    public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
        String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));

        NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodesIds);
        nodesStatsRequest.timeout(request.param("timeout"));
@ -134,7 +134,7 @@ public class RestNodesStatsAction extends BaseRestHandler {

            // check for index specific metrics
            if (metrics.contains("indices")) {
-                Set<String> indexMetrics = Strings.splitStringByCommaToSet(request.param("index_metric", "_all"));
+                Set<String> indexMetrics = Strings.tokenizeByCommaToSet(request.param("index_metric", "_all"));
                if (indexMetrics.size() == 1 && indexMetrics.contains("_all")) {
                    nodesStatsRequest.indices(CommonStatsFlags.ALL);
                } else {
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesUsageAction.java
@ -56,7 +56,7 @@ public class RestNodesUsageAction extends BaseRestHandler {
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
        String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId"));
-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));

        NodesUsageRequest nodesUsageRequest = new NodesUsageRequest(nodesIds);
        nodesUsageRequest.timeout(request.param("timeout"));
--- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java
+++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestIndicesStatsAction.java
@ -91,7 +91,7 @@ public class RestIndicesStatsAction extends BaseRestHandler {
        indicesStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index")));
        indicesStatsRequest.types(Strings.splitStringByCommaToArray(request.param("types")));

-        Set<String> metrics = Strings.splitStringByCommaToSet(request.param("metric", "_all"));
+        Set<String> metrics = Strings.tokenizeByCommaToSet(request.param("metric", "_all"));
        // short cut, if no metrics have been specified in URI
        if (metrics.size() == 1 && metrics.contains("_all")) {
            indicesStatsRequest.all();
--- a/core/src/test/java/org/elasticsearch/common/StringsTests.java
+++ b/core/src/test/java/org/elasticsearch/common/StringsTests.java
@ -90,30 +90,15 @@ public class StringsTests extends ESTestCase {
    }

    public void testSplitStringToSet() {
-        assertEquals(Strings.splitStringByCommaToSet(null), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet(""), Sets.newHashSet());
-        assertEquals(Strings.splitStringByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
-        assertEquals(Strings.splitStringByCommaToSet(" a "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   a   "), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringByCommaToSet("   aa   "), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringByCommaToSet("   "), Sets.newHashSet());
-
-        assertEquals(Strings.splitStringToSet(null, ' '), Sets.newHashSet());
-        assertEquals(Strings.splitStringToSet("", ' '), Sets.newHashSet());
-        assertEquals(Strings.splitStringToSet("a b c", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("a, b, c", ' '), Sets.newHashSet("a,","b,","c"));
-        assertEquals(Strings.splitStringToSet(" a   b c  ", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("  a   b   c  ", ' '), Sets.newHashSet("a","b","c"));
-        assertEquals(Strings.splitStringToSet("aa bb cc", ' '), Sets.newHashSet("aa","bb","cc"));
-        assertEquals(Strings.splitStringToSet(" a ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("    a    ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet(" a   ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("a   ", ' '), Sets.newHashSet("a"));
-        assertEquals(Strings.splitStringToSet("   aa   ", ' '), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringToSet("aa   ", ' '), Sets.newHashSet("aa"));
-        assertEquals(Strings.splitStringToSet("   ", ' '), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(null), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet(""), Sets.newHashSet());
+        assertEquals(Strings.tokenizeByCommaToSet("a,b,c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("a, b, c"), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a ,  b, c  "), Sets.newHashSet("a","b","c"));
+        assertEquals(Strings.tokenizeByCommaToSet("aa, bb, cc"), Sets.newHashSet("aa","bb","cc"));
+        assertEquals(Strings.tokenizeByCommaToSet(" a "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet("   a   "), Sets.newHashSet("a"));
+        assertEquals(Strings.tokenizeByCommaToSet("   aa   "), Sets.newHashSet("aa"));
+        assertEquals(Strings.tokenizeByCommaToSet("   "), Sets.newHashSet());
    }
 }