Updates to the large string reg-ex check (#2814)

* Updates to the large string reg-ex check Removed the null-case for IndexSettings since this only occurs in tests. The tests now use a dummy Index Setting. This change also fixes a bug with the base case handling of max regex length in the check. Signed-off-by: Kartik Ganesh <gkart@amazon.com>
2025-02-28 07:59:10 +00:00 · 2022-04-07 18:12:33 -07:00 · 2022-04-07 18:12:33 -07:00 · 2d89bc7c61
commit 2d89bc7c61
parent 47a22bb08d
2 changed files with 40 additions and 35 deletions
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/IncludeExclude.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/IncludeExclude.java
@ -48,7 +48,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.RegExp;
 import org.opensearch.OpenSearchParseException;
-import org.opensearch.common.Nullable;
 import org.opensearch.common.ParseField;
 import org.opensearch.common.io.stream.StreamInput;
 import org.opensearch.common.io.stream.StreamOutput;
@ -577,23 +576,10 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
        return incNumPartitions > 0;
    }

-    private Automaton toAutomaton(@Nullable IndexSettings indexSettings) {
-        int maxRegexLength = indexSettings == null ? -1 : indexSettings.getMaxRegexLength();
+    private Automaton toAutomaton(IndexSettings indexSettings) {
        Automaton a;
        if (include != null) {
-            if (include.length() > maxRegexLength) {
-                throw new IllegalArgumentException(
-                    "The length of regex ["
-                        + include.length()
-                        + "] used in the request has exceeded "
-                        + "the allowed maximum of ["
-                        + maxRegexLength
-                        + "]. "
-                        + "This maximum can be set by changing the ["
-                        + IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
-                        + "] index level setting."
-                );
-            }
+            validateRegExpStringLength(include, indexSettings);
            a = new RegExp(include).toAutomaton();
        } else if (includeValues != null) {
            a = Automata.makeStringUnion(includeValues);
@ -601,19 +587,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
            a = Automata.makeAnyString();
        }
        if (exclude != null) {
-            if (exclude.length() > maxRegexLength) {
-                throw new IllegalArgumentException(
-                    "The length of regex ["
-                        + exclude.length()
-                        + "] used in the request has exceeded "
-                        + "the allowed maximum of ["
-                        + maxRegexLength
-                        + "]. "
-                        + "This maximum can be set by changing the ["
-                        + IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
-                        + "] index level setting."
-                );
-            }
+            validateRegExpStringLength(exclude, indexSettings);
            Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
            a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
        } else if (excludeValues != null) {
@ -622,6 +596,23 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
        return a;
    }

+    private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
+        int maxRegexLength = indexSettings.getMaxRegexLength();
+        if (maxRegexLength > 0 && source.length() > maxRegexLength) {
+            throw new IllegalArgumentException(
+                "The length of regex ["
+                    + source.length()
+                    + "] used in the request has exceeded "
+                    + "the allowed maximum of ["
+                    + maxRegexLength
+                    + "]. "
+                    + "This maximum can be set by changing the ["
+                    + IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
+                    + "] index level setting."
+            );
+        }
+    }
+
    public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
        if (isRegexBased()) {
            return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
--- a/server/src/test/java/org/opensearch/search/aggregations/support/IncludeExcludeTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/support/IncludeExcludeTests.java
@ -36,12 +36,16 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongBitSet;
+import org.opensearch.Version;
+import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.ParseField;
+import org.opensearch.common.settings.Settings;
 import org.opensearch.common.xcontent.ToXContent;
 import org.opensearch.common.xcontent.XContentBuilder;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.common.xcontent.XContentParser;
 import org.opensearch.common.xcontent.XContentType;
+import org.opensearch.index.IndexSettings;
 import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@ -53,14 +57,24 @@ import java.util.Collections;
 import java.util.TreeSet;

 public class IncludeExcludeTests extends OpenSearchTestCase {
+
+    private final IndexSettings dummyIndexSettings = new IndexSettings(
+        IndexMetadata.builder("index")
+            .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
+            .numberOfShards(1)
+            .numberOfReplicas(0)
+            .build(),
+        Settings.EMPTY
+    );
+
    public void testEmptyTermsWithOrds() throws IOException {
        IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
        assertEquals(0, acceptedOrds.length());

        inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
        assertEquals(0, acceptedOrds.length());
    }
@ -99,13 +113,13 @@ public class IncludeExcludeTests extends OpenSearchTestCase {

        };
        IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
        assertEquals(1, acceptedOrds.length());
        assertTrue(acceptedOrds.get(0));

        inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        acceptedOrds = filter.acceptedGlobalOrdinals(ords);
        assertEquals(1, acceptedOrds.length());
        assertFalse(acceptedOrds.get(0));
@ -114,7 +128,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
            new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
            new TreeSet<>(Collections.singleton(new BytesRef("foo")))
        );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        acceptedOrds = filter.acceptedGlobalOrdinals(ords);
        assertEquals(1, acceptedOrds.length());
        assertFalse(acceptedOrds.get(0));
@ -123,7 +137,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
            null, // means everything included
            new TreeSet<>(Collections.singleton(new BytesRef("foo")))
        );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
        acceptedOrds = filter.acceptedGlobalOrdinals(ords);
        assertEquals(1, acceptedOrds.length());
        assertFalse(acceptedOrds.get(0));