Decouple IndexSettings from IncludeExclude (#2860) (#2861)

This change refactors an earlier change that imposed a reg-ex size limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts an integer limit value. This is necessary because the IncludeExclude class is also used outside the core codebase, in use cases that may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class.
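
As an illustration only (not part of this commit), here is a minimal sketch of the two call paths after the refactor. The classes and methods are the ones touched by this change; the helper class and variable names are placeholders:

import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;

class IncludeExcludeUsageSketch {
    // Core-code path: the aggregator factories in this commit derive the limit from the per-index setting.
    static IncludeExclude.StringFilter coreFilter(IncludeExclude includeExclude, DocValueFormat format, QueryShardContext shardContext) {
        int maxRegexLength = shardContext.getIndexSettings().getMaxRegexLength();
        return includeExclude.convertToStringFilter(format, maxRegexLength);
    }

    // Outside the core codebase, where no IndexSettings is available, the new wrapper applies DEFAULT_MAX_REGEX_LENGTH (1000).
    static IncludeExclude.StringFilter externalFilter(IncludeExclude includeExclude, DocValueFormat format) {
        return includeExclude.convertToStringFilter(format);
    }
}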

(cherry picked from commit ba1966853e728b153e42be59ba449420e79b09ee)
Signed-off-by: Kartik Ganesh <gkart@amazon.com>

Co-authored-by: Kartik Ganesh <gkart@amazon.com>
opensearch-trigger-bot[bot] 2022-04-13 14:13:18 -04:00 committed by GitHub
parent 135177e28e
commit 3af4300c3f
6 changed files with 50 additions and 44 deletions

IncludeExclude.java

@@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
     // can disagree on which terms hash to the required partition.
     private static final int HASH_PARTITIONING_SEED = 31;
+    /**
+     * The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
+     * For context, see:
+     * https://github.com/opensearch-project/OpenSearch/issues/1992
+     * https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;
     // for parsing purposes only
     // TODO: move all aggs to the same package so that this stuff could be pkg-private
     public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
@@ -576,10 +584,10 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return incNumPartitions > 0;
     }
-    private Automaton toAutomaton(IndexSettings indexSettings) {
+    private Automaton toAutomaton(int maxRegExLength) {
         Automaton a;
         if (include != null) {
-            validateRegExpStringLength(include, indexSettings);
+            validateRegExpStringLength(include, maxRegExLength);
             a = new RegExp(include).toAutomaton();
         } else if (includeValues != null) {
             a = Automata.makeStringUnion(includeValues);
@@ -587,7 +595,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
             a = Automata.makeAnyString();
         }
         if (exclude != null) {
-            validateRegExpStringLength(exclude, indexSettings);
+            validateRegExpStringLength(exclude, maxRegExLength);
             Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
             a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
         } else if (excludeValues != null) {
@@ -596,8 +604,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return a;
     }
-    private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
-        int maxRegexLength = indexSettings.getMaxRegexLength();
+    private static void validateRegExpStringLength(String source, int maxRegexLength) {
         if (maxRegexLength > 0 && source.length() > maxRegexLength) {
             throw new IllegalArgumentException(
                 "The length of regex ["
@@ -613,9 +620,17 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         }
     }
-    public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public StringFilter convertToStringFilter(DocValueFormat format) {
+        return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+    public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
         if (isRegexBased()) {
-            return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedStringFilter();
@@ -636,10 +651,18 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return result;
     }
-    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
+        return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {
         if (isRegexBased()) {
-            return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedOrdinalsFilter();

RareTermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.Aggregator;
@@ -251,10 +250,10 @@ public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         double precision,
         CardinalityUpperBound cardinality
     ) throws IOException {
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new StringRareTermsAggregator(
             name,
             factories,

SignificantTermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
@@ -326,10 +325,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new MapStringTermsAggregator(
             name,
             factories,
@@ -367,10 +366,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.OrdinalsFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+            : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
         boolean remapGlobalOrd = true;
         if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
             /*

SignificantTextAggregatorFactory.java

@@ -44,7 +44,6 @@ import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.BytesRefHash;
 import org.opensearch.common.util.ObjectArray;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.mapper.MappedFieldType;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
@@ -138,10 +137,10 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
         // TODO - need to check with mapping that this is indeed a text field....
-        IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         IncludeExclude.StringFilter incExcFilter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
+            : includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
         MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
             queryShardContext.lookup().source(),

TermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.apache.lucene.search.IndexSearcher;
 import org.opensearch.common.ParseField;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.AggregationExecutionException;
@@ -381,10 +380,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new MapStringTermsAggregator(
             name,
             factories,
@@ -462,10 +461,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
             );
         }
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.OrdinalsFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+            : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
         boolean remapGlobalOrds;
         if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
             /*

IncludeExcludeTests.java

@@ -36,16 +36,12 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongBitSet;
-import org.opensearch.Version;
-import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.ParseField;
-import org.opensearch.common.settings.Settings;
 import org.opensearch.common.xcontent.ToXContent;
 import org.opensearch.common.xcontent.XContentBuilder;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.common.xcontent.XContentParser;
 import org.opensearch.common.xcontent.XContentType;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@@ -58,23 +54,14 @@ import java.util.TreeSet;
 public class IncludeExcludeTests extends OpenSearchTestCase {
-    private final IndexSettings dummyIndexSettings = new IndexSettings(
-        IndexMetadata.builder("index")
-            .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
-            .numberOfShards(1)
-            .numberOfReplicas(0)
-            .build(),
-        Settings.EMPTY
-    );
     public void testEmptyTermsWithOrds() throws IOException {
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
         inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
     }
@@ -113,13 +100,13 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
         };
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertTrue(acceptedOrds.get(0));
         inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -128,7 +115,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
             new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
         );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -137,7 +124,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
             null, // means everything included
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
        );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));