Decouple IndexSettings from IncludeExclude (#2860) (#2861)

This change refactors an earlier change that imposed a reg-ex size limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts an integer limit value. This is necessary because the IncludeExclude class is also used outside the core codebase, in use cases that may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class.
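
As an illustration only (not part of this commit), here is a minimal sketch of the two call paths after the refactor. The classes and methods are the ones touched by this change; the helper class and variable names are placeholders:

import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;

class IncludeExcludeUsageSketch {
    // Core-code path: the aggregator factories in this commit derive the limit from the per-index setting.
    static IncludeExclude.StringFilter coreFilter(IncludeExclude includeExclude, DocValueFormat format, QueryShardContext shardContext) {
        int maxRegexLength = shardContext.getIndexSettings().getMaxRegexLength();
        return includeExclude.convertToStringFilter(format, maxRegexLength);
    }

    // Outside the core codebase, where no IndexSettings is available, the new wrapper applies DEFAULT_MAX_REGEX_LENGTH (1000).
    static IncludeExclude.StringFilter externalFilter(IncludeExclude includeExclude, DocValueFormat format) {
        return includeExclude.convertToStringFilter(format);
    }
}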

(cherry picked from commit ba1966853e728b153e42be59ba449420e79b09ee)
Signed-off-by: Kartik Ganesh <gkart@amazon.com>

Co-authored-by: Kartik Ganesh <gkart@amazon.com>
opensearch-trigger-bot[bot] 2022-04-13 14:13:18 -04:00 committed by GitHub
parent 135177e28e
commit 3af4300c3f
6 changed files with 50 additions and 44 deletions

IncludeExclude.java

@@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
     // can disagree on which terms hash to the required partition.
     private static final int HASH_PARTITIONING_SEED = 31;
+    /**
+     * The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
+     * For context, see:
+     * https://github.com/opensearch-project/OpenSearch/issues/1992
+     * https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;
     // for parsing purposes only
     // TODO: move all aggs to the same package so that this stuff could be pkg-private
     public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
@@ -576,10 +584,10 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return incNumPartitions > 0;
     }
-    private Automaton toAutomaton(IndexSettings indexSettings) {
+    private Automaton toAutomaton(int maxRegExLength) {
         Automaton a;
         if (include != null) {
-            validateRegExpStringLength(include, indexSettings);
+            validateRegExpStringLength(include, maxRegExLength);
             a = new RegExp(include).toAutomaton();
         } else if (includeValues != null) {
             a = Automata.makeStringUnion(includeValues);
@@ -587,7 +595,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
             a = Automata.makeAnyString();
         }
         if (exclude != null) {
-            validateRegExpStringLength(exclude, indexSettings);
+            validateRegExpStringLength(exclude, maxRegExLength);
             Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
             a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
         } else if (excludeValues != null) {
@@ -596,8 +604,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return a;
     }
-    private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
-        int maxRegexLength = indexSettings.getMaxRegexLength();
+    private static void validateRegExpStringLength(String source, int maxRegexLength) {
         if (maxRegexLength > 0 && source.length() > maxRegexLength) {
             throw new IllegalArgumentException(
                 "The length of regex ["
@@ -613,9 +620,17 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         }
     }
-    public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public StringFilter convertToStringFilter(DocValueFormat format) {
+        return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+    public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
         if (isRegexBased()) {
-            return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedStringFilter();
@@ -636,10 +651,18 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
         return result;
     }
-    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
+        return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {
         if (isRegexBased()) {
-            return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedOrdinalsFilter();

RareTermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.Aggregator;
@@ -251,10 +250,10 @@ public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         double precision,
         CardinalityUpperBound cardinality
     ) throws IOException {
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new StringRareTermsAggregator(
             name,
             factories,

SignificantTermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
@@ -326,10 +325,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new MapStringTermsAggregator(
             name,
             factories,
@@ -367,10 +366,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.OrdinalsFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+            : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
         boolean remapGlobalOrd = true;
         if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
             /*

SignificantTextAggregatorFactory.java

@@ -44,7 +44,6 @@ import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.BytesRefHash;
 import org.opensearch.common.util.ObjectArray;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.mapper.MappedFieldType;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
@@ -138,10 +137,10 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
         // TODO - need to check with mapping that this is indeed a text field....
-        IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         IncludeExclude.StringFilter incExcFilter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
+            : includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
         MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
             queryShardContext.lookup().source(),

TermsAggregatorFactory.java

@@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
 import org.apache.lucene.search.IndexSearcher;
 import org.opensearch.common.ParseField;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.AggregationExecutionException;
@@ -381,10 +380,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
         CardinalityUpperBound cardinality,
         Map<String, Object> metadata
     ) throws IOException {
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.StringFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(format, indexSettings);
+            : includeExclude.convertToStringFilter(format, maxRegexLength);
         return new MapStringTermsAggregator(
             name,
             factories,
@@ -462,10 +461,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
             );
         }
-        IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         final IncludeExclude.OrdinalsFilter filter = includeExclude == null
             ? null
-            : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+            : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
         boolean remapGlobalOrds;
         if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
             /*

IncludeExcludeTests.java

@@ -36,16 +36,12 @@ import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongBitSet;
-import org.opensearch.Version;
-import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.ParseField;
-import org.opensearch.common.settings.Settings;
 import org.opensearch.common.xcontent.ToXContent;
 import org.opensearch.common.xcontent.XContentBuilder;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.common.xcontent.XContentParser;
 import org.opensearch.common.xcontent.XContentType;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@@ -58,23 +54,14 @@ import java.util.TreeSet;
 public class IncludeExcludeTests extends OpenSearchTestCase {
-    private final IndexSettings dummyIndexSettings = new IndexSettings(
-        IndexMetadata.builder("index")
-            .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
-            .numberOfShards(1)
-            .numberOfReplicas(0)
-            .build(),
-        Settings.EMPTY
-    );
     public void testEmptyTermsWithOrds() throws IOException {
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
         inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
     }
@@ -113,13 +100,13 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
         };
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertTrue(acceptedOrds.get(0));
         inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -128,7 +115,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
             new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
         );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -137,7 +124,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
             null, // means everything included
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
        );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));