This change refactors an earlier change to impose a reg-ex size limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts an integer limit value. This is necessary because the IncludeExclude class is also used outside the core codebase, by callers that may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class. (cherry picked from commit ba1966853e728b153e42be59ba449420e79b09ee) Signed-off-by: Kartik Ganesh <gkart@amazon.com> Co-authored-by: Kartik Ganesh <gkart@amazon.com>
This commit is contained in:
parent
135177e28e
commit
3af4300c3f
|
@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
// can disagree on which terms hash to the required partition.
|
||||
private static final int HASH_PARTITIONING_SEED = 31;
|
||||
|
||||
/**
|
||||
* The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
|
||||
* For context, see:
|
||||
* https://github.com/opensearch-project/OpenSearch/issues/1992
|
||||
* https://github.com/opensearch-project/OpenSearch/issues/2858
|
||||
*/
|
||||
private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;
|
||||
|
||||
// for parsing purposes only
|
||||
// TODO: move all aggs to the same package so that this stuff could be pkg-private
|
||||
public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
|
||||
|
@ -576,10 +584,10 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
return incNumPartitions > 0;
|
||||
}
|
||||
|
||||
private Automaton toAutomaton(IndexSettings indexSettings) {
|
||||
private Automaton toAutomaton(int maxRegExLength) {
|
||||
Automaton a;
|
||||
if (include != null) {
|
||||
validateRegExpStringLength(include, indexSettings);
|
||||
validateRegExpStringLength(include, maxRegExLength);
|
||||
a = new RegExp(include).toAutomaton();
|
||||
} else if (includeValues != null) {
|
||||
a = Automata.makeStringUnion(includeValues);
|
||||
|
@ -587,7 +595,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
a = Automata.makeAnyString();
|
||||
}
|
||||
if (exclude != null) {
|
||||
validateRegExpStringLength(exclude, indexSettings);
|
||||
validateRegExpStringLength(exclude, maxRegExLength);
|
||||
Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
|
||||
a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
|
||||
} else if (excludeValues != null) {
|
||||
|
@ -596,8 +604,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
return a;
|
||||
}
|
||||
|
||||
private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
|
||||
int maxRegexLength = indexSettings.getMaxRegexLength();
|
||||
private static void validateRegExpStringLength(String source, int maxRegexLength) {
|
||||
if (maxRegexLength > 0 && source.length() > maxRegexLength) {
|
||||
throw new IllegalArgumentException(
|
||||
"The length of regex ["
|
||||
|
@ -613,9 +620,17 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
}
|
||||
}
|
||||
|
||||
public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
|
||||
/**
|
||||
* Wrapper method that imposes a default regex limit.
|
||||
* See https://github.com/opensearch-project/OpenSearch/issues/2858
|
||||
*/
|
||||
public StringFilter convertToStringFilter(DocValueFormat format) {
|
||||
return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
|
||||
}
|
||||
|
||||
public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
|
||||
if (isRegexBased()) {
|
||||
return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
|
||||
return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
|
||||
}
|
||||
if (isPartitionBased()) {
|
||||
return new PartitionedStringFilter();
|
||||
|
@ -636,10 +651,18 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
|
|||
return result;
|
||||
}
|
||||
|
||||
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
|
||||
/**
|
||||
* Wrapper method that imposes a default regex limit.
|
||||
* See https://github.com/opensearch-project/OpenSearch/issues/2858
|
||||
*/
|
||||
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
|
||||
return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
|
||||
}
|
||||
|
||||
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {
|
||||
|
||||
if (isRegexBased()) {
|
||||
return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
|
||||
return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
|
||||
}
|
||||
if (isPartitionBased()) {
|
||||
return new PartitionedOrdinalsFilter();
|
||||
|
|
|
@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
|
|||
|
||||
import org.opensearch.common.ParseField;
|
||||
import org.opensearch.common.logging.DeprecationLogger;
|
||||
import org.opensearch.index.IndexSettings;
|
||||
import org.opensearch.index.query.QueryShardContext;
|
||||
import org.opensearch.search.DocValueFormat;
|
||||
import org.opensearch.search.aggregations.Aggregator;
|
||||
|
@ -251,10 +250,10 @@ public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
|||
double precision,
|
||||
CardinalityUpperBound cardinality
|
||||
) throws IOException {
|
||||
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
final IncludeExclude.StringFilter filter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToStringFilter(format, indexSettings);
|
||||
: includeExclude.convertToStringFilter(format, maxRegexLength);
|
||||
return new StringRareTermsAggregator(
|
||||
name,
|
||||
factories,
|
||||
|
|
|
@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
|
|||
|
||||
import org.opensearch.common.ParseField;
|
||||
import org.opensearch.common.logging.DeprecationLogger;
|
||||
import org.opensearch.index.IndexSettings;
|
||||
import org.opensearch.index.query.QueryBuilder;
|
||||
import org.opensearch.index.query.QueryShardContext;
|
||||
import org.opensearch.search.DocValueFormat;
|
||||
|
@ -326,10 +325,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|||
CardinalityUpperBound cardinality,
|
||||
Map<String, Object> metadata
|
||||
) throws IOException {
|
||||
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
final IncludeExclude.StringFilter filter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToStringFilter(format, indexSettings);
|
||||
: includeExclude.convertToStringFilter(format, maxRegexLength);
|
||||
return new MapStringTermsAggregator(
|
||||
name,
|
||||
factories,
|
||||
|
@ -367,10 +366,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
|||
CardinalityUpperBound cardinality,
|
||||
Map<String, Object> metadata
|
||||
) throws IOException {
|
||||
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
|
||||
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
|
||||
boolean remapGlobalOrd = true;
|
||||
if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
|
||||
/*
|
||||
|
|
|
@ -44,7 +44,6 @@ import org.opensearch.common.lease.Releasables;
|
|||
import org.opensearch.common.util.BigArrays;
|
||||
import org.opensearch.common.util.BytesRefHash;
|
||||
import org.opensearch.common.util.ObjectArray;
|
||||
import org.opensearch.index.IndexSettings;
|
||||
import org.opensearch.index.mapper.MappedFieldType;
|
||||
import org.opensearch.index.query.QueryBuilder;
|
||||
import org.opensearch.index.query.QueryShardContext;
|
||||
|
@ -138,10 +137,10 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
|
|||
|
||||
// TODO - need to check with mapping that this is indeed a text field....
|
||||
|
||||
IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
IncludeExclude.StringFilter incExcFilter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
|
||||
: includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
|
||||
|
||||
MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
|
||||
queryShardContext.lookup().source(),
|
||||
|
|
|
@ -34,7 +34,6 @@ package org.opensearch.search.aggregations.bucket.terms;
|
|||
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.opensearch.common.ParseField;
|
||||
import org.opensearch.index.IndexSettings;
|
||||
import org.opensearch.index.query.QueryShardContext;
|
||||
import org.opensearch.search.DocValueFormat;
|
||||
import org.opensearch.search.aggregations.AggregationExecutionException;
|
||||
|
@ -381,10 +380,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
|||
CardinalityUpperBound cardinality,
|
||||
Map<String, Object> metadata
|
||||
) throws IOException {
|
||||
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
final IncludeExclude.StringFilter filter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToStringFilter(format, indexSettings);
|
||||
: includeExclude.convertToStringFilter(format, maxRegexLength);
|
||||
return new MapStringTermsAggregator(
|
||||
name,
|
||||
factories,
|
||||
|
@ -462,10 +461,10 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
|
|||
);
|
||||
|
||||
}
|
||||
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
|
||||
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
|
||||
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
|
||||
? null
|
||||
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
|
||||
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
|
||||
boolean remapGlobalOrds;
|
||||
if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
|
||||
/*
|
||||
|
|
|
@ -36,16 +36,12 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.opensearch.Version;
|
||||
import org.opensearch.cluster.metadata.IndexMetadata;
|
||||
import org.opensearch.common.ParseField;
|
||||
import org.opensearch.common.settings.Settings;
|
||||
import org.opensearch.common.xcontent.ToXContent;
|
||||
import org.opensearch.common.xcontent.XContentBuilder;
|
||||
import org.opensearch.common.xcontent.XContentFactory;
|
||||
import org.opensearch.common.xcontent.XContentParser;
|
||||
import org.opensearch.common.xcontent.XContentType;
|
||||
import org.opensearch.index.IndexSettings;
|
||||
import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
|
||||
import org.opensearch.search.DocValueFormat;
|
||||
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
|
||||
|
@ -58,23 +54,14 @@ import java.util.TreeSet;
|
|||
|
||||
public class IncludeExcludeTests extends OpenSearchTestCase {
|
||||
|
||||
private final IndexSettings dummyIndexSettings = new IndexSettings(
|
||||
IndexMetadata.builder("index")
|
||||
.settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
|
||||
.numberOfShards(1)
|
||||
.numberOfReplicas(0)
|
||||
.build(),
|
||||
Settings.EMPTY
|
||||
);
|
||||
|
||||
public void testEmptyTermsWithOrds() throws IOException {
|
||||
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
|
||||
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
|
||||
assertEquals(0, acceptedOrds.length());
|
||||
|
||||
inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
|
||||
assertEquals(0, acceptedOrds.length());
|
||||
}
|
||||
|
@ -113,13 +100,13 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
|
|||
|
||||
};
|
||||
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
|
||||
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
|
||||
assertEquals(1, acceptedOrds.length());
|
||||
assertTrue(acceptedOrds.get(0));
|
||||
|
||||
inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
|
||||
assertEquals(1, acceptedOrds.length());
|
||||
assertFalse(acceptedOrds.get(0));
|
||||
|
@ -128,7 +115,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
|
|||
new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
|
||||
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
|
||||
);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
|
||||
assertEquals(1, acceptedOrds.length());
|
||||
assertFalse(acceptedOrds.get(0));
|
||||
|
@ -137,7 +124,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
|
|||
null, // means everything included
|
||||
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
|
||||
);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
|
||||
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
|
||||
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
|
||||
assertEquals(1, acceptedOrds.length());
|
||||
assertFalse(acceptedOrds.get(0));
|
||||
|
|
Loading…
Reference in New Issue