Updates to the large string reg-ex check (#2814)

* Updates to the large string reg-ex check

Removed the null-case for IndexSettings since this only occurs in tests. The tests now use a dummy Index Setting.
This change also fixes a bug with the base case handling of max regex length in the check.

Signed-off-by: Kartik Ganesh <gkart@amazon.com>
This commit is contained in:
Kartik 2022-04-07 18:12:33 -07:00 committed by GitHub
parent 47a22bb08d
commit 2d89bc7c61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 35 deletions

View File

@ -48,7 +48,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.opensearch.OpenSearchParseException;
import org.opensearch.common.Nullable;
import org.opensearch.common.ParseField;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
@ -577,23 +576,10 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
return incNumPartitions > 0;
}
private Automaton toAutomaton(@Nullable IndexSettings indexSettings) {
int maxRegexLength = indexSettings == null ? -1 : indexSettings.getMaxRegexLength();
private Automaton toAutomaton(IndexSettings indexSettings) {
Automaton a;
if (include != null) {
if (include.length() > maxRegexLength) {
throw new IllegalArgumentException(
"The length of regex ["
+ include.length()
+ "] used in the request has exceeded "
+ "the allowed maximum of ["
+ maxRegexLength
+ "]. "
+ "This maximum can be set by changing the ["
+ IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
+ "] index level setting."
);
}
validateRegExpStringLength(include, indexSettings);
a = new RegExp(include).toAutomaton();
} else if (includeValues != null) {
a = Automata.makeStringUnion(includeValues);
@ -601,19 +587,7 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
a = Automata.makeAnyString();
}
if (exclude != null) {
if (exclude.length() > maxRegexLength) {
throw new IllegalArgumentException(
"The length of regex ["
+ exclude.length()
+ "] used in the request has exceeded "
+ "the allowed maximum of ["
+ maxRegexLength
+ "]. "
+ "This maximum can be set by changing the ["
+ IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
+ "] index level setting."
);
}
validateRegExpStringLength(exclude, indexSettings);
Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (excludeValues != null) {
@ -622,6 +596,23 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
return a;
}
private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
int maxRegexLength = indexSettings.getMaxRegexLength();
if (maxRegexLength > 0 && source.length() > maxRegexLength) {
throw new IllegalArgumentException(
"The length of regex ["
+ source.length()
+ "] used in the request has exceeded "
+ "the allowed maximum of ["
+ maxRegexLength
+ "]. "
+ "This maximum can be set by changing the ["
+ IndexSettings.MAX_REGEX_LENGTH_SETTING.getKey()
+ "] index level setting."
);
}
}
public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton(indexSettings));

View File

@ -36,12 +36,16 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.ParseField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.ToXContent;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@ -53,14 +57,24 @@ import java.util.Collections;
import java.util.TreeSet;
public class IncludeExcludeTests extends OpenSearchTestCase {
private final IndexSettings dummyIndexSettings = new IndexSettings(
IndexMetadata.builder("index")
.settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
.numberOfShards(1)
.numberOfReplicas(0)
.build(),
Settings.EMPTY
);
public void testEmptyTermsWithOrds() throws IOException {
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
assertEquals(0, acceptedOrds.length());
inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
assertEquals(0, acceptedOrds.length());
}
@ -99,13 +113,13 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
};
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertTrue(acceptedOrds.get(0));
inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));
@ -114,7 +128,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));
@ -123,7 +137,7 @@ public class IncludeExcludeTests extends OpenSearchTestCase {
null, // means everything included
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));