From 3b8db91abe9d9dffec3b7265b571537aadc2402b Mon Sep 17 00:00:00 2001 From: Matt Weber Date: Wed, 26 Jan 2022 11:06:39 -0800 Subject: [PATCH] Add max_expansions option to wildcard interval (#1916) Add support for setting the max expansions on a wildcard interval. The default value is still 128 and the max value is bounded by `BooleanQuery.getMaxClauseCount()`. Signed-off-by: Matt Weber --- .../index/query/IntervalsSourceProvider.java | 36 +++++++++++++++---- .../query/IntervalQueryBuilderTests.java | 30 ++++++++++++++++ .../WildcardIntervalsSourceProviderTests.java | 11 ++++-- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java index 166ac3608ca..1bc9b42d277 100644 --- a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java @@ -40,6 +40,7 @@ import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.util.BytesRef; import org.opensearch.LegacyESVersion; +import org.opensearch.Version; import org.opensearch.common.ParseField; import org.opensearch.common.ParsingException; import org.opensearch.common.io.stream.NamedWriteable; @@ -637,17 +638,24 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont private final String pattern; private final String analyzer; private final String useField; + private final Integer maxExpansions; - public Wildcard(String pattern, String analyzer, String useField) { + public Wildcard(String pattern, String analyzer, String useField, Integer maxExpansions) { this.pattern = pattern; this.analyzer = analyzer; this.useField = useField; + this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null; } public Wildcard(StreamInput in) throws IOException { this.pattern = in.readString(); this.analyzer = in.readOptionalString(); this.useField = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_2_0_0)) { + this.maxExpansions = in.readOptionalVInt(); + } else { + this.maxExpansions = null; + } } @Override @@ -665,11 +673,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); } BytesRef normalizedTerm = analyzer.normalize(useField, pattern); - source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm)); + IntervalsSource wildcardSource = maxExpansions == null + ? Intervals.wildcard(normalizedTerm) + : Intervals.wildcard(normalizedTerm, maxExpansions); + source = Intervals.fixField(useField, wildcardSource); } else { checkPositions(fieldType); BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern); - source = Intervals.wildcard(normalizedTerm); + source = maxExpansions == null ? Intervals.wildcard(normalizedTerm) : Intervals.wildcard(normalizedTerm, maxExpansions); } return source; } @@ -694,12 +705,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont Wildcard wildcard = (Wildcard) o; return Objects.equals(pattern, wildcard.pattern) && Objects.equals(analyzer, wildcard.analyzer) - && Objects.equals(useField, wildcard.useField); + && Objects.equals(useField, wildcard.useField) + && Objects.equals(maxExpansions, wildcard.maxExpansions); } @Override public int hashCode() { - return Objects.hash(pattern, analyzer, useField); + return Objects.hash(pattern, analyzer, useField, maxExpansions); } @Override @@ -712,6 +724,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont out.writeString(pattern); out.writeOptionalString(analyzer); out.writeOptionalString(useField); + if (out.getVersion().onOrAfter(Version.V_2_0_0)) { + out.writeOptionalVInt(maxExpansions); + } } @Override @@ -724,6 +739,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont if (useField != null) { builder.field("use_field", useField); } + if (maxExpansions != null) { + builder.field("max_expansions", maxExpansions); + } builder.endObject(); return builder; } @@ -732,12 +750,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont String term = (String) args[0]; String analyzer = (String) args[1]; String useField = (String) args[2]; - return new Wildcard(term, analyzer, useField); + Integer maxExpansions = (Integer) args[3]; + return new Wildcard(term, analyzer, useField, maxExpansions); }); static { PARSER.declareString(constructorArg(), new ParseField("pattern")); PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer")); PARSER.declareString(optionalConstructorArg(), new ParseField("use_field")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions")); } public static Wildcard fromXContent(XContentParser parser) throws IOException { @@ -755,6 +775,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont String getUseField() { return useField; } + + Integer getMaxExpansions() { + return maxExpansions; + } } public static class Fuzzy extends IntervalsSourceProvider { diff --git a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java index b0a7ffc94a1..11f8c165877 100644 --- a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.IntervalQuery; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -654,6 +655,35 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase { + IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(wildcard_over_max_expand_json); + builder1.toQuery(createShardContext()); + }); } private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) { diff --git a/server/src/test/java/org/opensearch/index/query/WildcardIntervalsSourceProviderTests.java b/server/src/test/java/org/opensearch/index/query/WildcardIntervalsSourceProviderTests.java index 88623ab4d83..784159005e1 100644 --- a/server/src/test/java/org/opensearch/index/query/WildcardIntervalsSourceProviderTests.java +++ b/server/src/test/java/org/opensearch/index/query/WildcardIntervalsSourceProviderTests.java @@ -51,7 +51,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes return new Wildcard( randomAlphaOfLength(10), randomBoolean() ? randomAlphaOfLength(10) : null, - randomBoolean() ? randomAlphaOfLength(10) : null + randomBoolean() ? randomAlphaOfLength(10) : null, + randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null ); } @@ -60,7 +61,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes String wildcard = instance.getPattern(); String analyzer = instance.getAnalyzer(); String useField = instance.getUseField(); - switch (between(0, 2)) { + Integer maxExpansions = instance.getMaxExpansions(); + switch (between(0, 3)) { case 0: wildcard += "a"; break; @@ -70,10 +72,13 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes case 2: useField = useField == null ? randomAlphaOfLength(5) : null; break; + case 3: + maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null; + break; default: throw new AssertionError("Illegal randomisation branch"); } - return new Wildcard(wildcard, analyzer, useField); + return new Wildcard(wildcard, analyzer, useField, maxExpansions); } @Override