Add max_expansions option to wildcard interval (#1916)

Add support for setting `max_expansions` on a wildcard interval.
The default remains 128, and the maximum is bounded by
`BooleanQuery.getMaxClauseCount()`. A missing or non-positive value falls
back to the default.

Signed-off-by: Matt Weber <matt@mattweber.org>
This commit is contained in:
Matt Weber 2022-01-26 11:06:39 -08:00 committed by GitHub
parent 57ac788bb6
commit 3b8db91abe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 9 deletions

View File

@ -40,6 +40,7 @@ import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.common.ParseField;
import org.opensearch.common.ParsingException;
import org.opensearch.common.io.stream.NamedWriteable;
@ -637,17 +638,24 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
private final String pattern;
private final String analyzer;
private final String useField;
private final Integer maxExpansions;
public Wildcard(String pattern, String analyzer, String useField) {
/**
 * Creates a wildcard interval source provider.
 *
 * @param pattern       the wildcard pattern to match
 * @param analyzer      analyzer setting for the pattern; may be {@code null}
 * @param useField      field setting serialized as {@code use_field}; may be {@code null}
 * @param maxExpansions limit on wildcard expansions; {@code null}, zero and negative values are
 *                      all stored as {@code null}, which makes the source builder call
 *                      {@code Intervals.wildcard(term)} without an explicit limit
 */
public Wildcard(String pattern, String analyzer, String useField, Integer maxExpansions) {
this.pattern = pattern;
this.analyzer = analyzer;
this.useField = useField;
// Normalize "not set" and "not positive" to the same null state so equals/hashCode and
// serialization treat them identically.
this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null;
}
public Wildcard(StreamInput in) throws IOException {
this.pattern = in.readString();
this.analyzer = in.readOptionalString();
this.useField = in.readOptionalString();
if (in.getVersion().onOrAfter(Version.V_2_0_0)) {
this.maxExpansions = in.readOptionalVInt();
} else {
this.maxExpansions = null;
}
}
@Override
@ -665,11 +673,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm));
IntervalsSource wildcardSource = maxExpansions == null
? Intervals.wildcard(normalizedTerm)
: Intervals.wildcard(normalizedTerm, maxExpansions);
source = Intervals.fixField(useField, wildcardSource);
} else {
checkPositions(fieldType);
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
source = Intervals.wildcard(normalizedTerm);
source = maxExpansions == null ? Intervals.wildcard(normalizedTerm) : Intervals.wildcard(normalizedTerm, maxExpansions);
}
return source;
}
@ -694,12 +705,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
Wildcard wildcard = (Wildcard) o;
return Objects.equals(pattern, wildcard.pattern)
&& Objects.equals(analyzer, wildcard.analyzer)
&& Objects.equals(useField, wildcard.useField);
&& Objects.equals(useField, wildcard.useField)
&& Objects.equals(maxExpansions, wildcard.maxExpansions);
}
@Override
public int hashCode() {
return Objects.hash(pattern, analyzer, useField);
return Objects.hash(pattern, analyzer, useField, maxExpansions);
}
@Override
@ -712,6 +724,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
out.writeString(pattern);
out.writeOptionalString(analyzer);
out.writeOptionalString(useField);
if (out.getVersion().onOrAfter(Version.V_2_0_0)) {
out.writeOptionalVInt(maxExpansions);
}
}
@Override
@ -724,6 +739,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (useField != null) {
builder.field("use_field", useField);
}
if (maxExpansions != null) {
builder.field("max_expansions", maxExpansions);
}
builder.endObject();
return builder;
}
@ -732,12 +750,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
String term = (String) args[0];
String analyzer = (String) args[1];
String useField = (String) args[2];
return new Wildcard(term, analyzer, useField);
Integer maxExpansions = (Integer) args[3];
return new Wildcard(term, analyzer, useField, maxExpansions);
});
// Declares the fields accepted by PARSER. Only "pattern" is declared with constructorArg();
// "analyzer", "use_field" and "max_expansions" use optionalConstructorArg().
// The lambda that builds the Wildcard reads args[0]..args[3] as pattern, analyzer, use_field,
// max_expansions, i.e. in this same order, so keep the two in sync when adding a field.
static {
PARSER.declareString(constructorArg(), new ParseField("pattern"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions"));
}
public static Wildcard fromXContent(XContentParser parser) throws IOException {
@ -755,6 +775,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
/**
 * Package-private accessor for the use-field setting.
 *
 * @return the stored use-field value, or {@code null} if none was provided
 */
String getUseField() {
    return this.useField;
}
/**
 * Package-private accessor for the max-expansions setting.
 *
 * @return the stored expansion limit, or {@code null} when no limit is stored
 */
Integer getMaxExpansions() {
    return this.maxExpansions;
}
}
public static class Fuzzy extends IntervalsSourceProvider {

View File

@ -36,6 +36,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
@ -654,6 +655,35 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("Te?m"))));
assertEquals(expected, builder.toQuery(createShardContext()));
String wildcard_max_expand_json = "{ \"intervals\" : { \""
+ TEXT_FIELD_NAME
+ "\": { "
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : 500 } } } }";
builder = (IntervalQueryBuilder) parseQuery(wildcard_max_expand_json);
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"), 500));
assertEquals(expected, builder.toQuery(createShardContext()));
String wildcard_neg_max_expand_json = "{ \"intervals\" : { \""
+ TEXT_FIELD_NAME
+ "\": { "
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : -20 } } } }";
builder = (IntervalQueryBuilder) parseQuery(wildcard_neg_max_expand_json);
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"))); // max expansions use default
assertEquals(expected, builder.toQuery(createShardContext()));
String wildcard_over_max_expand_json = "{ \"intervals\" : { \""
+ TEXT_FIELD_NAME
+ "\": { "
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : "
+ (BooleanQuery.getMaxClauseCount() + 1)
+ " } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(wildcard_over_max_expand_json);
builder1.toQuery(createShardContext());
});
}
private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) {

View File

@ -51,7 +51,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
return new Wildcard(
randomAlphaOfLength(10),
randomBoolean() ? randomAlphaOfLength(10) : null,
randomBoolean() ? randomAlphaOfLength(10) : null
randomBoolean() ? randomAlphaOfLength(10) : null,
randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null
);
}
@ -60,7 +61,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
String wildcard = instance.getPattern();
String analyzer = instance.getAnalyzer();
String useField = instance.getUseField();
switch (between(0, 2)) {
Integer maxExpansions = instance.getMaxExpansions();
switch (between(0, 3)) {
case 0:
wildcard += "a";
break;
@ -70,10 +72,13 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
case 2:
useField = useField == null ? randomAlphaOfLength(5) : null;
break;
case 3:
maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null;
break;
default:
throw new AssertionError("Illegal randomisation branch");
}
return new Wildcard(wildcard, analyzer, useField);
return new Wildcard(wildcard, analyzer, useField, maxExpansions);
}
@Override