Add max_expansions option to wildcard interval (#1916)
Add support for setting `max_expansions` on a wildcard interval. The default value is still 128, and the maximum value is bounded by `BooleanQuery.getMaxClauseCount()`. Non-positive values fall back to the default.
Signed-off-by: Matt Weber <matt@mattweber.org>
This commit is contained in:
parent
57ac788bb6
commit
3b8db91abe
|
@ -40,6 +40,7 @@ import org.apache.lucene.queries.intervals.IntervalsSource;
|
|||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.opensearch.LegacyESVersion;
|
||||
import org.opensearch.Version;
|
||||
import org.opensearch.common.ParseField;
|
||||
import org.opensearch.common.ParsingException;
|
||||
import org.opensearch.common.io.stream.NamedWriteable;
|
||||
|
@ -637,17 +638,24 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
private final String pattern;
|
||||
private final String analyzer;
|
||||
private final String useField;
|
||||
private final Integer maxExpansions;
|
||||
|
||||
public Wildcard(String pattern, String analyzer, String useField) {
|
||||
/**
 * Creates a wildcard interval source definition.
 *
 * @param pattern       the wildcard pattern
 * @param analyzer      analyzer name, may be {@code null}
 * @param useField      field name to use instead of the queried field, may be {@code null}
 * @param maxExpansions upper bound on wildcard term expansions; {@code null}, zero and
 *                      negative values are all stored as {@code null}
 */
public Wildcard(String pattern, String analyzer, String useField, Integer maxExpansions) {
|
||||
this.pattern = pattern;
|
||||
this.analyzer = analyzer;
|
||||
this.useField = useField;
|
||||
// Only strictly positive values are kept; anything else is normalized to null so that
// the source is later built without an explicit expansion limit.
this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null;
|
||||
}
|
||||
|
||||
/**
 * Reads a wildcard definition from a stream: the pattern, then the optional analyzer
 * and optional use-field, then (version permitting) the optional max expansions.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
public Wildcard(StreamInput in) throws IOException {
|
||||
this.pattern = in.readString();
|
||||
this.analyzer = in.readOptionalString();
|
||||
this.useField = in.readOptionalString();
|
||||
// max expansions is only read when the stream version is 2.0.0 or later;
// for earlier versions nothing is consumed and the value is left unset.
if (in.getVersion().onOrAfter(Version.V_2_0_0)) {
|
||||
this.maxExpansions = in.readOptionalVInt();
|
||||
} else {
|
||||
this.maxExpansions = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -665,11 +673,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
|
||||
}
|
||||
BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
|
||||
source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm));
|
||||
IntervalsSource wildcardSource = maxExpansions == null
|
||||
? Intervals.wildcard(normalizedTerm)
|
||||
: Intervals.wildcard(normalizedTerm, maxExpansions);
|
||||
source = Intervals.fixField(useField, wildcardSource);
|
||||
} else {
|
||||
checkPositions(fieldType);
|
||||
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
|
||||
source = Intervals.wildcard(normalizedTerm);
|
||||
source = maxExpansions == null ? Intervals.wildcard(normalizedTerm) : Intervals.wildcard(normalizedTerm, maxExpansions);
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
@ -694,12 +705,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
Wildcard wildcard = (Wildcard) o;
|
||||
return Objects.equals(pattern, wildcard.pattern)
|
||||
&& Objects.equals(analyzer, wildcard.analyzer)
|
||||
&& Objects.equals(useField, wildcard.useField);
|
||||
&& Objects.equals(useField, wildcard.useField)
|
||||
&& Objects.equals(maxExpansions, wildcard.maxExpansions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(pattern, analyzer, useField);
|
||||
return Objects.hash(pattern, analyzer, useField, maxExpansions);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -712,6 +724,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
out.writeString(pattern);
|
||||
out.writeOptionalString(analyzer);
|
||||
out.writeOptionalString(useField);
|
||||
if (out.getVersion().onOrAfter(Version.V_2_0_0)) {
|
||||
out.writeOptionalVInt(maxExpansions);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -724,6 +739,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
if (useField != null) {
|
||||
builder.field("use_field", useField);
|
||||
}
|
||||
if (maxExpansions != null) {
|
||||
builder.field("max_expansions", maxExpansions);
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
@ -732,12 +750,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
String term = (String) args[0];
|
||||
String analyzer = (String) args[1];
|
||||
String useField = (String) args[2];
|
||||
return new Wildcard(term, analyzer, useField);
|
||||
Integer maxExpansions = (Integer) args[3];
|
||||
return new Wildcard(term, analyzer, useField, maxExpansions);
|
||||
});
|
||||
// Field declarations for the wildcard parser: "pattern" is a required constructor
// argument, while "analyzer", "use_field" and "max_expansions" are optional.
// The declaration order lines up with the args[0]..args[3] casts in the parser's
// builder lambda.
static {
|
||||
PARSER.declareString(constructorArg(), new ParseField("pattern"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
|
||||
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions"));
|
||||
}
|
||||
|
||||
public static Wildcard fromXContent(XContentParser parser) throws IOException {
|
||||
|
@ -755,6 +775,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
String getUseField() {
|
||||
return useField;
|
||||
}
|
||||
|
||||
/**
 * Returns the stored max expansions value.
 *
 * @return the max expansions, or {@code null} when none is set
 */
Integer getMaxExpansions() {
|
||||
return maxExpansions;
|
||||
}
|
||||
}
|
||||
|
||||
public static class Fuzzy extends IntervalsSourceProvider {
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
|
@ -654,6 +655,35 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
|||
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
|
||||
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("Te?m"))));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String wildcard_max_expand_json = "{ \"intervals\" : { \""
|
||||
+ TEXT_FIELD_NAME
|
||||
+ "\": { "
|
||||
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : 500 } } } }";
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(wildcard_max_expand_json);
|
||||
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"), 500));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String wildcard_neg_max_expand_json = "{ \"intervals\" : { \""
|
||||
+ TEXT_FIELD_NAME
|
||||
+ "\": { "
|
||||
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : -20 } } } }";
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(wildcard_neg_max_expand_json);
|
||||
expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m"))); // max expansions use default
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String wildcard_over_max_expand_json = "{ \"intervals\" : { \""
|
||||
+ TEXT_FIELD_NAME
|
||||
+ "\": { "
|
||||
+ "\"wildcard\" : { \"pattern\" : \"Te?m\", \"max_expansions\" : "
|
||||
+ (BooleanQuery.getMaxClauseCount() + 1)
|
||||
+ " } } } }";
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(wildcard_over_max_expand_json);
|
||||
builder1.toQuery(createShardContext());
|
||||
});
|
||||
}
|
||||
|
||||
private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) {
|
||||
|
|
|
@ -51,7 +51,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
|
|||
return new Wildcard(
|
||||
randomAlphaOfLength(10),
|
||||
randomBoolean() ? randomAlphaOfLength(10) : null,
|
||||
randomBoolean() ? randomAlphaOfLength(10) : null
|
||||
randomBoolean() ? randomAlphaOfLength(10) : null,
|
||||
randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -60,7 +61,8 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
|
|||
String wildcard = instance.getPattern();
|
||||
String analyzer = instance.getAnalyzer();
|
||||
String useField = instance.getUseField();
|
||||
switch (between(0, 2)) {
|
||||
Integer maxExpansions = instance.getMaxExpansions();
|
||||
switch (between(0, 3)) {
|
||||
case 0:
|
||||
wildcard += "a";
|
||||
break;
|
||||
|
@ -70,10 +72,13 @@ public class WildcardIntervalsSourceProviderTests extends AbstractSerializingTes
|
|||
case 2:
|
||||
useField = useField == null ? randomAlphaOfLength(5) : null;
|
||||
break;
|
||||
case 3:
|
||||
maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null;
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Illegal randomisation branch");
|
||||
}
|
||||
return new Wildcard(wildcard, analyzer, useField);
|
||||
return new Wildcard(wildcard, analyzer, useField, maxExpansions);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue