Wildcard intervals (#43691)

This commit adds a wildcard intervals source, similar to the prefix. It
also renames the `term` parameter in prefix to `prefix`, bringing it
into line with the `pattern` parameter in wildcard.

Closes #43198
This commit is contained in:
Alan Woodward 2019-06-28 13:58:06 +01:00
parent 74dd6e49fc
commit 81dbcfb268
5 changed files with 258 additions and 15 deletions

View File

@ -101,6 +101,34 @@ If specified, then match intervals from this field rather than the top-level fie
The `prefix` will be normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
[[intervals-wildcard]]
==== `wildcard`
The `wildcard` rule finds terms that match a wildcard pattern. The pattern will
expand to match at most 128 terms; if there are more matching terms in the index,
then an error will be returned.
[horizontal]
`pattern`::
Find terms matching this pattern
+
--
This parameter supports two wildcard operators:
* `?`, which matches any single character
* `*`, which can match zero or more characters, including an empty one
WARNING: Avoid beginning patterns with `*` or `?`. This can increase
the iterations needed to find matching terms and slow search performance.
--
`analyzer`::
Which analyzer should be used to normalize the `pattern`. By default, the
search analyzer of the top-level field will be used.
`use_field`::
If specified, then match intervals from this field rather than the top-level field.
The `pattern` will be normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
[[intervals-all_of]]
==== `all_of`

View File

@ -407,3 +407,23 @@ setup:
prefix: out
- match: { hits.total.value: 3 }
---
"Test wildcard":
- skip:
version: " - 8.0.0"
reason: "TODO: change to 7.3 in backport"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cold
- wildcard:
pattern: out?ide
- match: { hits.total.value: 3 }

View File

@ -19,10 +19,12 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.intervals.FilteredIntervalsSource;
import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.search.intervals.Intervals;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
@ -80,6 +82,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return Combine.fromXContent(parser);
case "prefix":
return Prefix.fromXContent(parser);
case "wildcard":
return Wildcard.fromXContent(parser);
}
throw new ParsingException(parser.getTokenLocation(),
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
@ -446,18 +450,18 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
public static final String NAME = "prefix";
private final String term;
private final String prefix;
private final String analyzer;
private final String useField;
public Prefix(String term, String analyzer, String useField) {
this.term = term;
public Prefix(String prefix, String analyzer, String useField) {
this.prefix = prefix;
this.analyzer = analyzer;
this.useField = useField;
}
public Prefix(StreamInput in) throws IOException {
this.term = in.readString();
this.prefix = in.readString();
this.analyzer = in.readOptionalString();
this.useField = in.readOptionalString();
}
@ -472,10 +476,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true));
}
else {
source = fieldType.intervals(term, 0, false, analyzer, true);
source = fieldType.intervals(prefix, 0, false, analyzer, true);
}
return source;
}
@ -492,14 +496,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Prefix prefix = (Prefix) o;
return Objects.equals(term, prefix.term) &&
return Objects.equals(this.prefix, prefix.prefix) &&
Objects.equals(analyzer, prefix.analyzer) &&
Objects.equals(useField, prefix.useField);
}
@Override
public int hashCode() {
return Objects.hash(term, analyzer, useField);
return Objects.hash(prefix, analyzer, useField);
}
@Override
@ -509,7 +513,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(term);
out.writeString(prefix);
out.writeOptionalString(analyzer);
out.writeOptionalString(useField);
}
@ -517,7 +521,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
builder.field("term", term);
builder.field("prefix", prefix);
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
@ -535,7 +539,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return new Prefix(term, analyzer, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("term"));
PARSER.declareString(constructorArg(), new ParseField("prefix"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
}
@ -545,6 +549,123 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
}
}
/**
 * An {@link IntervalsSourceProvider} that matches terms against a wildcard pattern.
 *
 * The pattern is normalized using the search analyzer of the target field (or an
 * explicitly supplied analyzer) before being handed to Lucene's wildcard intervals.
 */
public static class Wildcard extends IntervalsSourceProvider {

    public static final String NAME = "wildcard";

    private final String pattern;   // wildcard pattern, e.g. "out?ide"
    private final String analyzer;  // optional analyzer name used to normalize the pattern
    private final String useField;  // optional field to match intervals from instead of the top-level field

    /**
     * @param pattern  the wildcard pattern to expand against the index
     * @param analyzer optional analyzer used to normalize the pattern; may be null
     * @param useField optional field to source intervals from; may be null
     */
    public Wildcard(String pattern, String analyzer, String useField) {
        this.pattern = pattern;
        this.analyzer = analyzer;
        this.useField = useField;
    }

    /** Deserialization constructor; mirrors {@link #writeTo}. */
    public Wildcard(StreamInput in) throws IOException {
        this.pattern = in.readString();
        this.analyzer = in.readOptionalString();
        this.useField = in.readOptionalString();
    }

    @Override
    public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) {
        NamedAnalyzer analyzer = fieldType.searchAnalyzer();
        if (this.analyzer != null) {
            analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
        }
        IntervalsSource source;
        if (useField != null) {
            fieldType = context.fieldMapper(useField);
            assert fieldType != null;
            checkPositions(fieldType);
            // if no explicit analyzer was given, use the analyzer of the re-targeted field
            if (this.analyzer == null) {
                analyzer = fieldType.searchAnalyzer();
            }
            BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
            // TODO Intervals.wildcard() should take BytesRef
            source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm.utf8ToString()));
        }
        else {
            checkPositions(fieldType);
            BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
            source = Intervals.wildcard(normalizedTerm.utf8ToString());
        }
        return source;
    }

    /** Intervals require positions; fail fast if the field was not indexed with them. */
    private void checkPositions(MappedFieldType type) {
        if (type.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
            throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
        }
    }

    @Override
    public void extractFields(Set<String> fields) {
        if (useField != null) {
            fields.add(useField);
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        // BUGFIX: was cast to Prefix, which always threw ClassCastException here,
        // since the getClass() check above guarantees o is a Wildcard
        Wildcard wildcard = (Wildcard) o;
        return Objects.equals(pattern, wildcard.pattern) &&
            Objects.equals(analyzer, wildcard.analyzer) &&
            Objects.equals(useField, wildcard.useField);
    }

    @Override
    public int hashCode() {
        return Objects.hash(pattern, analyzer, useField);
    }

    @Override
    public String getWriteableName() {
        return NAME;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(pattern);
        out.writeOptionalString(analyzer);
        out.writeOptionalString(useField);
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(NAME);
        builder.field("pattern", pattern);
        if (analyzer != null) {
            builder.field("analyzer", analyzer);
        }
        if (useField != null) {
            builder.field("use_field", useField);
        }
        builder.endObject();
        return builder;
    }

    private static final ConstructingObjectParser<Wildcard, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
        String term = (String) args[0];
        String analyzer = (String) args[1];
        String useField = (String) args[2];
        return new Wildcard(term, analyzer, useField);
    });
    static {
        PARSER.declareString(constructorArg(), new ParseField("pattern"));
        PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
        PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
    }

    public static Wildcard fromXContent(XContentParser parser) throws IOException {
        return PARSER.parse(parser, null);
    }
}
static class ScriptFilterSource extends FilteredIntervalsSource {
final IntervalFilterScript script;

View File

@ -849,6 +849,8 @@ public class SearchModule {
IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Wildcard.NAME, IntervalsSourceProvider.Wildcard::new));
}
private void registerQuery(QuerySpec<?> spec) {

View File

@ -393,32 +393,104 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
public void testPrefixes() throws IOException {
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
"\"prefix\" : { \"prefix\" : \"term\" } } } }";
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
assertEquals(expected, builder.toQuery(createShardContext()));
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
"\"prefix\" : { \"prefix\" : \"term\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
builder1.toQuery(createShardContext());
});
String no_positions_fixed_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"prefix\" : \"term\", \"use_field\" : \"" + NO_POSITIONS_FIELD + "\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_fixed_field_json);
builder1.toQuery(createShardContext());
});
String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
"\"prefix\" : { \"prefix\" : \"term\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
assertEquals(expected, builder.toQuery(createShardContext()));
String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"t\" } } } }";
"\"prefix\" : { \"prefix\" : \"t\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
Intervals.term("t")));
assertEquals(expected, builder.toQuery(createShardContext()));
String fix_field_prefix_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"prefix\" : \"term\", \"use_field\" : \"" + PREFIXED_FIELD + "\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(fix_field_prefix_json);
// This looks weird, but it's fine, because the innermost fixField wins
expected = new IntervalQuery(STRING_FIELD_NAME,
Intervals.fixField(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term"))));
assertEquals(expected, builder.toQuery(createShardContext()));
String keyword_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"prefix\" : \"Term\", \"analyzer\" : \"keyword\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(keyword_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("Term")));
assertEquals(expected, builder.toQuery(createShardContext()));
String keyword_fix_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"prefix\" : \"Term\", \"analyzer\" : \"keyword\", \"use_field\" : \"" + PREFIXED_FIELD + "\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(keyword_fix_field_json);
expected = new IntervalQuery(STRING_FIELD_NAME,
Intervals.fixField(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("Term"))));
assertEquals(expected, builder.toQuery(createShardContext()));
}
// Verifies parsing and query construction for the wildcard intervals rule,
// covering analyzer normalization, use_field redirection, and positions checks.
public void testWildcard() throws IOException {
// default path: pattern is normalized by the field's search analyzer (lowercased here)
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"wildcard\" : { \"pattern\" : \"Te?m\" } } } }";
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard("te?m"));
assertEquals(expected, builder.toQuery(createShardContext()));
// a field indexed without positions must be rejected
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
"\"wildcard\" : { \"pattern\" : \"term\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
builder1.toQuery(createShardContext());
});
// explicit keyword analyzer: pattern is passed through unchanged (no lowercasing)
String keyword_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"analyzer\" : \"keyword\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(keyword_json);
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard("Te?m"));
assertEquals(expected, builder.toQuery(createShardContext()));
// use_field redirects the source to another field via Intervals.fixField,
// normalizing with that field's search analyzer
String fixed_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"masked_field\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard("te?m")));
assertEquals(expected, builder.toQuery(createShardContext()));
// the positions check also applies to the use_field target
String fixed_field_json_no_positions = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"" + NO_POSITIONS_FIELD + "\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(fixed_field_json_no_positions);
builder1.toQuery(createShardContext());
});
// explicit analyzer wins over the use_field target's analyzer
String fixed_field_analyzer_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"masked_field\", \"analyzer\" : \"keyword\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard("Te?m")));
assertEquals(expected, builder.toQuery(createShardContext()));
}
}