Add prefix intervals source ()

This commit adds a prefix intervals source, allowing you to search
for intervals that contain terms starting with a given prefix. The source
can make use of the index_prefixes mapping option.
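
As a quick illustration (the `my_text` field name and the prefix value are placeholders,
not taken from this commit), the new source is used like any other intervals rule:

{
  "query": {
    "intervals": {
      "my_text": {
        "prefix": { "prefix": "out" }
      }
    }
  }
}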

Relates to 
Alan Woodward 2019-06-26 15:36:47 +01:00 committed by Alan Woodward
parent c121b00c98
commit 76d0edd1a4
7 changed files with 217 additions and 6 deletions
docs/reference/query-dsl
rest-api-spec/src/main/resources/rest-api-spec/test/search
server/src
main/java/org/elasticsearch
test/java/org/elasticsearch/index/query

@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example
you could index the same text into stemmed and unstemmed fields, and search for
stemmed tokens near unstemmed ones.
[[intervals-prefix]]
==== `prefix`

The `prefix` rule finds terms that start with a specified prefix. The prefix will
expand to match at most 128 terms; if there are more matching terms in the index,
then an error will be returned. To avoid this limit, enable the
<<index-prefixes,`index_prefixes`>> option on the field being searched.

[horizontal]
`prefix`::
Match terms starting with this prefix.
`analyzer`::
Which analyzer should be used to normalize the `prefix`. By default, the
search analyzer of the top-level field will be used.
`use_field`::
If specified, then match intervals from this field rather than the top-level field.
The `prefix` will be normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
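
For example (an illustrative sketch using a hypothetical `text` field; the rule and
parameter names follow the list above), the following query finds intervals containing
both the term `cold` and a term starting with `out`:

[source,js]
--------------------------------------------------
POST _search
{
  "query": {
    "intervals" : {
      "text" : {
        "all_of" : {
          "intervals" : [
            { "match" : { "query" : "cold" } },
            { "prefix" : { "prefix" : "out" } }
          ]
        }
      }
    }
  }
}
--------------------------------------------------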
[[intervals-all_of]]
==== `all_of`

@ -387,3 +387,23 @@ setup:
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "4" }

---
"Test prefix":
  - skip:
      version: " - 8.0.0"
      reason: "TODO: change to 7.3 in backport"

  - do:
      search:
        index: test
        body:
          query:
            intervals:
              text:
                all_of:
                  intervals:
                    - match:
                        query: cold
                    - prefix:
                        prefix: out

  - match: { hits.total.value: 3 }

@ -390,7 +390,8 @@ public abstract class MappedFieldType extends FieldType {
/**
* Create an {@link IntervalsSource} to be used for proximity queries
*/
public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}

@ -44,6 +44,7 @@ import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.intervals.Intervals;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
@ -51,6 +52,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
@ -408,6 +410,21 @@ public class TextFieldMapper extends FieldMapper {
.build();
}
public IntervalsSource intervals(BytesRef term) {
if (term.length > maxChars) {
return Intervals.prefix(term.utf8ToString());
}
if (term.length >= minChars) {
return Intervals.fixField(name(), Intervals.term(term));
}
StringBuilder sb = new StringBuilder(term.utf8ToString());
for (int i = term.length; i < minChars; i++) {
sb.append("?");
}
String wildcardTerm = sb.toString();
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term));
}
@Override
public PrefixFieldType clone() {
return new PrefixFieldType(parentField, name(), minChars, maxChars);
@ -636,10 +653,21 @@ public class TextFieldMapper extends FieldMapper {
}
@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
if (analyzer == null) {
analyzer = searchAnalyzer();
}
if (prefix) {
BytesRef normalizedTerm = analyzer.normalize(name(), text);
if (prefixFieldType != null) {
return prefixFieldType.intervals(normalizedTerm);
}
return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);
}

@ -78,9 +78,11 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return Disjunction.fromXContent(parser);
case "all_of":
return Combine.fromXContent(parser);
case "prefix":
return Prefix.fromXContent(parser);
}
throw new ParsingException(parser.getTokenLocation(),
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]");
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
}
private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException {
@ -138,10 +140,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
}
else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
@ -440,6 +442,109 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
}
}
public static class Prefix extends IntervalsSourceProvider {
public static final String NAME = "prefix";
private final String term;
private final String analyzer;
private final String useField;
public Prefix(String term, String analyzer, String useField) {
this.term = term;
this.analyzer = analyzer;
this.useField = useField;
}
public Prefix(StreamInput in) throws IOException {
this.term = in.readString();
this.analyzer = in.readOptionalString();
this.useField = in.readOptionalString();
}
@Override
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException {
NamedAnalyzer analyzer = null;
if (this.analyzer != null) {
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
}
else {
source = fieldType.intervals(term, 0, false, analyzer, true);
}
return source;
}
@Override
public void extractFields(Set<String> fields) {
if (useField != null) {
fields.add(useField);
}
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Prefix prefix = (Prefix) o;
return Objects.equals(term, prefix.term) &&
Objects.equals(analyzer, prefix.analyzer) &&
Objects.equals(useField, prefix.useField);
}
@Override
public int hashCode() {
return Objects.hash(term, analyzer, useField);
}
@Override
public String getWriteableName() {
return NAME;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(term);
out.writeOptionalString(analyzer);
out.writeOptionalString(useField);
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
builder.field("term", term);
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
if (useField != null) {
builder.field("use_field", useField);
}
builder.endObject();
return builder;
}
private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
String term = (String) args[0];
String analyzer = (String) args[1];
String useField = (String) args[2];
return new Prefix(term, analyzer, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("term"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
}
public static Prefix fromXContent(XContentParser parser) throws IOException {
return PARSER.parse(parser, null);
}
}
static class ScriptFilterSource extends FilteredIntervalsSource {
final IntervalFilterScript script;
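
For reference, here is a sketch of the JSON fragment that the `toXContent` method above
writes and the `ConstructingObjectParser` reads back; the field names come from the code,
while the values are purely illustrative:

{
  "prefix" : {
    "term" : "out",
    "analyzer" : "standard",
    "use_field" : "my_field.english"
  }
}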

@ -847,6 +847,8 @@ public class SearchModule {
IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
}
private void registerQuery(QuerySpec<?> spec) {

@ -59,6 +59,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
private static final String MASKED_FIELD = "masked_field";
private static final String NO_POSITIONS_FIELD = "no_positions_field";
private static final String PREFIXED_FIELD = "prefixed_field";
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
@ -70,6 +71,10 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
.field("type", "text")
.field("index_options", "freqs")
.endObject()
.startObject(PREFIXED_FIELD)
.field("type", "text")
.startObject("index_prefixes").endObject()
.endObject()
.endObject().endObject().endObject();
mapperService.merge("_doc",
@ -384,5 +389,36 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
assertEquals(expected, q);
}
public void testPrefixes() throws IOException {
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
assertEquals(expected, builder.toQuery(createShardContext()));
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
builder1.toQuery(createShardContext());
});
String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"term\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
assertEquals(expected, builder.toQuery(createShardContext()));
String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
"\"prefix\" : { \"term\" : \"t\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
Intervals.term("t")));
assertEquals(expected, builder.toQuery(createShardContext()));
}
}