mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-31 04:18:39 +00:00
Add prefix intervals source (#43635)
This commit adds a prefix intervals source, allowing you to search for intervals that contain terms starting with a given prefix. The source can make use of the index_prefixes mapping option. Relates to #43198
This commit is contained in:
parent
c121b00c98
commit
76d0edd1a4
docs/reference/query-dsl
rest-api-spec/src/main/resources/rest-api-spec/test/search
server/src
main/java/org/elasticsearch
index
search
test/java/org/elasticsearch/index/query
@ -82,6 +82,25 @@ to search across multiple fields as if they were all the same field; for example
|
||||
you could index the same text into stemmed and unstemmed fields, and search for
|
||||
stemmed tokens near unstemmed ones.
|
||||
|
||||
[[intervals-prefix]]
|
||||
==== `prefix`
|
||||
|
||||
The `prefix` rule finds terms that start with a specified prefix. The prefix will
|
||||
expand to match at most 128 terms; if there are more matching terms in the index,
|
||||
then an error will be returned. To avoid this limit, enable the
|
||||
<<index-prefixes,`index_prefixes`>> option on the field being searched.
|
||||
|
||||
[horizontal]
|
||||
`prefix`::
|
||||
Match terms starting with this prefix
|
||||
`analyzer`::
|
||||
Which analyzer should be used to normalize the `prefix`. By default, the
|
||||
search analyzer of the top-level field will be used.
|
||||
`use_field`::
|
||||
If specified, then match intervals from this field rather than the top-level field.
|
||||
The `prefix` will be normalized using the search analyzer from this field, unless
|
||||
`analyzer` is specified separately.
|
||||
|
||||
[[intervals-all_of]]
|
||||
==== `all_of`
|
||||
|
||||
|
@ -387,3 +387,23 @@ setup:
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "4" }
|
||||
|
||||
---
|
||||
"Test prefix":
|
||||
- skip:
|
||||
version: " - 8.0.0"
|
||||
reason: "TODO: change to 7.3 in backport"
|
||||
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
query:
|
||||
intervals:
|
||||
text:
|
||||
all_of:
|
||||
intervals:
|
||||
- match:
|
||||
query: cold
|
||||
- prefix:
|
||||
prefix: out
|
||||
- match: { hits.total.value: 3 }
|
||||
|
||||
|
@ -390,7 +390,8 @@ public abstract class MappedFieldType extends FieldType {
|
||||
/**
|
||||
* Create an {@link IntervalsSource} to be used for proximity queries
|
||||
*/
|
||||
public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
|
||||
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
|
||||
NamedAnalyzer analyzer, boolean prefix) throws IOException {
|
||||
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
|
||||
+ "] which is of type [" + typeName() + "]");
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.intervals.Intervals;
|
||||
import org.apache.lucene.search.intervals.IntervalsSource;
|
||||
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||
@ -51,6 +52,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
@ -408,6 +410,21 @@ public class TextFieldMapper extends FieldMapper {
|
||||
.build();
|
||||
}
|
||||
|
||||
public IntervalsSource intervals(BytesRef term) {
|
||||
if (term.length > maxChars) {
|
||||
return Intervals.prefix(term.utf8ToString());
|
||||
}
|
||||
if (term.length >= minChars) {
|
||||
return Intervals.fixField(name(), Intervals.term(term));
|
||||
}
|
||||
StringBuilder sb = new StringBuilder(term.utf8ToString());
|
||||
for (int i = term.length; i < minChars; i++) {
|
||||
sb.append("?");
|
||||
}
|
||||
String wildcardTerm = sb.toString();
|
||||
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(wildcardTerm)), Intervals.term(term));
|
||||
}
|
||||
|
||||
@Override
|
||||
public PrefixFieldType clone() {
|
||||
return new PrefixFieldType(parentField, name(), minChars, maxChars);
|
||||
@ -636,10 +653,21 @@ public class TextFieldMapper extends FieldMapper {
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
|
||||
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
|
||||
NamedAnalyzer analyzer, boolean prefix) throws IOException {
|
||||
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
||||
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
|
||||
}
|
||||
if (analyzer == null) {
|
||||
analyzer = searchAnalyzer();
|
||||
}
|
||||
if (prefix) {
|
||||
BytesRef normalizedTerm = analyzer.normalize(name(), text);
|
||||
if (prefixFieldType != null) {
|
||||
return prefixFieldType.intervals(normalizedTerm);
|
||||
}
|
||||
return Intervals.prefix(normalizedTerm.utf8ToString()); // TODO make Intervals.prefix() take a BytesRef
|
||||
}
|
||||
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
|
||||
return builder.analyzeText(text, maxGaps, ordered);
|
||||
}
|
||||
|
@ -78,9 +78,11 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||
return Disjunction.fromXContent(parser);
|
||||
case "all_of":
|
||||
return Combine.fromXContent(parser);
|
||||
case "prefix":
|
||||
return Prefix.fromXContent(parser);
|
||||
}
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]");
|
||||
"Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of, prefix]");
|
||||
}
|
||||
|
||||
private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException {
|
||||
@ -138,10 +140,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||
if (useField != null) {
|
||||
fieldType = context.fieldMapper(useField);
|
||||
assert fieldType != null;
|
||||
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
|
||||
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
|
||||
}
|
||||
else {
|
||||
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
|
||||
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
|
||||
}
|
||||
if (filter != null) {
|
||||
return filter.filter(source, context, fieldType);
|
||||
@ -440,6 +442,109 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||
}
|
||||
}
|
||||
|
||||
public static class Prefix extends IntervalsSourceProvider {
|
||||
|
||||
public static final String NAME = "prefix";
|
||||
|
||||
private final String term;
|
||||
private final String analyzer;
|
||||
private final String useField;
|
||||
|
||||
public Prefix(String term, String analyzer, String useField) {
|
||||
this.term = term;
|
||||
this.analyzer = analyzer;
|
||||
this.useField = useField;
|
||||
}
|
||||
|
||||
public Prefix(StreamInput in) throws IOException {
|
||||
this.term = in.readString();
|
||||
this.analyzer = in.readOptionalString();
|
||||
this.useField = in.readOptionalString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException {
|
||||
NamedAnalyzer analyzer = null;
|
||||
if (this.analyzer != null) {
|
||||
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
|
||||
}
|
||||
IntervalsSource source;
|
||||
if (useField != null) {
|
||||
fieldType = context.fieldMapper(useField);
|
||||
assert fieldType != null;
|
||||
source = Intervals.fixField(useField, fieldType.intervals(term, 0, false, analyzer, true));
|
||||
}
|
||||
else {
|
||||
source = fieldType.intervals(term, 0, false, analyzer, true);
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extractFields(Set<String> fields) {
|
||||
if (useField != null) {
|
||||
fields.add(useField);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
Prefix prefix = (Prefix) o;
|
||||
return Objects.equals(term, prefix.term) &&
|
||||
Objects.equals(analyzer, prefix.analyzer) &&
|
||||
Objects.equals(useField, prefix.useField);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(term, analyzer, useField);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getWriteableName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(term);
|
||||
out.writeOptionalString(analyzer);
|
||||
out.writeOptionalString(useField);
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject(NAME);
|
||||
builder.field("term", term);
|
||||
if (analyzer != null) {
|
||||
builder.field("analyzer", analyzer);
|
||||
}
|
||||
if (useField != null) {
|
||||
builder.field("use_field", useField);
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
private static final ConstructingObjectParser<Prefix, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
|
||||
String term = (String) args[0];
|
||||
String analyzer = (String) args[1];
|
||||
String useField = (String) args[2];
|
||||
return new Prefix(term, analyzer, useField);
|
||||
});
|
||||
static {
|
||||
PARSER.declareString(constructorArg(), new ParseField("term"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
|
||||
}
|
||||
|
||||
public static Prefix fromXContent(XContentParser parser) throws IOException {
|
||||
return PARSER.parse(parser, null);
|
||||
}
|
||||
}
|
||||
|
||||
static class ScriptFilterSource extends FilteredIntervalsSource {
|
||||
|
||||
final IntervalFilterScript script;
|
||||
|
@ -847,6 +847,8 @@ public class SearchModule {
|
||||
IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new));
|
||||
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
|
||||
IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new));
|
||||
namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class,
|
||||
IntervalsSourceProvider.Prefix.NAME, IntervalsSourceProvider.Prefix::new));
|
||||
}
|
||||
|
||||
private void registerQuery(QuerySpec<?> spec) {
|
||||
|
@ -59,6 +59,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
|
||||
private static final String MASKED_FIELD = "masked_field";
|
||||
private static final String NO_POSITIONS_FIELD = "no_positions_field";
|
||||
private static final String PREFIXED_FIELD = "prefixed_field";
|
||||
|
||||
@Override
|
||||
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
|
||||
@ -70,6 +71,10 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
.field("type", "text")
|
||||
.field("index_options", "freqs")
|
||||
.endObject()
|
||||
.startObject(PREFIXED_FIELD)
|
||||
.field("type", "text")
|
||||
.startObject("index_prefixes").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().endObject();
|
||||
|
||||
mapperService.merge("_doc",
|
||||
@ -384,5 +389,36 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
assertEquals(expected, q);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testPrefixes() throws IOException {
|
||||
|
||||
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
|
||||
"\"prefix\" : { \"term\" : \"term\" } } } }";
|
||||
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
|
||||
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix("term"));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
|
||||
"\"prefix\" : { \"term\" : \"term\" } } } }";
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
IntervalQueryBuilder builder1 = (IntervalQueryBuilder) parseQuery(no_positions_json);
|
||||
builder1.toQuery(createShardContext());
|
||||
});
|
||||
|
||||
String prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
|
||||
"\"prefix\" : { \"term\" : \"term\" } } } }";
|
||||
builder = (IntervalQueryBuilder) parseQuery(prefix_json);
|
||||
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.term("term")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String short_prefix_json = "{ \"intervals\" : { \"" + PREFIXED_FIELD + "\": { " +
|
||||
"\"prefix\" : { \"term\" : \"t\" } } } }";
|
||||
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
|
||||
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
|
||||
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard("t?")),
|
||||
Intervals.term("t")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user