Add `use_field` option to intervals query (#40157)
This is the intervals-query equivalent of the `field_masking_span` query: it allows users to merge intervals from multiple fields - for example, to search for stemmed tokens near unstemmed tokens.
This commit is contained in:
parent
6f64267626
commit
83d2870308
|
@ -76,6 +76,12 @@ Which analyzer should be used to analyze terms in the `query`. By
|
|||
default, the search analyzer of the top-level field will be used.
|
||||
`filter`::
|
||||
An optional <<interval_filter,interval filter>>
|
||||
`use_field`::
|
||||
If specified, then match intervals from this field rather than the top-level field.
|
||||
Terms will be analyzed using the search analyzer from this field, unless an explicit `analyzer` is specified. This allows you
|
||||
to search across multiple fields as if they were all the same field; for example,
|
||||
you could index the same text into stemmed and unstemmed fields, and search for
|
||||
stemmed tokens near unstemmed ones.
|
||||
|
||||
[[intervals-all_of]]
|
||||
==== `all_of`
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
# integration tests for intervals queries using analyzers
|
||||
setup:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test
|
||||
body:
|
||||
mappings:
|
||||
properties:
|
||||
text:
|
||||
type: text
|
||||
analyzer: standard
|
||||
text_en:
|
||||
type: text
|
||||
analyzer: english
|
||||
- do:
|
||||
bulk:
|
||||
refresh: true
|
||||
body:
|
||||
- '{"index": {"_index": "test", "_id": "4"}}'
|
||||
- '{"text" : "Outside it is cold and wet and raining cats and dogs",
|
||||
"text_en" : "Outside it is cold and wet and raining cats and dogs"}'
|
||||
|
||||
---
|
||||
# Checks that `use_field` lets one sub-interval be matched against a different
# field from the top-level one. The same two-term query is run twice against
# the single document indexed in setup; only the `use_field` setting differs.
"Test use_field":
|
||||
- skip:
|
||||
version: " - 7.9.99" # TODO change to 7.0.99 after backport
|
||||
reason: "Implemented in 7.1"
|
||||
# First search: both terms are matched against `text` (standard analyzer).
# The query term `dog` is expected not to match the indexed token `dogs`,
# so no hits are expected.
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
query:
|
||||
intervals:
|
||||
text:
|
||||
all_of:
|
||||
intervals:
|
||||
- match:
|
||||
query: cats
|
||||
- match:
|
||||
query: dog
|
||||
max_gaps: 1
|
||||
- match: { hits.total.value: 0 }
|
||||
# Second search: identical, except `dog` is matched against `text_en`
# (english analyzer) through `use_field`. Presumably stemming makes `dog`
# match `dogs` there, so the document is expected to be found.
- do:
|
||||
search:
|
||||
index: test
|
||||
body:
|
||||
query:
|
||||
intervals:
|
||||
text:
|
||||
all_of:
|
||||
intervals:
|
||||
- match:
|
||||
query: cats
|
||||
- match:
|
||||
query: dog
|
||||
use_field: text_en
|
||||
max_gaps: 1
|
||||
- match: { hits.total.value: 1 }
|
||||
|
|
@ -637,7 +637,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
@Override
|
||||
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
|
||||
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
||||
throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed");
|
||||
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
|
||||
}
|
||||
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
|
||||
return builder.analyzeText(text, maxGaps, ordered);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.intervals.IntervalQuery;
|
||||
|
@ -31,7 +30,9 @@ import org.elasticsearch.common.xcontent.XContentParser;
|
|||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Builder for {@link IntervalQuery}
|
||||
|
@ -128,9 +129,14 @@ public class IntervalQueryBuilder extends AbstractQueryBuilder<IntervalQueryBuil
|
|||
// Be lenient with unmapped fields so that cross-index search will work nicely
|
||||
return new MatchNoDocsQuery();
|
||||
}
|
||||
if (fieldType.tokenized() == false ||
|
||||
fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
||||
throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions");
|
||||
Set<String> maskedFields = new HashSet<>();
|
||||
sourceProvider.extractFields(maskedFields);
|
||||
for (String maskedField : maskedFields) {
|
||||
MappedFieldType ft = context.fieldMapper(maskedField);
|
||||
if (ft == null) {
|
||||
// Be lenient with unmapped fields so that cross-index search will work nicely
|
||||
return new MatchNoDocsQuery();
|
||||
}
|
||||
}
|
||||
return new IntervalQuery(field, sourceProvider.getSource(context, fieldType));
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.search.intervals.FilteredIntervalsSource;
|
|||
import org.apache.lucene.search.intervals.IntervalIterator;
|
||||
import org.apache.lucene.search.intervals.Intervals;
|
||||
import org.apache.lucene.search.intervals.IntervalsSource;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.io.stream.NamedWriteable;
|
||||
|
@ -43,6 +44,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
|
||||
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
|
||||
|
@ -59,6 +61,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
|
||||
public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException;
|
||||
|
||||
/**
 * Collects the names of fields that this source has been told to read
 * intervals from via {@code use_field}, adding them to {@code fields}.
 * The query builder resolves each collected name against the mappings
 * and returns a match-no-docs query when one of them is unmapped.
 *
 * @param fields set that implementations add field names to
 */
public abstract void extractFields(Set<String> fields);
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
|
@ -99,13 +103,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
private final boolean ordered;
|
||||
private final String analyzer;
|
||||
private final IntervalFilter filter;
|
||||
private final String useField;
|
||||
|
||||
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) {
|
||||
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) {
|
||||
this.query = query;
|
||||
this.maxGaps = maxGaps;
|
||||
this.ordered = ordered;
|
||||
this.analyzer = analyzer;
|
||||
this.filter = filter;
|
||||
this.useField = useField;
|
||||
}
|
||||
|
||||
public Match(StreamInput in) throws IOException {
|
||||
|
@ -114,6 +120,12 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
this.ordered = in.readBoolean();
|
||||
this.analyzer = in.readOptionalString();
|
||||
this.filter = in.readOptionalWriteable(IntervalFilter::new);
|
||||
if (in.getVersion().onOrAfter(Version.V_7_1_0)) {
|
||||
this.useField = in.readOptionalString();
|
||||
}
|
||||
else {
|
||||
this.useField = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -122,13 +134,28 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
if (this.analyzer != null) {
|
||||
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
|
||||
}
|
||||
IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer);
|
||||
IntervalsSource source;
|
||||
if (useField != null) {
|
||||
fieldType = context.fieldMapper(useField);
|
||||
assert fieldType != null;
|
||||
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
|
||||
}
|
||||
else {
|
||||
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
|
||||
}
|
||||
if (filter != null) {
|
||||
return filter.filter(source, context, fieldType);
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
 * Adds the configured {@code use_field} name to {@code fields}.
 * Nothing is added when {@code useField} is {@code null}.
 */
@Override
|
||||
public void extractFields(Set<String> fields) {
|
||||
if (useField != null) {
|
||||
fields.add(useField);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -138,12 +165,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
ordered == match.ordered &&
|
||||
Objects.equals(query, match.query) &&
|
||||
Objects.equals(filter, match.filter) &&
|
||||
Objects.equals(useField, match.useField) &&
|
||||
Objects.equals(analyzer, match.analyzer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(query, maxGaps, ordered, analyzer, filter);
|
||||
return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -158,6 +186,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
out.writeBoolean(ordered);
|
||||
out.writeOptionalString(analyzer);
|
||||
out.writeOptionalWriteable(filter);
|
||||
if (out.getVersion().onOrAfter(Version.V_7_1_0)) {
|
||||
out.writeOptionalString(useField);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -173,6 +204,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
if (filter != null) {
|
||||
builder.field("filter", filter);
|
||||
}
|
||||
if (useField != null) {
|
||||
builder.field("use_field", useField);
|
||||
}
|
||||
return builder.endObject();
|
||||
}
|
||||
|
||||
|
@ -183,7 +217,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
boolean ordered = (args[2] != null && (boolean) args[2]);
|
||||
String analyzer = (String) args[3];
|
||||
IntervalFilter filter = (IntervalFilter) args[4];
|
||||
return new Match(query, max_gaps, ordered, analyzer, filter);
|
||||
String useField = (String) args[5];
|
||||
return new Match(query, max_gaps, ordered, analyzer, filter, useField);
|
||||
});
|
||||
static {
|
||||
PARSER.declareString(constructorArg(), new ParseField("query"));
|
||||
|
@ -191,6 +226,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
|
||||
PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter"));
|
||||
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
|
||||
}
|
||||
|
||||
public static Match fromXContent(XContentParser parser) {
|
||||
|
@ -228,6 +264,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
return filter.filter(source, ctx, fieldType);
|
||||
}
|
||||
|
||||
/**
 * Collects fields from every sub-source by delegating to each one in turn;
 * this source adds no field name of its own.
 */
@Override
|
||||
public void extractFields(Set<String> fields) {
|
||||
for (IntervalsSourceProvider provider : subSources) {
|
||||
provider.extractFields(fields);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
@ -323,6 +366,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
|||
return source;
|
||||
}
|
||||
|
||||
/**
 * Collects fields from every sub-source by delegating to each one in turn;
 * this source adds no field name of its own.
 */
@Override
|
||||
public void extractFields(Set<String> fields) {
|
||||
for (IntervalsSourceProvider provider : subSources) {
|
||||
provider.extractFields(fields);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
|
|
@ -25,7 +25,11 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.intervals.IntervalQuery;
|
||||
import org.apache.lucene.search.intervals.Intervals;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptContext;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
|
@ -37,6 +41,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
|
@ -64,7 +69,27 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
|||
return null;
|
||||
}
|
||||
|
||||
private static final String MASKED_FIELD = "masked_field";
|
||||
private static final String NO_POSITIONS_FIELD = "no_positions_field";
|
||||
|
||||
/**
 * Adds two extra text fields to the {@code _doc} mapping used by these tests:
 * {@code MASKED_FIELD}, a text field with default settings, and
 * {@code NO_POSITIONS_FIELD}, a text field with {@code index_options} set to
 * {@code freqs}.
 *
 * @param mapperService the mapper service the extra mapping is merged into
 * @throws IOException if building or merging the mapping fails
 */
@Override
|
||||
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
|
||||
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
|
||||
.startObject(MASKED_FIELD)
|
||||
.field("type", "text")
|
||||
.endObject()
|
||||
.startObject(NO_POSITIONS_FIELD)
|
||||
.field("type", "text")
|
||||
// "freqs" is what makes this the no-positions field: the tests expect
// interval queries over it to fail with "no positions indexed".
.field("index_options", "freqs")
|
||||
.endObject()
|
||||
.endObject().endObject().endObject();
|
||||
|
||||
// Merge the extra fields into the existing mapping as a mapping update.
mapperService.merge("_doc",
|
||||
new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE);
|
||||
}
|
||||
|
||||
private IntervalsSourceProvider createRandomSource() {
|
||||
String useField = rarely() ? MASKED_FIELD : null;
|
||||
switch (randomInt(20)) {
|
||||
case 0:
|
||||
case 1:
|
||||
|
@ -95,7 +120,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
|||
boolean mOrdered = randomBoolean();
|
||||
int maxMGaps = randomInt(5) - 1;
|
||||
String analyzer = randomFrom("simple", "keyword", "whitespace");
|
||||
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter());
|
||||
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter(), useField);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -151,6 +176,21 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
|||
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
json = "{ \"intervals\" : " +
|
||||
"{ \"" + STRING_FIELD_NAME + "\" : { " +
|
||||
" \"match\" : { " +
|
||||
" \"query\" : \"Hello world\"," +
|
||||
" \"max_gaps\" : 10," +
|
||||
" \"analyzer\" : \"whitespace\"," +
|
||||
" \"use_field\" : \"" + MASKED_FIELD + "\"," +
|
||||
" \"ordered\" : true } } } }";
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(json);
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME,
|
||||
Intervals.fixField(MASKED_FIELD,
|
||||
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
json = "{ \"intervals\" : " +
|
||||
"{ \"" + STRING_FIELD_NAME + "\" : { " +
|
||||
" \"match\" : { " +
|
||||
|
@ -262,14 +302,31 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
|||
IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider);
|
||||
builder.doToQuery(createShardContext());
|
||||
});
|
||||
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions"));
|
||||
assertThat(e.getMessage(), equalTo("Can only use interval queries on text fields - not on ["
|
||||
+ INT_FIELD_NAME + "] which is of type [integer]"));
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () -> {
|
||||
IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider);
|
||||
IntervalQueryBuilder builder = new IntervalQueryBuilder(NO_POSITIONS_FIELD, provider);
|
||||
builder.doToQuery(createShardContext());
|
||||
});
|
||||
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field ["
|
||||
+ STRING_FIELD_NAME_2 + "] with no indexed positions"));
|
||||
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
|
||||
+ NO_POSITIONS_FIELD + "] with no positions indexed"));
|
||||
|
||||
String json = "{ \"intervals\" : " +
|
||||
"{ \"" + STRING_FIELD_NAME + "\" : { " +
|
||||
" \"match\" : { " +
|
||||
" \"query\" : \"Hello world\"," +
|
||||
" \"max_gaps\" : 10," +
|
||||
" \"analyzer\" : \"whitespace\"," +
|
||||
" \"use_field\" : \"" + NO_POSITIONS_FIELD + "\"," +
|
||||
" \"ordered\" : true } } } }";
|
||||
|
||||
e = expectThrows(IllegalArgumentException.class, () -> {
|
||||
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
|
||||
builder.doToQuery(createShardContext());
|
||||
});
|
||||
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
|
||||
+ NO_POSITIONS_FIELD + "] with no positions indexed"));
|
||||
}
|
||||
|
||||
public void testMultipleProviders() {
|
||||
|
|
Loading…
Reference in New Issue