Add `use_field` option to intervals query (#40157)

This is the equivalent of the `field_masking_span` query, allowing users to
merge intervals from multiple fields - for example, to search for stemmed tokens
near unstemmed tokens.
This commit is contained in:
Alan Woodward 2019-03-20 16:25:15 +00:00 committed by Alan Woodward
parent 6f64267626
commit 83d2870308
6 changed files with 192 additions and 14 deletions

View File

@ -76,6 +76,12 @@ Which analyzer should be used to analyze terms in the `query`. By
default, the search analyzer of the top-level field will be used.
`filter`::
An optional <<interval_filter,interval filter>>
`use_field`::
If specified, then match intervals from this field rather than the top-level field.
Terms will be analyzed using the search analyzer from this field, unless an
explicit `analyzer` is also specified. This allows you to search across multiple
fields as if they were all the same field; for example, you could index the same
text into stemmed and unstemmed fields, and search for stemmed tokens near
unstemmed ones.
[[intervals-all_of]]
==== `all_of`

View File

@ -0,0 +1,59 @@
# integration tests for intervals queries using analyzers
# Exercises the `use_field` option of the intervals `match` rule against an index
# that stores the same sentence in two differently analyzed text fields.
setup:
# Create the index: `text` uses the standard analyzer, `text_en` the english analyzer.
- do:
indices.create:
index: test
body:
mappings:
properties:
text:
type: text
analyzer: standard
text_en:
type: text
analyzer: english
# Index a single document carrying the same sentence in both fields.
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "test", "_id": "4"}}'
- '{"text" : "Outside it is cold and wet and raining cats and dogs",
"text_en" : "Outside it is cold and wet and raining cats and dogs"}'
---
"Test use_field":
- skip:
version: " - 7.9.99" # TODO change to 7.0.99 after backport
reason: "Implemented in 7.1"
# Baseline: both `match` rules read from the top-level `text` field.
# Presumably `text` holds the unstemmed token `dogs`, so `dog` finds nothing - confirm.
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cats
- match:
query: dog
max_gaps: 1
- match: { hits.total.value: 0 }
# Same query, but the `dog` rule is redirected to `text_en` through `use_field`.
# Presumably the english analyzer stems `dogs` to `dog`, which yields the single hit - confirm.
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cats
- match:
query: dog
use_field: text_en
max_gaps: 1
- match: { hits.total.value: 1 }

View File

@ -637,7 +637,7 @@ public class TextFieldMapper extends FieldMapper {
@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed");
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.intervals.IntervalQuery;
@ -31,7 +30,9 @@ import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import java.io.IOException;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
/**
* Builder for {@link IntervalQuery}
@ -128,9 +129,14 @@ public class IntervalQueryBuilder extends AbstractQueryBuilder<IntervalQueryBuil
// Be lenient with unmapped fields so that cross-index search will work nicely
return new MatchNoDocsQuery();
}
if (fieldType.tokenized() == false ||
fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions");
Set<String> maskedFields = new HashSet<>();
sourceProvider.extractFields(maskedFields);
for (String maskedField : maskedFields) {
MappedFieldType ft = context.fieldMapper(maskedField);
if (ft == null) {
// Be lenient with unmapped fields so that cross-index search will work nicely
return new MatchNoDocsQuery();
}
}
return new IntervalQuery(field, sourceProvider.getSource(context, fieldType));
}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.search.intervals.FilteredIntervalsSource;
import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.search.intervals.Intervals;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.NamedWriteable;
@ -43,6 +44,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
@ -59,6 +61,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException;
/**
 * Adds to {@code fields} the names of any fields this source reads intervals from
 * in place of the top-level query field (for example via {@code use_field}).
 *
 * @param fields the set that receives the field names
 */
public abstract void extractFields(Set<String> fields);
@Override
public abstract int hashCode();
@ -99,13 +103,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
private final boolean ordered;
private final String analyzer;
private final IntervalFilter filter;
private final String useField;
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) {
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) {
this.query = query;
this.maxGaps = maxGaps;
this.ordered = ordered;
this.analyzer = analyzer;
this.filter = filter;
this.useField = useField;
}
public Match(StreamInput in) throws IOException {
@ -114,6 +120,12 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
this.ordered = in.readBoolean();
this.analyzer = in.readOptionalString();
this.filter = in.readOptionalWriteable(IntervalFilter::new);
if (in.getVersion().onOrAfter(Version.V_7_1_0)) {
this.useField = in.readOptionalString();
}
else {
this.useField = null;
}
}
@Override
@ -122,13 +134,28 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (this.analyzer != null) {
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer);
IntervalsSource source;
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
}
else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
}
return source;
}
/**
 * Records the {@code use_field} target in {@code fields}; adds nothing when no
 * such field was configured.
 */
@Override
public void extractFields(Set<String> fields) {
    if (useField == null) {
        return;
    }
    fields.add(useField);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
@ -138,12 +165,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
ordered == match.ordered &&
Objects.equals(query, match.query) &&
Objects.equals(filter, match.filter) &&
Objects.equals(useField, match.useField) &&
Objects.equals(analyzer, match.analyzer);
}
@Override
public int hashCode() {
return Objects.hash(query, maxGaps, ordered, analyzer, filter);
return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField);
}
@Override
@ -158,6 +186,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
out.writeBoolean(ordered);
out.writeOptionalString(analyzer);
out.writeOptionalWriteable(filter);
if (out.getVersion().onOrAfter(Version.V_7_1_0)) {
out.writeOptionalString(useField);
}
}
@Override
@ -173,6 +204,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (filter != null) {
builder.field("filter", filter);
}
if (useField != null) {
builder.field("use_field", useField);
}
return builder.endObject();
}
@ -183,7 +217,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
boolean ordered = (args[2] != null && (boolean) args[2]);
String analyzer = (String) args[3];
IntervalFilter filter = (IntervalFilter) args[4];
return new Match(query, max_gaps, ordered, analyzer, filter);
String useField = (String) args[5];
return new Match(query, max_gaps, ordered, analyzer, filter, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("query"));
@ -191,6 +226,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
}
public static Match fromXContent(XContentParser parser) {
@ -228,6 +264,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return filter.filter(source, ctx, fieldType);
}
/**
 * Gathers into {@code fields} the fields reported by every sub-source.
 */
@Override
public void extractFields(Set<String> fields) {
    for (IntervalsSourceProvider subSource : subSources) {
        subSource.extractFields(fields);
    }
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
@ -323,6 +366,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return source;
}
/**
 * Delegates to each sub-source in turn so that all of their fields end up in {@code fields}.
 */
@Override
public void extractFields(Set<String> fields) {
    for (IntervalsSourceProvider child : subSources) {
        child.extractFields(fields);
    }
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -25,7 +25,11 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.intervals.IntervalQuery;
import org.apache.lucene.search.intervals.Intervals;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
@ -37,6 +41,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
@ -64,7 +69,27 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
return null;
}
private static final String MASKED_FIELD = "masked_field";
private static final String NO_POSITIONS_FIELD = "no_positions_field";
/**
 * Merges two extra text fields into the {@code _doc} mapping: {@link #MASKED_FIELD},
 * a plain text field, and {@link #NO_POSITIONS_FIELD}, a text field whose
 * {@code index_options} is set to {@code freqs}.
 */
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
    XContentBuilder extraFields = jsonBuilder().startObject().startObject("_doc").startObject("properties")
        .startObject(MASKED_FIELD)
            .field("type", "text")
        .endObject()
        .startObject(NO_POSITIONS_FIELD)
            .field("type", "text")
            .field("index_options", "freqs")
        .endObject()
        .endObject().endObject().endObject();
    String mappingJson = Strings.toString(extraFields);
    CompressedXContent compressedMapping = new CompressedXContent(mappingJson);
    mapperService.merge("_doc", compressedMapping, MapperService.MergeReason.MAPPING_UPDATE);
}
private IntervalsSourceProvider createRandomSource() {
String useField = rarely() ? MASKED_FIELD : null;
switch (randomInt(20)) {
case 0:
case 1:
@ -95,7 +120,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
boolean mOrdered = randomBoolean();
int maxMGaps = randomInt(5) - 1;
String analyzer = randomFrom("simple", "keyword", "whitespace");
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter());
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter(), useField);
}
}
@ -151,6 +176,21 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))));
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
" \"query\" : \"Hello world\"," +
" \"max_gaps\" : 10," +
" \"analyzer\" : \"whitespace\"," +
" \"use_field\" : \"" + MASKED_FIELD + "\"," +
" \"ordered\" : true } } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(STRING_FIELD_NAME,
Intervals.fixField(MASKED_FIELD,
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))));
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
@ -262,14 +302,31 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions"));
assertThat(e.getMessage(), equalTo("Can only use interval queries on text fields - not on ["
+ INT_FIELD_NAME + "] which is of type [integer]"));
e = expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider);
IntervalQueryBuilder builder = new IntervalQueryBuilder(NO_POSITIONS_FIELD, provider);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field ["
+ STRING_FIELD_NAME_2 + "] with no indexed positions"));
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+ NO_POSITIONS_FIELD + "] with no positions indexed"));
String json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
" \"query\" : \"Hello world\"," +
" \"max_gaps\" : 10," +
" \"analyzer\" : \"whitespace\"," +
" \"use_field\" : \"" + NO_POSITIONS_FIELD + "\"," +
" \"ordered\" : true } } } }";
e = expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+ NO_POSITIONS_FIELD + "] with no positions indexed"));
}
public void testMultipleProviders() {