From 83d2870308ebc512f0524c6d5d61da1863c7dac3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 20 Mar 2019 16:25:15 +0000 Subject: [PATCH] Add `use_field` option to intervals query (#40157) This is the equivalent of the `field_masking_span` query, allowing users to merge intervals from multiple fields - for example, to search for stemmed tokens near unstemmed tokens. --- .../query-dsl/intervals-query.asciidoc | 6 ++ .../test/search.query/70_intervals.yml | 59 ++++++++++++++++ .../index/mapper/TextFieldMapper.java | 2 +- .../index/query/IntervalQueryBuilder.java | 14 ++-- .../index/query/IntervalsSourceProvider.java | 58 ++++++++++++++-- .../query/IntervalQueryBuilderTests.java | 67 +++++++++++++++++-- 6 files changed, 192 insertions(+), 14 deletions(-) create mode 100644 modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/70_intervals.yml diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 94a8b007d84..f5788783f7e 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -76,6 +76,12 @@ Which analyzer should be used to analyze terms in the `query`. By default, the search analyzer of the top-level field will be used. `filter`:: An optional <<interval_filter,interval filter>> +`use_field`:: +If specified, then match intervals from this field rather than the top-level field. +Terms will be analyzed using the search analyzer from this field. This allows you +to search across multiple fields as if they were all the same field; for example, +you could index the same text into stemmed and unstemmed fields, and search for +stemmed tokens near unstemmed ones. 
[[intervals-all_of]] ==== `all_of` diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/70_intervals.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/70_intervals.yml new file mode 100644 index 00000000000..e427a27d067 --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/70_intervals.yml @@ -0,0 +1,59 @@ +# integration tests for intervals queries using analyzers +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: text + analyzer: standard + text_en: + type: text + analyzer: english + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet and raining cats and dogs", + "text_en" : "Outside it is cold and wet and raining cats and dogs"}' + +--- +"Test use_field": + - skip: + version: " - 7.9.99" # TODO change to 7.0.99 after backport + reason: "Implemented in 7.1" + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + max_gaps: 1 + - match: { hits.total.value: 0 } + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + use_field: text_en + max_gaps: 1 + - match: { hits.total.value: 1 } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index e5fc470e130..805b50e628b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -637,7 +637,7 @@ public class TextFieldMapper extends FieldMapper { @Override public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { if 
(indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { - throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed"); + throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer); return builder.analyzeText(text, maxGaps, ordered); diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index a1badc38323..b9ea7d96e46 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.query; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; @@ -31,7 +30,9 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; +import java.util.HashSet; import java.util.Objects; +import java.util.Set; /** * Builder for {@link IntervalQuery} @@ -128,9 +129,14 @@ public class IntervalQueryBuilder extends AbstractQueryBuilder maskedFields = new HashSet<>(); + sourceProvider.extractFields(maskedFields); + for (String maskedField : maskedFields) { + MappedFieldType ft = context.fieldMapper(maskedField); + if (ft == null) { + // Be lenient with unmapped fields so that cross-index search will work nicely + return new MatchNoDocsQuery(); + } } return new IntervalQuery(field, sourceProvider.getSource(context, fieldType)); } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java 
b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index dacd843c377..8aef53bc20e 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.intervals.FilteredIntervalsSource; import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.NamedWriteable; @@ -43,6 +44,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Objects; +import java.util.Set; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -59,6 +61,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException; + public abstract void extractFields(Set fields); + @Override public abstract int hashCode(); @@ -99,13 +103,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont private final boolean ordered; private final String analyzer; private final IntervalFilter filter; + private final String useField; - public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) { + public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) { this.query = query; this.maxGaps = maxGaps; this.ordered = ordered; this.analyzer = analyzer; this.filter = filter; + this.useField = useField; } public Match(StreamInput 
in) throws IOException { @@ -114,6 +120,12 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont this.ordered = in.readBoolean(); this.analyzer = in.readOptionalString(); this.filter = in.readOptionalWriteable(IntervalFilter::new); + if (in.getVersion().onOrAfter(Version.V_7_1_0)) { + this.useField = in.readOptionalString(); + } + else { + this.useField = null; + } } @Override @@ -122,13 +134,28 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont if (this.analyzer != null) { analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer); } - IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer); + IntervalsSource source; + if (useField != null) { + fieldType = context.fieldMapper(useField); + assert fieldType != null; + source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer)); + } + else { + source = fieldType.intervals(query, maxGaps, ordered, analyzer); + } if (filter != null) { return filter.filter(source, context, fieldType); } return source; } + @Override + public void extractFields(Set fields) { + if (useField != null) { + fields.add(useField); + } + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -138,12 +165,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont ordered == match.ordered && Objects.equals(query, match.query) && Objects.equals(filter, match.filter) && + Objects.equals(useField, match.useField) && Objects.equals(analyzer, match.analyzer); } @Override public int hashCode() { - return Objects.hash(query, maxGaps, ordered, analyzer, filter); + return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField); } @Override @@ -158,6 +186,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont out.writeBoolean(ordered); out.writeOptionalString(analyzer); out.writeOptionalWriteable(filter); + if 
(out.getVersion().onOrAfter(Version.V_7_1_0)) { + out.writeOptionalString(useField); + } } @Override @@ -173,6 +204,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont if (filter != null) { builder.field("filter", filter); } + if (useField != null) { + builder.field("use_field", useField); + } return builder.endObject(); } @@ -183,7 +217,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont boolean ordered = (args[2] != null && (boolean) args[2]); String analyzer = (String) args[3]; IntervalFilter filter = (IntervalFilter) args[4]; - return new Match(query, max_gaps, ordered, analyzer, filter); + String useField = (String) args[5]; + return new Match(query, max_gaps, ordered, analyzer, filter, useField); }); static { PARSER.declareString(constructorArg(), new ParseField("query")); @@ -191,6 +226,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer")); PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter")); + PARSER.declareString(optionalConstructorArg(), new ParseField("use_field")); } public static Match fromXContent(XContentParser parser) { @@ -228,6 +264,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont return filter.filter(source, ctx, fieldType); } + @Override + public void extractFields(Set fields) { + for (IntervalsSourceProvider provider : subSources) { + provider.extractFields(fields); + } + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -323,6 +366,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont return source; } + @Override + public void extractFields(Set fields) { + for (IntervalsSourceProvider provider : subSources) { + 
provider.extractFields(fields); + } + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 6c33d582452..84df2c51fb5 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -25,7 +25,11 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.apache.lucene.search.intervals.Intervals; import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptContext; import org.elasticsearch.script.ScriptService; @@ -37,6 +41,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -64,7 +69,27 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase { - IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider); + IntervalQueryBuilder builder = new IntervalQueryBuilder(NO_POSITIONS_FIELD, provider); builder.doToQuery(createShardContext()); }); - assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" - + STRING_FIELD_NAME_2 + "] with no indexed positions")); + assertThat(e.getMessage(), equalTo("Cannot create intervals over field [" + + NO_POSITIONS_FIELD + "] with no positions indexed")); + + String json 
= "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 10," + + " \"analyzer\" : \"whitespace\"," + + " \"use_field\" : \"" + NO_POSITIONS_FIELD + "\"," + + " \"ordered\" : true } } } }"; + + e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), equalTo("Cannot create intervals over field [" + + NO_POSITIONS_FIELD + "] with no positions indexed")); } public void testMultipleProviders() {