diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 76b832a529f..c0db156dc3a 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -52,6 +52,7 @@ string:: <> and <> <>:: Record sparse vectors of float values. +<>:: A text-like field optimized for queries to implement as-you-type completion [float] === Multi-fields @@ -110,3 +111,5 @@ include::types/rank-features.asciidoc[] include::types/dense-vector.asciidoc[] include::types/sparse-vector.asciidoc[] + +include::types/search-as-you-type.asciidoc[] diff --git a/docs/reference/mapping/types/search-as-you-type.asciidoc b/docs/reference/mapping/types/search-as-you-type.asciidoc new file mode 100644 index 00000000000..aec21f2e3ca --- /dev/null +++ b/docs/reference/mapping/types/search-as-you-type.asciidoc @@ -0,0 +1,258 @@ +[[search-as-you-type]] +=== Search as you type datatype + +experimental[] + +The `search_as_you_type` field type is a text-like field that is optimized to +provide out-of-the-box support for queries that serve an as-you-type completion +use case. It creates a series of subfields that are analyzed to index terms +that can be efficiently matched by a query that partially matches the entire +indexed text value. Both prefix completion (i.e. matching terms starting at the +beginning of the input) and infix completion (i.e. matching terms at any +position within the input) are supported. + +When adding a field of this type to a mapping + +[source,js] +-------------------------------------------------- +PUT my_index +{ + "mappings": { + "properties": { + "my_field": { + "type": "search_as_you_type" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +This creates the following fields + +[horizontal] + +`my_field`:: + + Analyzed as configured in the mapping. 
If an analyzer is not configured, + the default analyzer for the index is used + +`my_field._2gram`:: + + Wraps the analyzer of `my_field` with a shingle token filter of shingle + size 2 + +`my_field._3gram`:: + + Wraps the analyzer of `my_field` with a shingle token filter of shingle + size 3 + +`my_field._index_prefix`:: + + Wraps the analyzer of `my_field._3gram` with an edge ngram token filter + + +The size of shingles in subfields can be configured with the `max_shingle_size` +mapping parameter. The default is 3, and valid values for this parameter are +integer values 2 - 4 inclusive. Shingle subfields will be created for each +shingle size from 2 up to and including the `max_shingle_size`. The +`my_field._index_prefix` subfield will always use the analyzer from the shingle +subfield with the `max_shingle_size` when constructing its own analyzer. + +Increasing the `max_shingle_size` will improve matches for queries with more +consecutive terms, at the cost of larger index size. The default +`max_shingle_size` should usually be sufficient. + +The same input text is indexed into each of these fields automatically, with +their differing analysis chains, when an indexed document has a value for the +root field `my_field`. + +[source,js] +-------------------------------------------------- +PUT my_index/_doc/1?refresh +{ + "my_field": "quick brown fox jump lazy dog" +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +The most efficient way of querying to serve a search-as-you-type use case is +usually a <> query of type +<> that targets the root +`search_as_you_type` field and its shingle subfields. This can match the query +terms in any order, but will score documents higher if they contain the terms +in order in a shingle subfield. 
+ +[source,js] +-------------------------------------------------- +GET my_index/_search +{ + "query": { + "multi_match": { + "query": "brown f", + "type": "bool_prefix", + "fields": [ + "my_field", + "my_field._2gram", + "my_field._3gram" + ] + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +[source,js] +-------------------------------------------------- +{ + "took" : 44, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : 0.8630463, + "hits" : [ + { + "_index" : "my_index", + "_type" : "_doc", + "_id" : "1", + "_score" : 0.8630463, + "_source" : { + "my_field" : "quick brown fox jump lazy dog" + } + } + ] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took" : 44/"took" : $body.took/] +// TESTRESPONSE[s/"max_score" : 0.8630463/"max_score" : $body.hits.max_score/] +// TESTRESPONSE[s/"_score" : 0.8630463/"_score" : $body.hits.hits.0._score/] + +To search for documents that strictly match the query terms in order, or to +search using other properties of phrase queries, use a +<> on the root +field. A <> can also be used +if the last term should be matched exactly, and not as a prefix. Using phrase +queries may be less efficient than using the `match_bool_prefix` query. 
+ +[source,js] +-------------------------------------------------- +GET my_index/_search +{ + "query": { + "match_phrase_prefix": { + "my_field": "brown f" + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +[[specific-params]] +==== Parameters specific to the `search_as_you_type` field + +The following parameters are accepted in a mapping for the `search_as_you_type` +field and are specific to this field type + +[horizontal] + +`max_shingle_size`:: + + The largest shingle size to index the input with and create subfields for, + creating one subfield for each shingle size between 2 and + `max_shingle_size`. Accepts integer values between 2 and 4 inclusive. This + option defaults to 3. + + +[[general-params]] +==== Parameters of the field type as a text field + +The following parameters are accepted in a mapping for the `search_as_you_type` +field due to its nature as a text-like field, and behave similarly to their +behavior when configuring a field of the <> datatype. Unless +otherwise noted, these options configure the root field and its subfields in +the same way. + +<>:: + + The <> which should be used for + <> string fields, both at index-time and at + search-time (unless overridden by the + <>). Defaults to the default index + analyzer, or the <>. + +<>:: + + Should the field be searchable? Accepts `true` (default) or `false`. + +<>:: + + What information should be stored in the index, for search and highlighting + purposes. Defaults to `positions`. + +<>:: + + Whether field-length should be taken into account when scoring queries. + Accepts `true` or `false`. This option configures the root field + and shingle subfields, where its default is `true`. It does not configure + the prefix subfield, where it is `false`. + +<>:: + + Whether the field value should be stored and retrievable separately from + the <> field. Accepts `true` or `false` + (default). 
This option only configures the root field, and does not + configure any subfields. + +<>:: + + The <> that should be used at search time on + <> fields. Defaults to the `analyzer` setting. + +<>:: + + The <> that should be used at search time when a + phrase is encountered. Defaults to the `search_analyzer` setting. + +<>:: + + Which scoring algorithm or _similarity_ should be used. Defaults + to `BM25`. + +<>:: + + Whether term vectors should be stored for an <> + field. Defaults to `no`. This option configures the root field and shingle + subfields, but not the prefix subfield. + + +[[prefix-queries]] +==== Optimization of prefix queries + +When making a <> query to the root field or +any of its subfields, the query will be rewritten to a +<> query on the `._index_prefix` subfield. This +matches more efficiently than is typical of `prefix` queries on text fields, +as prefixes up to a certain length of each shingle are indexed directly as +terms in the `._index_prefix` subfield. + +The analyzer of the `._index_prefix` subfield slightly modifies the +shingle-building behavior to also index prefixes of the terms at the end of the +field's value that normally would not be produced as shingles. For example, if +the value `quick brown fox` is indexed into a `search_as_you_type` field with +`max_shingle_size` of 3, prefixes for `brown fox` and `fox` are also indexed +into the `._index_prefix` subfield even though they do not appear as terms in +the `._3gram` subfield. This allows for completion of all the terms in the +field's input. diff --git a/docs/reference/query-dsl/full-text-queries.asciidoc b/docs/reference/query-dsl/full-text-queries.asciidoc index 5fb5447dbb7..0af99b61f19 100644 --- a/docs/reference/query-dsl/full-text-queries.asciidoc +++ b/docs/reference/query-dsl/full-text-queries.asciidoc @@ -18,7 +18,12 @@ The queries in this group are: <>:: - The poor man's _search-as-you-type_. Like the `match_phrase` query, but does a wildcard search on the final word. 
+ Like the `match_phrase` query, but does a wildcard search on the final word. + +<>:: + + Creates a `bool` query that matches each term as a `term` query, except for + the last term, which is matched as a `prefix` query <>:: @@ -50,6 +55,8 @@ include::match-phrase-query.asciidoc[] include::match-phrase-prefix-query.asciidoc[] +include::match-bool-prefix-query.asciidoc[] + include::multi-match-query.asciidoc[] include::common-terms-query.asciidoc[] diff --git a/docs/reference/query-dsl/match-bool-prefix-query.asciidoc b/docs/reference/query-dsl/match-bool-prefix-query.asciidoc new file mode 100644 index 00000000000..623f2423d80 --- /dev/null +++ b/docs/reference/query-dsl/match-bool-prefix-query.asciidoc @@ -0,0 +1,85 @@ +[[query-dsl-match-bool-prefix-query]] +=== Match Bool Prefix Query + +A `match_bool_prefix` query analyzes its input and constructs a +<> from the terms. Each term except the last +is used in a `term` query. The last term is used in a `prefix` query. A +`match_bool_prefix` query such as + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "match_bool_prefix" : { + "message" : "quick brown f" + } + } +} +-------------------------------------------------- +// CONSOLE + +where analysis produces the terms `quick`, `brown`, and `f` is similar to the +following `bool` query + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "bool" : { + "should": [ + { "term": { "message": "quick" }}, + { "term": { "message": "brown" }}, + { "prefix": { "message": "f"}} + ] + } + } +} +-------------------------------------------------- +// CONSOLE + +An important difference between the `match_bool_prefix` query and +<> is that the +`match_phrase_prefix` query matches its terms as a phrase, but the +`match_bool_prefix` query can match its terms in any position. 
The example +`match_bool_prefix` query above could match a field containing +`quick brown fox`, but it could also match `brown fox quick`. It could also +match a field containing the term `quick`, the term `brown` and a term +starting with `f`, appearing in any position. + +==== Parameters + +By default, `match_bool_prefix` queries' input text will be analyzed using the +analyzer from the queried field's mapping. A different search analyzer can be +configured with the `analyzer` parameter + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "match_bool_prefix" : { + "message": { + "query": "quick brown f", + "analyzer": "keyword" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +`match_bool_prefix` queries support the +<> and `operator` +parameters as described for the +<>, applying the setting to the +constructed `bool` query. The number of clauses in the constructed `bool` +query will in most cases be the number of terms produced by analysis of the +query text. + +The <>, `prefix_length`, +`max_expansions`, `fuzzy_transpositions`, and `fuzzy_rewrite` parameters can +be applied to the `term` subqueries constructed for all terms but the final +term. They do not have any effect on the prefix query constructed for the +final term. diff --git a/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc b/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc index 73f1be9143c..304eaf9a5b4 100644 --- a/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc +++ b/docs/reference/query-dsl/match-phrase-prefix-query.asciidoc @@ -59,6 +59,6 @@ for appears. For better solutions for _search-as-you-type_ see the <> and -{defguide}/_index_time_search_as_you_type.html[Index-Time Search-as-You-Type]. +the <>. 
=================================================== diff --git a/docs/reference/query-dsl/match-query.asciidoc b/docs/reference/query-dsl/match-query.asciidoc index 5c397d603be..b4e90e77650 100644 --- a/docs/reference/query-dsl/match-query.asciidoc +++ b/docs/reference/query-dsl/match-query.asciidoc @@ -186,7 +186,6 @@ process. It does not support field name prefixes, wildcard characters, or other "advanced" features. For this reason, chances of it failing are very small / non existent, and it provides an excellent behavior when it comes to just analyze and run that text as a query behavior (which is -usually what a text search box does). Also, the <> -type can provide a great "as you type" behavior to automatically load search results. +usually what a text search box does). ************************************************** diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 512eee4900b..b8fbb61a950 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -91,6 +91,10 @@ parameter, which can be set to: `phrase_prefix`:: Runs a `match_phrase_prefix` query on each field and combines the `_score` from each field. See <>. +`bool_prefix`:: Creates a `match_bool_prefix` query on each field and + combines the `_score` from each field. See + <>. + [[type-best-fields]] ==== `best_fields` @@ -516,3 +520,36 @@ per-term `blended` queries. It accepts: =================================================== The `fuzziness` parameter cannot be used with the `cross_fields` type. =================================================== + +[[type-bool-prefix]] +==== `bool_prefix` + +The `bool_prefix` type's scoring behaves like <>, but using a +<> instead of a +`match` query. 
+ +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "multi_match" : { + "query": "quick brown f", + "type": "bool_prefix", + "fields": [ "subject", "message" ] + } + } +} +-------------------------------------------------- +// CONSOLE + +The `analyzer`, `boost`, `operator`, `minimum_should_match`, `lenient`, +`zero_terms_query`, and `auto_generate_synonyms_phrase_query` parameters as +explained in <> are supported. The +`fuzziness`, `prefix_length`, `max_expansions`, `rewrite`, and +`fuzzy_transpositions` parameters are supported for the terms that are used to +construct term queries, but do not have an effect on the prefix query +constructed from the final term. + +The `slop` and `cutoff_frequency` parameters are not supported by this query +type. diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index b27e6f0ef0b..c89dce3d241 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -2,7 +2,9 @@ === Completion Suggester NOTE: In order to understand the format of suggestions, please -read the <> page first. +read the <> page first. For more flexible +search-as-you-type searches that do not use suggesters, see the +<>. The `completion` suggester provides auto-complete/search-as-you-type functionality. 
This is a navigational feature to guide users to diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java index cbafd0fd1ef..45a067d7994 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java @@ -41,6 +41,7 @@ public class MapperExtrasPlugin extends Plugin implements MapperPlugin, SearchPl mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, new RankFeaturesFieldMapper.TypeParser()); mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, new DenseVectorFieldMapper.TypeParser()); mappers.put(SparseVectorFieldMapper.CONTENT_TYPE, new SparseVectorFieldMapper.TypeParser()); + mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, new SearchAsYouTypeFieldMapper.TypeParser()); return Collections.unmodifiableMap(mappers); } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java new file mode 100644 index 00000000000..69948bf98a6 --- /dev/null +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapper.java @@ -0,0 +1,826 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.shingle.FixedShingleFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.collect.Iterators; +import 
org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.query.QueryShardContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue; +import static org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType.hasGaps; +import static org.elasticsearch.index.mapper.TypeParsers.parseTextField; + +/** + * Mapper for a text field that optimizes itself for as-you-type completion by indexing its content into subfields. Each subfield + * modifies the analysis chain of the root field to index terms the user would create as they type out the value in the root field + * + * The structure of these fields is + * + *
+ *     [ SearchAsYouTypeFieldMapper, SearchAsYouTypeFieldType, unmodified analysis ]
+ *     ├── [ ShingleFieldMapper, ShingleFieldType, analysis wrapped with 2-shingles ]
+ *     ├── ...
+ *     ├── [ ShingleFieldMapper, ShingleFieldType, analysis wrapped with max_shingle_size-shingles ]
+ *     └── [ PrefixFieldMapper, PrefixFieldType, analysis wrapped with max_shingle_size-shingles and edge-ngrams ]
+ * 
+ */ +public class SearchAsYouTypeFieldMapper extends FieldMapper { + + public static final String CONTENT_TYPE = "search_as_you_type"; + private static final int MAX_SHINGLE_SIZE_LOWER_BOUND = 2; + private static final int MAX_SHINGLE_SIZE_UPPER_BOUND = 4; + private static final String PREFIX_FIELD_SUFFIX = "._index_prefix"; + + public static class Defaults { + + public static final int MIN_GRAM = 1; + public static final int MAX_GRAM = 20; + public static final int MAX_SHINGLE_SIZE = 3; + + public static final MappedFieldType FIELD_TYPE = new SearchAsYouTypeFieldType(); + + static { + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + FIELD_TYPE.freeze(); + } + } + + public static class TypeParser implements Mapper.TypeParser { + + @Override + public Mapper.Builder parse(String name, + Map node, + ParserContext parserContext) throws MapperParsingException { + + final Builder builder = new Builder(name); + + builder.fieldType().setIndexAnalyzer(parserContext.getIndexAnalyzers().getDefaultIndexAnalyzer()); + builder.fieldType().setSearchAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchAnalyzer()); + builder.fieldType().setSearchQuoteAnalyzer(parserContext.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer()); + parseTextField(builder, name, node, parserContext); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + final Map.Entry entry = iterator.next(); + final String fieldName = entry.getKey(); + final Object fieldNode = entry.getValue(); + + if (fieldName.equals("max_shingle_size")) { + builder.maxShingleSize(nodeIntegerValue(fieldNode)); + iterator.remove(); + } + // TODO should we allow to configure the prefix field + } + return builder; + } + } + + public static class Builder extends FieldMapper.Builder { + private int maxShingleSize = Defaults.MAX_SHINGLE_SIZE; + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + this.builder = this; + } + + public Builder 
maxShingleSize(int maxShingleSize) { + if (maxShingleSize < MAX_SHINGLE_SIZE_LOWER_BOUND || maxShingleSize > MAX_SHINGLE_SIZE_UPPER_BOUND) { + throw new MapperParsingException("[max_shingle_size] must be at least [" + MAX_SHINGLE_SIZE_LOWER_BOUND + "] and at most " + + "[" + MAX_SHINGLE_SIZE_UPPER_BOUND + "], got [" + maxShingleSize + "]"); + } + this.maxShingleSize = maxShingleSize; + return builder; + } + + @Override + public SearchAsYouTypeFieldType fieldType() { + return (SearchAsYouTypeFieldType) this.fieldType; + } + + @Override + public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) { + setupFieldType(context); + + final NamedAnalyzer indexAnalyzer = fieldType().indexAnalyzer(); + final NamedAnalyzer searchAnalyzer = fieldType().searchAnalyzer(); + final NamedAnalyzer searchQuoteAnalyzer = fieldType().searchQuoteAnalyzer(); + + // set up the prefix field + final String prefixFieldName = name() + PREFIX_FIELD_SUFFIX; + final PrefixFieldType prefixFieldType = new PrefixFieldType(name(), prefixFieldName, Defaults.MIN_GRAM, Defaults.MAX_GRAM); + prefixFieldType.setIndexOptions(fieldType().indexOptions()); + // wrap the root field's index analyzer with shingles and edge ngrams + final SearchAsYouTypeAnalyzer prefixIndexWrapper = + SearchAsYouTypeAnalyzer.withShingleAndPrefix(indexAnalyzer.analyzer(), maxShingleSize); + // wrap the root field's search analyzer with only shingles + final SearchAsYouTypeAnalyzer prefixSearchWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchAnalyzer.analyzer(), maxShingleSize); + // don't wrap the root field's search quote analyzer as prefix field doesn't support phrase queries + prefixFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, prefixIndexWrapper)); + prefixFieldType.setSearchAnalyzer(new NamedAnalyzer(searchAnalyzer.name(), AnalyzerScope.INDEX, prefixSearchWrapper)); + final PrefixFieldMapper prefixFieldMapper = new PrefixFieldMapper(prefixFieldType, 
context.indexSettings()); + + // set up the shingle fields + final ShingleFieldMapper[] shingleFieldMappers = new ShingleFieldMapper[maxShingleSize - 1]; + final ShingleFieldType[] shingleFieldTypes = new ShingleFieldType[maxShingleSize - 1]; + for (int i = 0; i < shingleFieldMappers.length; i++) { + final int shingleSize = i + 2; + final ShingleFieldType shingleFieldType = new ShingleFieldType(fieldType(), shingleSize); + shingleFieldType.setName(getShingleFieldName(name(), shingleSize)); + // wrap the root field's index, search, and search quote analyzers with shingles + final SearchAsYouTypeAnalyzer shingleIndexWrapper = + SearchAsYouTypeAnalyzer.withShingle(indexAnalyzer.analyzer(), shingleSize); + final SearchAsYouTypeAnalyzer shingleSearchWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchAnalyzer.analyzer(), shingleSize); + final SearchAsYouTypeAnalyzer shingleSearchQuoteWrapper = + SearchAsYouTypeAnalyzer.withShingle(searchQuoteAnalyzer.analyzer(), shingleSize); + shingleFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, shingleIndexWrapper)); + shingleFieldType.setSearchAnalyzer(new NamedAnalyzer(searchAnalyzer.name(), AnalyzerScope.INDEX, shingleSearchWrapper)); + shingleFieldType.setSearchQuoteAnalyzer( + new NamedAnalyzer(searchQuoteAnalyzer.name(), AnalyzerScope.INDEX, shingleSearchQuoteWrapper)); + shingleFieldType.setPrefixFieldType(prefixFieldType); + shingleFieldTypes[i] = shingleFieldType; + shingleFieldMappers[i] = new ShingleFieldMapper(shingleFieldType, context.indexSettings()); + } + fieldType().setPrefixField(prefixFieldType); + fieldType().setShingleFields(shingleFieldTypes); + return new SearchAsYouTypeFieldMapper(name, fieldType(), context.indexSettings(), copyTo, + maxShingleSize, prefixFieldMapper, shingleFieldMappers); + } + } + + private static int countPosition(TokenStream stream) throws IOException { + assert stream instanceof CachingTokenFilter; + PositionIncrementAttribute posIncAtt = 
stream.getAttribute(PositionIncrementAttribute.class); + stream.reset(); + int positionCount = 0; + while (stream.incrementToken()) { + if (posIncAtt.getPositionIncrement() != 0) { + positionCount += posIncAtt.getPositionIncrement(); + } + } + return positionCount; + } + + /** + * The root field type, which most queries should target as it will delegate queries to subfields better optimized for the query. When + * handling phrase queries, it analyzes the query text to find the appropriate sized shingle subfield to delegate to. When handling + * prefix or phrase prefix queries, it delegates to the prefix subfield + */ + static class SearchAsYouTypeFieldType extends StringFieldType { + + PrefixFieldType prefixField; + ShingleFieldType[] shingleFields = new ShingleFieldType[0]; + + SearchAsYouTypeFieldType() { + setTokenized(true); + } + + SearchAsYouTypeFieldType(SearchAsYouTypeFieldType other) { + super(other); + + if (other.prefixField != null) { + this.prefixField = other.prefixField.clone(); + } + if (other.shingleFields != null) { + this.shingleFields = new ShingleFieldType[other.shingleFields.length]; + for (int i = 0; i < this.shingleFields.length; i++) { + if (other.shingleFields[i] != null) { + this.shingleFields[i] = other.shingleFields[i].clone(); + } + } + } + } + + public void setPrefixField(PrefixFieldType prefixField) { + checkIfFrozen(); + this.prefixField = prefixField; + } + + public void setShingleFields(ShingleFieldType[] shingleFields) { + checkIfFrozen(); + this.shingleFields = shingleFields; + } + + @Override + public MappedFieldType clone() { + return new SearchAsYouTypeFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + private ShingleFieldType shingleFieldForPositions(int positions) { + final int indexFromShingleSize = Math.max(positions - 2, 0); + return shingleFields[Math.min(indexFromShingleSize, shingleFields.length - 1)]; + } + + @Override + public Query existsQuery(QueryShardContext context) { 
+ if (omitNorms()) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } else { + return new NormsFieldExistsQuery(name()); + } + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (prefixField == null || prefixField.termLengthWithinBounds(value.length()) == false) { + return super.prefixQuery(value, method, context); + } else { + final Query query = prefixField.prefixQuery(value, method, context); + if (method == null + || method == MultiTermQuery.CONSTANT_SCORE_REWRITE + || method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) { + return new ConstantScoreQuery(query); + } else { + return query; + } + } + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.phraseQuery(stream, 0, true); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || slop > 0 || hasGaps(stream) || numPos <= 1) { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.multiPhraseQuery(stream, 0, true); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + int numPos = countPosition(stream); + if (shingleFields.length == 0 || 
slop > 0 || hasGaps(stream) || numPos <= 1) { + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, + null, null); + } + final ShingleFieldType shingleField = shingleFieldForPositions(numPos); + stream = new FixedShingleFilter(stream, shingleField.shingleSize); + return shingleField.phrasePrefixQuery(stream, 0, maxExpansions); + } + + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + if (prefixField != null && prefixField.termLengthWithinBounds(value.length())) { + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + + @Override + public void checkCompatibility(MappedFieldType other, List conflicts) { + super.checkCompatibility(other, conflicts); + final SearchAsYouTypeFieldType otherFieldType = (SearchAsYouTypeFieldType) other; + if (this.shingleFields.length != otherFieldType.shingleFields.length) { + conflicts.add("mapper [" + name() + "] has a different [max_shingle_size]"); + } else if (Arrays.equals(this.shingleFields, otherFieldType.shingleFields) == false) { + conflicts.add("mapper [" + name() + "] has shingle subfields that are configured differently"); + } + + if (Objects.equals(this.prefixField, otherFieldType.prefixField) == false) { + conflicts.add("mapper [" + name() + "] has different [index_prefixes] settings"); + } + } + + @Override + public boolean equals(Object otherObject) { + if (this == otherObject) { + return true; + } + if (otherObject == null || getClass() != otherObject.getClass()) { + return false; + } + if (!super.equals(otherObject)) { + return false; + } + final SearchAsYouTypeFieldType other = (SearchAsYouTypeFieldType) 
otherObject; + return Objects.equals(prefixField, other.prefixField) && + Arrays.equals(shingleFields, other.shingleFields); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), prefixField, Arrays.hashCode(shingleFields)); + } + } + + /** + * The prefix field type handles prefix and phrase prefix queries that are delegated to it by the other field types in a + * search_as_you_type structure + */ + static final class PrefixFieldType extends StringFieldType { + + final int minChars; + final int maxChars; + final String parentField; + + PrefixFieldType(String parentField, String name, int minChars, int maxChars) { + setTokenized(true); + setOmitNorms(true); + setStored(false); + setName(name); + this.minChars = minChars; + this.maxChars = maxChars; + this.parentField = parentField; + } + + PrefixFieldType(PrefixFieldType other) { + super(other); + this.minChars = other.minChars; + this.maxChars = other.maxChars; + this.parentField = other.parentField; + } + + boolean termLengthWithinBounds(int length) { + return length >= minChars - 1 && length <= maxChars; + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (value.length() >= minChars) { + return super.termQuery(value, context); + } + List automata = new ArrayList<>(); + automata.add(Automata.makeString(value)); + for (int i = value.length(); i < minChars; i++) { + automata.add(Automata.makeAnyChar()); + } + Automaton automaton = Operations.concatenate(automata); + AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton); + query.setRewriteMethod(method); + return new BooleanQuery.Builder() + .add(query, BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term(parentField, value)), BooleanClause.Occur.SHOULD) + .build(); + } + + @Override + public PrefixFieldType clone() { + return new PrefixFieldType(this); + } + + @Override + public String typeName() { + return "prefix"; + } + + 
@Override + public String toString() { + return super.toString() + ",prefixChars=" + minChars + ":" + maxChars; + } + + @Override + public Query existsQuery(QueryShardContext context) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + PrefixFieldType that = (PrefixFieldType) o; + return minChars == that.minChars && + maxChars == that.maxChars; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), minChars, maxChars); + } + } + + static final class PrefixFieldMapper extends FieldMapper { + + PrefixFieldMapper(PrefixFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + public PrefixFieldType fieldType() { + return (PrefixFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return "prefix"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + } + + static final class ShingleFieldMapper extends FieldMapper { + + ShingleFieldMapper(ShingleFieldType fieldType, Settings indexSettings) { + super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty()); + } + + @Override + public ShingleFieldType fieldType() { + return (ShingleFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) { + throw new UnsupportedOperationException(); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + } + + /** + * The shingle field type handles phrase queries and delegates prefix and phrase prefix queries to the prefix field + */ + 
static class ShingleFieldType extends StringFieldType { + final int shingleSize; + PrefixFieldType prefixFieldType; + + ShingleFieldType(MappedFieldType other, int shingleSize) { + super(other); + this.shingleSize = shingleSize; + this.setStored(false); + } + + ShingleFieldType(ShingleFieldType other) { + super(other); + this.shingleSize = other.shingleSize; + if (other.prefixFieldType != null) { + this.prefixFieldType = other.prefixFieldType.clone(); + } + } + + void setPrefixFieldType(PrefixFieldType prefixFieldType) { + checkIfFrozen(); + this.prefixFieldType = prefixFieldType; + } + + @Override + public ShingleFieldType clone() { + return new ShingleFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public Query existsQuery(QueryShardContext context) { + if (omitNorms()) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } else { + return new NormsFieldExistsQuery(name()); + } + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + if (prefixFieldType == null || prefixFieldType.termLengthWithinBounds(value.length()) == false) { + return super.prefixQuery(value, method, context); + } else { + final Query query = prefixFieldType.prefixQuery(value, method, context); + if (method == null + || method == MultiTermQuery.CONSTANT_SCORE_REWRITE + || method == MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE) { + return new ConstantScoreQuery(query); + } else { + return query; + } + } + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + 
} + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + final String prefixFieldName = slop > 0 + ? null + : prefixFieldType.name(); + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, + prefixFieldName, prefixFieldType::termLengthWithinBounds); + } + + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + if (prefixFieldType != null && prefixFieldType.termLengthWithinBounds(value.length())) { + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + + @Override + public void checkCompatibility(MappedFieldType other, List conflicts) { + super.checkCompatibility(other, conflicts); + ShingleFieldType ft = (ShingleFieldType) other; + if (ft.shingleSize != this.shingleSize) { + conflicts.add("mapper [" + name() + "] has different [shingle_size] values"); + } + if (Objects.equals(this.prefixFieldType, ft.prefixFieldType) == false) { + conflicts.add("mapper [" + name() + "] has different [index_prefixes] settings"); + } + } + + @Override + public boolean equals(Object otherObject) { + if (this == otherObject) { + return true; + } + if (otherObject == null || getClass() != otherObject.getClass()) { + return false; + } + if (!super.equals(otherObject)) { + return false; + } + final ShingleFieldType other = (ShingleFieldType) otherObject; + return shingleSize == other.shingleSize + && Objects.equals(prefixFieldType, other.prefixFieldType); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), shingleSize, prefixFieldType); + } + } + + private final int maxShingleSize; + private 
private final int maxShingleSize;
// not final: replaced by the merged mapper in doMerge
private PrefixFieldMapper prefixField;
private final ShingleFieldMapper[] shingleFields;

/**
 * Creates the root mapper together with its already-built prefix and shingle sub-mappers.
 */
public SearchAsYouTypeFieldMapper(String simpleName,
                                  SearchAsYouTypeFieldType fieldType,
                                  Settings indexSettings,
                                  CopyTo copyTo,
                                  int maxShingleSize,
                                  PrefixFieldMapper prefixField,
                                  ShingleFieldMapper[] shingleFields) {
    super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, MultiFields.empty(), copyTo);
    this.prefixField = prefixField;
    this.shingleFields = shingleFields;
    this.maxShingleSize = maxShingleSize;
}

/**
 * Indexes the same text value into the root field, every shingle subfield and the prefix subfield.
 * A {@code null} value produces no fields. All created fields are collected in one list, which is
 * handed to {@code createFieldNamesField} when norms are omitted, before being appended to {@code fields}.
 */
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    final String value = context.externalValueSet() ? context.externalValue().toString() : context.parser().textOrNull();
    if (value == null) {
        return;
    }

    final List<IndexableField> newFields = new ArrayList<>();
    newFields.add(new Field(fieldType().name(), value, fieldType()));
    for (ShingleFieldMapper subFieldMapper : shingleFields) {
        // collected with the other new fields rather than written straight to the output list
        newFields.add(new Field(subFieldMapper.fieldType().name(), value, subFieldMapper.fieldType()));
    }
    newFields.add(new Field(prefixField.fieldType().name(), value, prefixField.fieldType()));
    if (fieldType().omitNorms()) {
        createFieldNamesField(context, newFields);
    }
    fields.addAll(newFields);
}

@Override
protected String contentType() {
    return CONTENT_TYPE;
}

/**
 * Merges another search_as_you_type mapper into this one. Each sub-mapper is merged with its
 * counterpart from the incoming mapper.
 *
 * @throws IllegalArgumentException if the incoming mapper has a different max shingle size
 */
@Override
protected void doMerge(Mapper mergeWith) {
    super.doMerge(mergeWith);
    final SearchAsYouTypeFieldMapper mw = (SearchAsYouTypeFieldMapper) mergeWith;
    if (mw.maxShingleSize != maxShingleSize) {
        throw new IllegalArgumentException("mapper [" + name() + "] has different maxShingleSize setting, current ["
            + this.maxShingleSize + "], merged [" + mw.maxShingleSize + "]");
    }
    // merge with the incoming prefix sub-mapper, not with the incoming root mapper
    this.prefixField = (PrefixFieldMapper) this.prefixField.merge(mw.prefixField);

    for (int i = 0; i < mw.shingleFields.length; i++) {
        this.shingleFields[i] = (ShingleFieldMapper) this.shingleFields[i].merge(mw.shingleFields[i]);
    }
}

/**
 * Returns the name of the shingle subfield for a shingle size, e.g. {@code parent._2gram}.
 */
public static String getShingleFieldName(String parentField, int shingleSize) {
    return parentField + "._" + shingleSize + "gram";
}

@Override
public SearchAsYouTypeFieldType fieldType() {
    return (SearchAsYouTypeFieldType) super.fieldType();
}

public int maxShingleSize() {
    return maxShingleSize;
}

public PrefixFieldMapper prefixField() {
    return prefixField;
}

public ShingleFieldMapper[] shingleFields() {
    return shingleFields;
}

@Override
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
    super.doXContentBody(builder, includeDefaults, params);
    doXContentAnalyzers(builder, includeDefaults);
    builder.field("max_shingle_size", maxShingleSize);
}

/**
 * Iterates over the mappers returned by the superclass, followed by the prefix sub-mapper and the shingle sub-mappers.
 */
@Override
public Iterator<Mapper> iterator() {
    final List<Mapper> subIterators = new ArrayList<>();
    subIterators.add(prefixField);
    subIterators.addAll(Arrays.asList(shingleFields));
    @SuppressWarnings("unchecked") Iterator<Mapper> concat = Iterators.concat(super.iterator(), subIterators.iterator());
    return concat;
}
When adding an edge + * ngrams token filter, it also adds a {@link TrailingShingleTokenFilter} to add extra position increments at the end of the stream + * to induce the shingle token filter to create tokens at the end of the stream smaller than the shingle size + */ + static class SearchAsYouTypeAnalyzer extends AnalyzerWrapper { + + private final Analyzer delegate; + private final int shingleSize; + private final boolean indexPrefixes; + + private SearchAsYouTypeAnalyzer(Analyzer delegate, + int shingleSize, + boolean indexPrefixes) { + + super(delegate.getReuseStrategy()); + this.delegate = Objects.requireNonNull(delegate); + this.shingleSize = shingleSize; + this.indexPrefixes = indexPrefixes; + } + + static SearchAsYouTypeAnalyzer withShingle(Analyzer delegate, int shingleSize) { + return new SearchAsYouTypeAnalyzer(delegate, shingleSize, false); + } + + static SearchAsYouTypeAnalyzer withShingleAndPrefix(Analyzer delegate, int shingleSize) { + return new SearchAsYouTypeAnalyzer(delegate, shingleSize, true); + } + + @Override + protected Analyzer getWrappedAnalyzer(String fieldName) { + return delegate; + } + + @Override + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + TokenStream tokenStream = components.getTokenStream(); + if (indexPrefixes) { + tokenStream = new TrailingShingleTokenFilter(tokenStream, shingleSize - 1); + } + tokenStream = new FixedShingleFilter(tokenStream, shingleSize, " ", ""); + if (indexPrefixes) { + tokenStream = new EdgeNGramTokenFilter(tokenStream, Defaults.MIN_GRAM, Defaults.MAX_GRAM, true); + } + return new TokenStreamComponents(components.getSource(), tokenStream); + } + + public int shingleSize() { + return shingleSize; + } + + public boolean indexPrefixes() { + return indexPrefixes; + } + + @Override + public String toString() { + return "<" + getClass().getCanonicalName() + " shingleSize=[" + shingleSize + "] indexPrefixes=[" + indexPrefixes + "]>"; + } + + private static 
class TrailingShingleTokenFilter extends TokenFilter { + + private final int extraPositionIncrements; + private final PositionIncrementAttribute positionIncrementAttribute; + + TrailingShingleTokenFilter(TokenStream input, int extraPositionIncrements) { + super(input); + this.extraPositionIncrements = extraPositionIncrements; + this.positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); + } + + @Override + public boolean incrementToken() throws IOException { + return input.incrementToken(); + } + + @Override + public void end() throws IOException { + super.end(); + positionIncrementAttribute.setPositionIncrement(extraPositionIncrements); + } + } + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java new file mode 100644 index 00000000000..6cf0dc83d90 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeAnalyzerTests.java @@ -0,0 +1,197 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.SimpleAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeAnalyzer; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import java.util.stream.IntStream; + +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.stream.Collectors.toList; +import static org.hamcrest.Matchers.equalTo; + +public class SearchAsYouTypeAnalyzerTests extends ESTestCase { + + private static final Analyzer SIMPLE = new SimpleAnalyzer(); + + public static List analyze(SearchAsYouTypeAnalyzer analyzer, String text) throws IOException { + final List tokens = new ArrayList<>(); + try (TokenStream tokenStream = analyzer.tokenStream("field", text)) { + final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + tokens.add(charTermAttribute.toString()); + } + } + return tokens; + } + + private void testCase(String text, + Function analyzerFunction, + Function> expectedTokensFunction) throws IOException { + + for (int shingleSize = 2; shingleSize <= 4; shingleSize++) { + final SearchAsYouTypeAnalyzer analyzer = analyzerFunction.apply(shingleSize); + final List expectedTokens = expectedTokensFunction.apply(shingleSize); + final List actualTokens = analyze(analyzer, text); + assertThat("analyzed correctly with " + analyzer, actualTokens, equalTo(expectedTokens)); + } + } + + public void testSingleTermShingles() throws IOException { + testCase( + "quick", + shingleSize -> SearchAsYouTypeAnalyzer.withShingle(SIMPLE, shingleSize), + shingleSize -> 
emptyList() + ); + } + + public void testMultiTermShingles() throws IOException { + testCase( + "quick brown fox jump lazy", + shingleSize -> SearchAsYouTypeAnalyzer.withShingle(SIMPLE, shingleSize), + shingleSize -> { + if (shingleSize == 2) { + return asList("quick brown", "brown fox", "fox jump", "jump lazy"); + } else if (shingleSize == 3) { + return asList("quick brown fox", "brown fox jump", "fox jump lazy"); + } else if (shingleSize == 4) { + return asList("quick brown fox jump", "brown fox jump lazy"); + } + throw new IllegalArgumentException(); + } + ); + } + + public void testSingleTermPrefix() throws IOException { + testCase( + "quick", + shingleSize -> SearchAsYouTypeAnalyzer.withShingleAndPrefix(SIMPLE, shingleSize), + shingleSize -> { + final List tokens = new ArrayList<>(asList("q", "qu", "qui", "quic", "quick")); + tokens.addAll(tokenWithSpaces("quick", shingleSize)); + return tokens; + } + ); + } + + public void testMultiTermPrefix() throws IOException { + testCase( + //"quick red fox lazy brown", + "quick brown fox jump lazy", + shingleSize -> SearchAsYouTypeAnalyzer.withShingleAndPrefix(SIMPLE, shingleSize), + shingleSize -> { + if (shingleSize == 2) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick bro", "quick brow", "quick brown" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump" + )); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } else if (shingleSize == 3) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick 
bro", "quick brow", "quick brown", + "quick brown ", "quick brown f", "quick brown fo", "quick brown fox" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox", "brown fox ", "brown fox j", + "brown fox ju", "brown fox jum", "brown fox jump" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump", "fox jump ", "fox jump l", "fox jump la", + "fox jump laz", "fox jump lazy" + )); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(tokenWithSpaces("jump lazy", shingleSize - 1)); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } else if (shingleSize == 4) { + final List tokens = new ArrayList<>(); + tokens.addAll(asList( + "q", "qu", "qui", "quic", "quick", "quick ", "quick b", "quick br", "quick bro", "quick brow", "quick brown", + "quick brown ", "quick brown f", "quick brown fo", "quick brown fox", "quick brown fox ", "quick brown fox j", + "quick brown fox ju", "quick brown fox jum", "quick brown fox jump" + )); + tokens.addAll(asList( + "b", "br", "bro", "brow", "brown", "brown ", "brown f", "brown fo", "brown fox", "brown fox ", "brown fox j", + "brown fox ju", "brown fox jum", "brown fox jump", "brown fox jump ", "brown fox jump l", "brown fox jump la", + "brown fox jump laz", "brown fox jump lazy" + )); + tokens.addAll(asList( + "f", "fo", "fox", "fox ", "fox j", "fox ju", "fox jum", "fox jump", "fox jump ", "fox jump l", "fox jump la", + "fox jump laz", "fox jump lazy" + )); + tokens.addAll(tokenWithSpaces("fox jump lazy", shingleSize - 2)); + tokens.addAll(asList( + "j", "ju", "jum", "jump", "jump ", "jump l", "jump la", "jump laz", "jump lazy" + )); + tokens.addAll(tokenWithSpaces("jump lazy", shingleSize - 1)); + tokens.addAll(asList( + "l", "la", "laz", "lazy" + )); + 
tokens.addAll(tokenWithSpaces("lazy", shingleSize)); + return tokens; + } + + throw new IllegalArgumentException(); + } + ); + } + + private static List tokenWithSpaces(String text, int maxShingleSize) { + return IntStream.range(1, maxShingleSize).mapToObj(i -> text + spaces(i)).collect(toList()); + } + + private static String spaces(int count) { + final StringBuilder builder = new StringBuilder(); + for (int i = 0; i < count; i++) { + builder.append(" "); + } + return builder.toString(); + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java new file mode 100644 index 00000000000..9ed43a95056 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldMapperTests.java @@ -0,0 +1,758 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldMapper; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeAnalyzer; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldMapper; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldType; +import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; +import 
org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.Arrays.asList; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasProperty; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.collection.IsArrayContainingInAnyOrder.arrayContainingInAnyOrder; +import static org.hamcrest.core.IsInstanceOf.instanceOf; + +public class SearchAsYouTypeFieldMapperTests extends ESSingleNodeTestCase { + + @Override + protected Collection> getPlugins() { + return pluginList(MapperExtrasPlugin.class); + } + + public void testIndexing() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper mapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + ParsedDocument doc = mapper.parse(new SourceToParse("test", "_doc", "1", BytesReference + .bytes(XContentFactory.jsonBuilder() + .startObject() + .field("a_field", "new york city") + .endObject()), + XContentType.JSON)); + + for (String field : new String[] { "a_field", "a_field._index_prefix", 
"a_field._2gram", "a_field._3gram"}) { + IndexableField[] fields = doc.rootDoc().getFields(field); + assertEquals(1, fields.length); + assertEquals("new york city", fields[0].stringValue()); + } + } + + public void testDefaultConfiguration() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + final SearchAsYouTypeFieldMapper rootMapper = getRootFieldMapper(defaultMapper, "a_field"); + assertRootFieldMapper(rootMapper, 3, "default"); + + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertPrefixFieldType(prefixFieldMapper.fieldType(), 3, "default"); + + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._2gram").fieldType(), 2, "default", prefixFieldMapper.fieldType()); + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._3gram").fieldType(), 3, "default", prefixFieldMapper.fieldType()); + } + + public void testConfiguration() throws IOException { + final int maxShingleSize = 4; + final String analyzerName = "simple"; + + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("analyzer", analyzerName) + .field("max_shingle_size", maxShingleSize) + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + final SearchAsYouTypeFieldMapper rootMapper = 
getRootFieldMapper(defaultMapper, "a_field"); + assertRootFieldMapper(rootMapper, maxShingleSize, analyzerName); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertPrefixFieldType(prefixFieldMapper.fieldType(), maxShingleSize, analyzerName); + + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._2gram").fieldType(), 2, analyzerName, prefixFieldMapper.fieldType()); + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._3gram").fieldType(), 3, analyzerName, prefixFieldMapper.fieldType()); + assertShingleFieldType( + getShingleFieldMapper(defaultMapper, "a_field._4gram").fieldType(), 4, analyzerName, prefixFieldMapper.fieldType()); + } + + public void testIndexOptions() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("index_options", "offsets") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertThat("for " + mapper.name(), + mapper.fieldType().indexOptions(), equalTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS))); + } + + public void testStore() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("store", "true") + .endObject() + .endObject() + .endObject() 
+ .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + assertTrue(getRootFieldMapper(defaultMapper, "a_field").fieldType().stored()); + Stream.of( + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertFalse("for " + mapper.name(), mapper.fieldType().stored())); + } + + public void testIndex() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("index", "false") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertThat("for " + mapper.name(), mapper.fieldType().indexOptions(), equalTo(IndexOptions.NONE))); + } + + public void testTermVectors() throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("term_vector", "yes") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + 
getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertTrue("for " + mapper.name(), mapper.fieldType().storeTermVectors())); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertFalse(prefixFieldMapper.fieldType().storeTermVectors()); + } + + public void testNorms() throws IOException { + // default setting + { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test-1") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getShingleFieldMapper(defaultMapper, "a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertFalse("for " + mapper.name(), mapper.fieldType().omitNorms())); + + final PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"); + assertTrue(prefixFieldMapper.fieldType().omitNorms()); + } + + // can disable them on shingle fields + { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("norms", "false") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test-2") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + Stream.of( + getRootFieldMapper(defaultMapper, "a_field"), + getPrefixFieldMapper(defaultMapper, "a_field._index_prefix"), + getShingleFieldMapper(defaultMapper, 
"a_field._2gram"), + getShingleFieldMapper(defaultMapper, "a_field._3gram") + ).forEach(mapper -> assertTrue("for " + mapper.name(), mapper.fieldType().omitNorms())); + } + } + + + public void testDocumentParsingSingleValue() throws IOException { + documentParsingTestCase(Collections.singleton(randomAlphaOfLengthBetween(5, 20))); + } + + public void testDocumentParsingMultipleValues() throws IOException { + documentParsingTestCase(randomUnique(() -> randomAlphaOfLengthBetween(3, 20), randomIntBetween(2, 10))); + } + + public void testMatchPhrasePrefix() throws IOException { + IndexService indexService = createIndex("test", Settings.EMPTY); + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject().endObject()); + + queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expected = new SynonymQuery(new Term("field._index_prefix", "two words")); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + Query expected = new SynonymQuery(new Term("field._index_prefix", "three words here")); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.setSlop(1); + mpq.add(new Term("field", "two")); + mpq.add(new Term("field", "words")); + assertThat(q, equalTo(mpq)); + } + + { + Query q = new 
MatchPhrasePrefixQueryBuilder("field", "more than three words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field._3gram", true) + .addClause(new SpanTermQuery(new Term("field._3gram", "more than three"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "than three words")), "field._3gram") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field._3gram", "more than three words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field._3gram", true) + .addClause(new SpanTermQuery(new Term("field._3gram", "more than three"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "than three words")), "field._3gram") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field._3gram", "two words").toQuery(queryShardContext); + Query expected = new MatchNoDocsQuery(); + assertThat(q, equalTo(expected)); + } + + { + Query actual = new MatchPhrasePrefixQueryBuilder("field._3gram", "one two three four") + .slop(1) + .toQuery(queryShardContext); + MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field._3gram"); + expected.setSlop(1); + expected.add(new Term("field._3gram", "one two three")); + expected.add(new Term("field._3gram", "two three four")); + assertThat(actual, equalTo(expected)); + } + + } + + public void testMatchPhrase() throws IOException { + final IndexService indexService = createIndex("test", Settings.EMPTY); + final QueryShardContext queryShardContext = indexService.newQueryShardContext(randomInt(20), null, + () -> { throw new UnsupportedOperationException(); }, null); + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + 
.endObject() + .endObject() + .endObject()); + + queryShardContext.getMapperService().merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field", "one")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._2gram", "one two")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two three") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two three four") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .add(new Term("a_field._3gram", "two three four")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field", "one two") + .slop(1) + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field", "one")) + .add(new Term("a_field", "two")) + .setSlop(1) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._2gram", "one two") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field._2gram", "one two")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._2gram", "one two three") + .toQuery(queryShardContext); + final Query expected = new 
MultiPhraseQuery.Builder() + .add(new Term("a_field._2gram", "one two")) + .add(new Term("a_field._2gram", "two three")) + .build(); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._3gram", "one two three") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field._3gram", "one two three")); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._3gram", "one two three four") + .toQuery(queryShardContext); + final Query expected = new MultiPhraseQuery.Builder() + .add(new Term("a_field._3gram", "one two three")) + .add(new Term("a_field._3gram", "two three four")) + .build(); + assertThat(actual, equalTo(expected)); + } + + // todo are these queries generated for the prefix field right? + { + final Query actual = new MatchPhraseQueryBuilder("a_field._index_prefix", "one two") + .toQuery(queryShardContext); + final Query expected = new MatchNoDocsQuery("Matching no documents because no terms present"); + assertThat(actual, equalTo(expected)); + } + + { + final Query actual = new MatchPhraseQueryBuilder("a_field._index_prefix", "one two three") + .toQuery(queryShardContext); + final Query expected = new TermQuery(new Term("a_field._index_prefix", "one two three")); + assertThat(actual, equalTo(expected)); + } + + { + expectThrows(IllegalArgumentException.class, + () -> new MatchPhraseQueryBuilder("a_field._index_prefix", "one two three four").toQuery(queryShardContext)); + } + } + + private static BooleanQuery buildBoolPrefixQuery(String shingleFieldName, String prefixFieldName, List terms) { + final BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < terms.size() - 1; i++) { + final String term = terms.get(i); + builder.add(new BooleanClause(new TermQuery(new Term(shingleFieldName, term)), BooleanClause.Occur.SHOULD)); + } + final String finalTerm = terms.get(terms.size() - 1); + 
builder.add(new BooleanClause( + new ConstantScoreQuery(new TermQuery(new Term(prefixFieldName, finalTerm))), BooleanClause.Occur.SHOULD)); + return builder.build(); + } + + public void testMultiMatchBoolPrefix() throws IOException { + final IndexService indexService = createIndex("test", Settings.EMPTY); + final QueryShardContext queryShardContext = indexService.newQueryShardContext(randomInt(20), null, + () -> { throw new UnsupportedOperationException(); }, null); + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .field("max_shingle_size", 4) + .endObject() + .endObject() + .endObject() + .endObject()); + + queryShardContext.getMapperService().merge("_doc", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE); + + final MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder( + "quick brown fox jump lazy dog", + "a_field", + "a_field._2gram", + "a_field._3gram", + "a_field._4gram" + ); + builder.type(MultiMatchQueryBuilder.Type.BOOL_PREFIX); + + final Query actual = builder.toQuery(queryShardContext); + assertThat(actual, instanceOf(DisjunctionMaxQuery.class)); + final DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) actual; + assertThat(disMaxQuery.getDisjuncts(), hasSize(4)); + assertThat(disMaxQuery.getDisjuncts(), containsInAnyOrder( + buildBoolPrefixQuery( + "a_field", "a_field._index_prefix", asList("quick", "brown", "fox", "jump", "lazy", "dog")), + buildBoolPrefixQuery("a_field._2gram", "a_field._index_prefix", + asList("quick brown", "brown fox", "fox jump", "jump lazy", "lazy dog")), + buildBoolPrefixQuery("a_field._3gram", "a_field._index_prefix", + asList("quick brown fox", "brown fox jump", "fox jump lazy", "jump lazy dog")), + buildBoolPrefixQuery("a_field._4gram", "a_field._index_prefix", + asList("quick brown fox jump", "brown fox jump lazy", "fox jump lazy 
dog")))); + } + + private void documentParsingTestCase(Collection values) throws IOException { + final String mapping = Strings.toString(XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("a_field") + .field("type", "search_as_you_type") + .endObject() + .endObject() + .endObject() + .endObject()); + + final DocumentMapper defaultMapper = createIndex("test") + .mapperService() + .documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + final XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + if (values.size() > 1) { + builder.array("a_field", values.toArray(new String[0])); + } else { + builder.field("a_field", values.iterator().next()); + } + builder.endObject(); + final ParsedDocument parsedDocument = defaultMapper.parse( + new SourceToParse("test", "_doc", "1", BytesReference.bytes(builder), XContentType.JSON)); + + + final Set> rootFieldMatchers = values.stream() + .map(value -> indexableFieldMatcher(value, SearchAsYouTypeFieldType.class)) + .collect(Collectors.toSet()); + final Set> shingleFieldMatchers = values.stream() + .map(value -> indexableFieldMatcher(value, ShingleFieldType.class)) + .collect(Collectors.toSet()); + final Set> prefixFieldMatchers = values.stream() + .map(value -> indexableFieldMatcher(value, PrefixFieldType.class)) + .collect(Collectors.toSet()); + + // the use of new ArrayList<>() here is to avoid the varargs form of arrayContainingInAnyOrder + assertThat( + parsedDocument.rootDoc().getFields("a_field"), + arrayContainingInAnyOrder(new ArrayList<>(rootFieldMatchers))); + + assertThat( + parsedDocument.rootDoc().getFields("a_field._index_prefix"), + arrayContainingInAnyOrder(new ArrayList<>(prefixFieldMatchers))); + + for (String name : asList("a_field._2gram", "a_field._3gram")) { + assertThat(parsedDocument.rootDoc().getFields(name), arrayContainingInAnyOrder(new ArrayList<>(shingleFieldMatchers))); + } + } + + private 
static Matcher indexableFieldMatcher(String value, Class fieldTypeClass) { + return Matchers.allOf( + hasProperty(IndexableField::stringValue, equalTo(value)), + hasProperty(IndexableField::fieldType, instanceOf(fieldTypeClass)) + ); + } + + private static void assertRootFieldMapper(SearchAsYouTypeFieldMapper mapper, + int maxShingleSize, + String analyzerName) { + + assertThat(mapper.maxShingleSize(), equalTo(maxShingleSize)); + assertThat(mapper.fieldType(), notNullValue()); + assertSearchAsYouTypeFieldType(mapper.fieldType(), maxShingleSize, analyzerName, mapper.prefixField().fieldType()); + + assertThat(mapper.prefixField(), notNullValue()); + assertThat(mapper.prefixField().fieldType().parentField, equalTo(mapper.name())); + assertPrefixFieldType(mapper.prefixField().fieldType(), maxShingleSize, analyzerName); + + + for (int shingleSize = 2; shingleSize <= maxShingleSize; shingleSize++) { + final ShingleFieldMapper shingleFieldMapper = mapper.shingleFields()[shingleSize - 2]; + assertThat(shingleFieldMapper, notNullValue()); + assertShingleFieldType(shingleFieldMapper.fieldType(), shingleSize, analyzerName, mapper.prefixField().fieldType()); + } + + final int numberOfShingleSubfields = (maxShingleSize - 2) + 1; + assertThat(mapper.shingleFields().length, equalTo(numberOfShingleSubfields)); + } + + private static void assertSearchAsYouTypeFieldType(SearchAsYouTypeFieldType fieldType, int maxShingleSize, + String analyzerName, + PrefixFieldType prefixFieldType) { + + assertThat(fieldType.shingleFields.length, equalTo(maxShingleSize-1)); + for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) { + assertThat(analyzer.name(), equalTo(analyzerName)); + } + int shingleSize = 2; + for (ShingleFieldType shingleField : fieldType.shingleFields) { + assertShingleFieldType(shingleField, shingleSize++, analyzerName, prefixFieldType); + } + + assertThat(fieldType.prefixField, equalTo(prefixFieldType)); + } + + private static void 
assertShingleFieldType(ShingleFieldType fieldType, + int shingleSize, + String analyzerName, + PrefixFieldType prefixFieldType) { + + assertThat(fieldType.shingleSize, equalTo(shingleSize)); + + for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) { + assertThat(analyzer.name(), equalTo(analyzerName)); + if (shingleSize > 1) { + final SearchAsYouTypeAnalyzer wrappedAnalyzer = (SearchAsYouTypeAnalyzer) analyzer.analyzer(); + assertThat(wrappedAnalyzer.shingleSize(), equalTo(shingleSize)); + assertThat(wrappedAnalyzer.indexPrefixes(), equalTo(false)); + } + } + + assertThat(fieldType.prefixFieldType, equalTo(prefixFieldType)); + + } + + private static void assertPrefixFieldType(PrefixFieldType fieldType, int shingleSize, String analyzerName) { + for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.searchAnalyzer())) { + assertThat(analyzer.name(), equalTo(analyzerName)); + } + + final SearchAsYouTypeAnalyzer wrappedIndexAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.indexAnalyzer().analyzer(); + final SearchAsYouTypeAnalyzer wrappedSearchAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.searchAnalyzer().analyzer(); + for (SearchAsYouTypeAnalyzer analyzer : asList(wrappedIndexAnalyzer, wrappedSearchAnalyzer)) { + assertThat(analyzer.shingleSize(), equalTo(shingleSize)); + } + assertThat(wrappedIndexAnalyzer.indexPrefixes(), equalTo(true)); + assertThat(wrappedSearchAnalyzer.indexPrefixes(), equalTo(false)); + } + + private static SearchAsYouTypeFieldMapper getRootFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, instanceOf(SearchAsYouTypeFieldMapper.class)); + return (SearchAsYouTypeFieldMapper) mapper; + } + + private static ShingleFieldMapper getShingleFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, 
instanceOf(ShingleFieldMapper.class)); + return (ShingleFieldMapper) mapper; + } + + private static PrefixFieldMapper getPrefixFieldMapper(DocumentMapper defaultMapper, String fieldName) { + final Mapper mapper = defaultMapper.mappers().getMapper(fieldName); + assertThat(mapper, instanceOf(PrefixFieldMapper.class)); + return (PrefixFieldMapper) mapper; + } +} diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java new file mode 100644 index 00000000000..523de918091 --- /dev/null +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/SearchAsYouTypeFieldTypeTests.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.Defaults; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.PrefixFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.SearchAsYouTypeFieldType; +import org.elasticsearch.index.mapper.SearchAsYouTypeFieldMapper.ShingleFieldType; +import org.junit.Before; + +import static java.util.Arrays.asList; +import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE; +import static org.hamcrest.Matchers.equalTo; + +public class SearchAsYouTypeFieldTypeTests extends FieldTypeTestCase { + + private static final String NAME = "a_field"; + private static final String PREFIX_NAME = NAME + "._index_prefix"; + + @Before + public void setupProperties() { + addModifier(new Modifier("max_shingle_size", false) { + @Override + public void modify(MappedFieldType ft) { + SearchAsYouTypeFieldType fieldType = (SearchAsYouTypeFieldType) ft; + fieldType.setShingleFields(new ShingleFieldType[] { + new ShingleFieldType(fieldType, 2), + new ShingleFieldType(fieldType, 3) + }); + } + }); + addModifier(new Modifier("index_prefixes", false) { + @Override + public void modify(MappedFieldType ft) { + SearchAsYouTypeFieldType fieldType = (SearchAsYouTypeFieldType) ft; + fieldType.setPrefixField(new PrefixFieldType(NAME, PREFIX_NAME, 1, 10)); + } + }); + } + + @Override + protected SearchAsYouTypeFieldType createDefaultFieldType() { + final SearchAsYouTypeFieldType fieldType = new SearchAsYouTypeFieldType(); + fieldType.setName(NAME); + fieldType.setPrefixField(new PrefixFieldType(NAME, PREFIX_NAME, Defaults.MIN_GRAM, 
Defaults.MAX_GRAM)); + fieldType.setShingleFields(new ShingleFieldType[] { new ShingleFieldType(fieldType, 2) }); + return fieldType; + } + + public void testTermQuery() { + final MappedFieldType fieldType = createDefaultFieldType(); + + fieldType.setIndexOptions(IndexOptions.DOCS); + assertThat(fieldType.termQuery("foo", null), equalTo(new TermQuery(new Term(NAME, "foo")))); + + fieldType.setIndexOptions(IndexOptions.NONE); + final IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> fieldType.termQuery("foo", null)); + assertThat(e.getMessage(), equalTo("Cannot search on field [" + NAME + "] since it is not indexed.")); + } + + public void testTermsQuery() { + final MappedFieldType fieldType = createDefaultFieldType(); + + fieldType.setIndexOptions(IndexOptions.DOCS); + assertThat(fieldType.termsQuery(asList("foo", "bar"), null), + equalTo(new TermInSetQuery(NAME, asList(new BytesRef("foo"), new BytesRef("bar"))))); + + fieldType.setIndexOptions(IndexOptions.NONE); + final IllegalArgumentException e = + expectThrows(IllegalArgumentException.class, () -> fieldType.termsQuery(asList("foo", "bar"), null)); + assertThat(e.getMessage(), equalTo("Cannot search on field [" + NAME + "] since it is not indexed.")); + } + + public void testPrefixQuery() { + final SearchAsYouTypeFieldType fieldType = createDefaultFieldType(); + + // this term should be a length that can be rewriteable to a term query on the prefix field + final String withinBoundsTerm = "foo"; + assertThat(fieldType.prefixQuery(withinBoundsTerm, CONSTANT_SCORE_REWRITE, null), + equalTo(new ConstantScoreQuery(new TermQuery(new Term(PREFIX_NAME, withinBoundsTerm))))); + + // our defaults don't allow a situation where a term can be too small + + // this term should be too long to be rewriteable to a term query on the prefix field + final String longTerm = "toolongforourprefixfieldthistermis"; + assertThat(fieldType.prefixQuery(longTerm, CONSTANT_SCORE_REWRITE, null), + equalTo(new 
PrefixQuery(new Term(NAME, longTerm)))); + } +} diff --git a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml new file mode 100644 index 00000000000..f9b76a7399a --- /dev/null +++ b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/10_basic.yml @@ -0,0 +1,1249 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + + - do: + index: + index: test + type: _doc + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + + # this document should not be matched + - do: + index: + index: test + type: _doc + id: 2 + body: + a_field: "xylophone xylophone xylophone" + + - do: + indices.refresh: {} + +--- +"get document": + - do: + get: + index: test + type: _doc + id: 1 + + - is_true: found + - match: { _source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on root field": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field: "quick" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + +# these "search on Xgram" tests repeat the same search for each term we expect to generate +--- +"term query on 2gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "quick brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + 
rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._2gram: "lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on 3gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "quick brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._3gram: "jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on 4gram": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "quick brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "brown 
fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._4gram: "fox jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +# we won't check all the terms that this field generates because there are many +--- +"term query on prefix field with prefix term": + + # search term as prefix + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "quick br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on prefix field with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"term query on prefix field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + term: + a_field._index_prefix: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 2gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + 
query: + prefix: + a_field._2gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 2gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._2gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 3gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._3gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 3gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._3gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 4gram with prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._4gram: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on 4gram with infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field._4gram: "brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quic" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick b" + + - match: { hits.total: 
1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick brown fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fo" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jum" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "fox jump lazy do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { 
hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "quick brown fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox" + + - match: { 
hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump lazy" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown fox jump lazy dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "dog" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick b" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + 
match_phrase_prefix: + a_field: "quick brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "quick brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix 
query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "brown fox jump lazy d" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"phrase prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick b" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown fox ju" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "quick brown fox jump la" + + - match: { hits.total: 1 
} + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "br" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown f" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox j" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox jump la" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with 5 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fox jump lazy d" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field out of order partial trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + 
body: + query: + match_bool_prefix: + a_field: "fox jump brown do" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"bool prefix query on root field out of order partial leading term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "fox jump brown qui" + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 1 prefix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "qui" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 2 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick br" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 3 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown f" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 4 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { 
hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 5 prefix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "quick brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 1 infix term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "br" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 2 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown f" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 3 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query with 4 infix terms": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy 
dog" } + +--- +"multi match bool prefix query with trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "do" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query out of order with partial trailing term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "fox jump brown do" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query out of order with partial leading term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "fox jump lazy qui" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } diff --git a/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml new file mode 100644 index 00000000000..5a96a11a475 --- /dev/null +++ b/modules/mapper-extras/src/test/resources/rest-api-spec/test/search-as-you-type/20_highlighting.yml @@ -0,0 +1,202 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: text + analyzer: simple + + - do: + index: + index: test + type: _doc + id: 1 + body: + a_field: "quick 
brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 2 complete term": + + - do: + search: + 
rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", 
"a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 97771745636..7b195bdc7b4 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -591,7 +591,7 @@ public class AnnotatedTextFieldMapper extends FieldMapper { @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { - return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions); + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, null, null); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml new file mode 100644 index 00000000000..957d26036b4 --- /dev/null +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix.yml @@ -0,0 +1,363 @@ +setup: + - skip: + version: " - 7.1.0" + reason: "added in 7.1.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: text + my_field2: + type: text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"scoring complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox jump" + + - match: { hits.total: 3 } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"scoring partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox ju" + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + 
rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: 
"brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + 
multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with boost": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown emu" + type: bool_prefix + fields: [ "my_field1", "my_field2^10" ] + fuzziness: 1 + + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0._source.my_field2: "emu" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 805b50e628b..5790248ead8 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -74,6 +74,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.IntPredicate; import static org.elasticsearch.index.mapper.TypeParsers.parseTextField; @@ -687,69 +688,12 @@ public class TextFieldMapper extends FieldMapper { } private Query analyzePhrasePrefix(TokenStream stream, int slop, int maxExpansions) throws IOException { - final MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions); - - if (slop > 0 - || prefixFieldType == null - || prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { - return query; - } - - int lastPos = query.getTerms().length - 1; - final Term[][] terms = query.getTerms(); - final int[] positions = query.getPositions(); - for (Term term : terms[lastPos]) { - String value = term.text(); - if (value.length() < prefixFieldType.minChars || value.length() > prefixFieldType.maxChars) { - return query; - } - } - - if (terms.length == 1) { - Term[] newTerms = Arrays.stream(terms[0]) - .map(term -> new Term(prefixFieldType.name(), term.bytes())) - .toArray(Term[]::new); - return new SynonymQuery(newTerms); - } - - SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(name(), true); - spanQuery.setSlop(slop); - int previousPos = -1; - for (int i = 0; i < terms.length; i++) { - Term[] posTerms = terms[i]; - int posInc = positions[i] - previousPos; - previousPos = positions[i]; - if (posInc > 1) { - spanQuery.addGap(posInc - 1); - } - if (i == lastPos) { - if (posTerms.length == 1) { - FieldMaskingSpanQuery fieldMask = - new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), posTerms[0].bytes())), name()); - spanQuery.addClause(fieldMask); - } else { - SpanQuery[] queries = Arrays.stream(posTerms) - .map(term -> new 
FieldMaskingSpanQuery( - new SpanTermQuery(new Term(prefixFieldType.name(), term.bytes())), name()) - ) - .toArray(SpanQuery[]::new); - spanQuery.addClause(new SpanOrQuery(queries)); - } - } else { - if (posTerms.length == 1) { - spanQuery.addClause(new SpanTermQuery(posTerms[0])); - } else { - SpanTermQuery[] queries = Arrays.stream(posTerms) - .map(SpanTermQuery::new) - .toArray(SpanTermQuery[]::new); - spanQuery.addClause(new SpanOrQuery(queries)); - } - } - } - return spanQuery.build(); + String prefixField = prefixFieldType == null || slop > 0 ? null : prefixFieldType.name(); + IntPredicate usePrefix = (len) -> len >= prefixFieldType.minChars && len <= prefixFieldType.maxChars; + return createPhrasePrefixQuery(stream, name(), slop, maxExpansions, prefixField, usePrefix); } - private static boolean hasGaps(TokenStream stream) throws IOException { + public static boolean hasGaps(TokenStream stream) throws IOException { assert stream instanceof CachingTokenFilter; PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); @@ -963,8 +907,8 @@ public class TextFieldMapper extends FieldMapper { return mpqb.build(); } - public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, - int slop, int maxExpansions) throws IOException { + public static Query createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, + String prefixField, IntPredicate usePrefixField) throws IOException { MultiPhrasePrefixQuery builder = new MultiPhrasePrefixQuery(field); builder.setSlop(slop); builder.setMaxExpansions(maxExpansions); @@ -987,6 +931,61 @@ public class TextFieldMapper extends FieldMapper { currentTerms.add(new Term(field, termAtt.getBytesRef())); } builder.add(currentTerms.toArray(new Term[0]), position); - return builder; + if (prefixField == null) { + return builder; + } + + int lastPos = builder.getTerms().length - 1; + final Term[][] terms = builder.getTerms(); 
+ final int[] positions = builder.getPositions(); + for (Term term : terms[lastPos]) { + String value = term.text(); + if (usePrefixField.test(value.length()) == false) { + return builder; + } + } + + if (terms.length == 1) { + Term[] newTerms = Arrays.stream(terms[0]) + .map(term -> new Term(prefixField, term.bytes())) + .toArray(Term[]::new); + return new SynonymQuery(newTerms); + } + + SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true); + spanQuery.setSlop(slop); + int previousPos = -1; + for (int i = 0; i < terms.length; i++) { + Term[] posTerms = terms[i]; + int posInc = positions[i] - previousPos; + previousPos = positions[i]; + if (posInc > 1) { + spanQuery.addGap(posInc - 1); + } + if (i == lastPos) { + if (posTerms.length == 1) { + FieldMaskingSpanQuery fieldMask = + new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField, posTerms[0].bytes())), field); + spanQuery.addClause(fieldMask); + } else { + SpanQuery[] queries = Arrays.stream(posTerms) + .map(term -> new FieldMaskingSpanQuery( + new SpanTermQuery(new Term(prefixField, term.bytes())), field) + ) + .toArray(SpanQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } else { + if (posTerms.length == 1) { + spanQuery.addClause(new SpanTermQuery(posTerms[0])); + } else { + SpanTermQuery[] queries = Arrays.stream(posTerms) + .map(SpanTermQuery::new) + .toArray(SpanTermQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } + } + return spanQuery.build(); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java new file mode 100644 index 00000000000..7f0c89f9df4 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java @@ -0,0 +1,393 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.support.QueryParsers; +import org.elasticsearch.index.search.MatchQuery; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.index.query.MatchQueryBuilder.FUZZY_REWRITE_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.FUZZY_TRANSPOSITIONS_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.MAX_EXPANSIONS_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.OPERATOR_FIELD; +import static org.elasticsearch.index.query.MatchQueryBuilder.PREFIX_LENGTH_FIELD; + +/** + * The boolean prefix query analyzes the input text and 
creates a boolean query containing a Term query for each term, except + * for the last term, which is used to create a prefix query + */ +public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "match_bool_prefix"; + + private static final Operator DEFAULT_OPERATOR = Operator.OR; + + private final String fieldName; + + private final Object value; + + private String analyzer; + + private Operator operator = DEFAULT_OPERATOR; + + private String minimumShouldMatch; + + private Fuzziness fuzziness; + + private int prefixLength = FuzzyQuery.defaultPrefixLength; + + private int maxExpansions = FuzzyQuery.defaultMaxExpansions; + + private boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; + + private String fuzzyRewrite; + + public MatchBoolPrefixQueryBuilder(String fieldName, Object value) { + if (Strings.isEmpty(fieldName)) { + throw new IllegalArgumentException("[" + NAME + "] requires fieldName"); + } + if (value == null) { + throw new IllegalArgumentException("[" + NAME + "] requires query value"); + } + this.fieldName = fieldName; + this.value = value; + } + + public MatchBoolPrefixQueryBuilder(StreamInput in) throws IOException { + super(in); + fieldName = in.readString(); + value = in.readGenericValue(); + analyzer = in.readOptionalString(); + operator = Operator.readFromStream(in); + minimumShouldMatch = in.readOptionalString(); + fuzziness = in.readOptionalWriteable(Fuzziness::new); + prefixLength = in.readVInt(); + maxExpansions = in.readVInt(); + fuzzyTranspositions = in.readBoolean(); + fuzzyRewrite = in.readOptionalString(); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(fieldName); + out.writeGenericValue(value); + out.writeOptionalString(analyzer); + operator.writeTo(out); + out.writeOptionalString(minimumShouldMatch); + out.writeOptionalWriteable(fuzziness); + out.writeVInt(prefixLength); + out.writeVInt(maxExpansions); + 
out.writeBoolean(fuzzyTranspositions); + out.writeOptionalString(fuzzyRewrite); + } + + /** Returns the field name used in this query. */ + public String fieldName() { + return this.fieldName; + } + + /** Returns the value used in this query. */ + public Object value() { + return this.value; + } + + /** Get the analyzer to use, if previously set, otherwise {@code null} */ + public String analyzer() { + return this.analyzer; + } + + /** + * Explicitly set the analyzer to use. Defaults to use explicit mapping + * config for the field, or, if not set, the default search analyzer. + */ + public MatchBoolPrefixQueryBuilder analyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + /** Sets the operator to use when using a boolean query. Defaults to {@code OR}. */ + public MatchBoolPrefixQueryBuilder operator(Operator operator) { + if (operator == null) { + throw new IllegalArgumentException("[" + NAME + "] requires operator to be non-null"); + } + this.operator = operator; + return this; + } + + /** Returns the operator to use in a boolean query.*/ + public Operator operator() { + return this.operator; + } + + /** Sets optional minimumShouldMatch value to apply to the query */ + public MatchBoolPrefixQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + /** Gets the minimumShouldMatch value */ + public String minimumShouldMatch() { + return this.minimumShouldMatch; + } + + /** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */ + public MatchBoolPrefixQueryBuilder fuzziness(Object fuzziness) { + this.fuzziness = Fuzziness.build(fuzziness); + return this; + } + + /** Gets the fuzziness used when evaluated to a fuzzy query type. 
*/ + public Fuzziness fuzziness() { + return this.fuzziness; + } + + /** + * Sets the length of the common (non-fuzzy) prefix for fuzzy match queries + * @param prefixLength non-negative length of prefix + * @throws IllegalArgumentException in case the prefix is negative + */ + public MatchBoolPrefixQueryBuilder prefixLength(int prefixLength) { + if (prefixLength < 0 ) { + throw new IllegalArgumentException("[" + NAME + "] requires prefix length to be non-negative."); + } + this.prefixLength = prefixLength; + return this; + } + + /** + * Gets the length of the common (non-fuzzy) prefix for fuzzy match queries + */ + public int prefixLength() { + return this.prefixLength; + } + + /** + * Sets the number of term expansions to use for fuzzy or prefix type queries; must be positive. + */ + public MatchBoolPrefixQueryBuilder maxExpansions(int maxExpansions) { + if (maxExpansions <= 0 ) { + throw new IllegalArgumentException("[" + NAME + "] requires maxExpansions to be positive."); + } + this.maxExpansions = maxExpansions; + return this; + } + + /** + * Gets the number of term expansions used for fuzzy or prefix type queries. + */ + public int maxExpansions() { + return this.maxExpansions; + } + + /** + * Sets whether transpositions are supported in fuzzy queries.

+ * The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein + * distance formula which supports transpositions. Setting transposition to false will + * switch to classic Levenshtein distance.
+ * If not set, Damerau-Levenshtein distance metric will be used. + */ + public MatchBoolPrefixQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } + + /** Gets the fuzzy query transposition setting. */ + public boolean fuzzyTranspositions() { + return this.fuzzyTranspositions; + } + + /** Sets the fuzzy_rewrite parameter controlling how the fuzzy query will get rewritten */ + public MatchBoolPrefixQueryBuilder fuzzyRewrite(String fuzzyRewrite) { + this.fuzzyRewrite = fuzzyRewrite; + return this; + } + + /** + * Get the fuzzy_rewrite parameter + * @see #fuzzyRewrite(String) + */ + public String fuzzyRewrite() { + return this.fuzzyRewrite; + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.startObject(fieldName); + builder.field(MatchQueryBuilder.QUERY_FIELD.getPreferredName(), value); + if (analyzer != null) { + builder.field(MatchQueryBuilder.ANALYZER_FIELD.getPreferredName(), analyzer); + } + builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); + if (minimumShouldMatch != null) { + builder.field(MatchQueryBuilder.MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatch); + } + if (fuzziness != null) { + fuzziness.toXContent(builder, params); + } + builder.field(PREFIX_LENGTH_FIELD.getPreferredName(), prefixLength); + builder.field(MAX_EXPANSIONS_FIELD.getPreferredName(), maxExpansions); + builder.field(FUZZY_TRANSPOSITIONS_FIELD.getPreferredName(), fuzzyTranspositions); + if (fuzzyRewrite != null) { + builder.field(FUZZY_REWRITE_FIELD.getPreferredName(), fuzzyRewrite); + } + printBoostAndQueryName(builder); + builder.endObject(); + builder.endObject(); + } + + public static MatchBoolPrefixQueryBuilder fromXContent(XContentParser parser) throws IOException { + String fieldName = null; + Object value = null; + float boost = AbstractQueryBuilder.DEFAULT_BOOST; + 
String analyzer = null; + Operator operator = DEFAULT_OPERATOR; + String minimumShouldMatch = null; + Fuzziness fuzziness = null; + int prefixLength = FuzzyQuery.defaultPrefixLength; + int maxExpansion = FuzzyQuery.defaultMaxExpansions; + boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; + String fuzzyRewrite = null; + String queryName = null; + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, currentFieldName); + fieldName = currentFieldName; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if (MatchQueryBuilder.QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + value = parser.objectText(); + } else if (MatchQueryBuilder.ANALYZER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + analyzer = parser.text(); + } else if (OPERATOR_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + operator = Operator.fromString(parser.text()); + } else if (MatchQueryBuilder.MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + minimumShouldMatch = parser.textOrNull(); + } else if (Fuzziness.FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + fuzziness = Fuzziness.parse(parser); + } else if (PREFIX_LENGTH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + prefixLength = parser.intValue(); + } else if (MAX_EXPANSIONS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + maxExpansion = parser.intValue(); + } else if (FUZZY_TRANSPOSITIONS_FIELD.match(currentFieldName, 
parser.getDeprecationHandler())) { + fuzzyTranspositions = parser.booleanValue(); + } else if (FUZZY_REWRITE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + fuzzyRewrite = parser.textOrNull(); + } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + boost = parser.floatValue(); + } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + queryName = parser.text(); + } else { + throw new ParsingException(parser.getTokenLocation(), + "[" + NAME + "] query does not support [" + currentFieldName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), + "[" + NAME + "] unknown token [" + token + "] after [" + currentFieldName + "]"); + } + } + } else { + throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, parser.currentName()); + fieldName = parser.currentName(); + value = parser.objectText(); + } + } + + MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value); + queryBuilder.analyzer(analyzer); + queryBuilder.operator(operator); + queryBuilder.minimumShouldMatch(minimumShouldMatch); + queryBuilder.boost(boost); + queryBuilder.queryName(queryName); + if (fuzziness != null) { + queryBuilder.fuzziness(fuzziness); + } + queryBuilder.prefixLength(prefixLength); + queryBuilder.maxExpansions(maxExpansion); + queryBuilder.fuzzyTranspositions(fuzzyTranspositions); + queryBuilder.fuzzyRewrite(fuzzyRewrite); + return queryBuilder; + } + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + if (analyzer != null && context.getIndexAnalyzers().get(analyzer) == null) { + throw new QueryShardException(context, "[" + NAME + "] analyzer [" + analyzer + "] not found"); + } + + final MatchQuery matchQuery = new MatchQuery(context); + if (analyzer != null) { + matchQuery.setAnalyzer(analyzer); + } + matchQuery.setOccur(operator.toBooleanClauseOccur()); + 
matchQuery.setFuzziness(fuzziness); + matchQuery.setFuzzyPrefixLength(prefixLength); + matchQuery.setMaxExpansions(maxExpansions); + matchQuery.setTranspositions(fuzzyTranspositions); + matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(fuzzyRewrite, null, LoggingDeprecationHandler.INSTANCE)); + + final Query query = matchQuery.parse(MatchQuery.Type.BOOLEAN_PREFIX, fieldName, value); + return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + } + + @Override + protected boolean doEquals(MatchBoolPrefixQueryBuilder other) { + return Objects.equals(fieldName, other.fieldName) && + Objects.equals(value, other.value) && + Objects.equals(analyzer, other.analyzer) && + Objects.equals(operator, other.operator) && + Objects.equals(minimumShouldMatch, other.minimumShouldMatch) && + Objects.equals(fuzziness, other.fuzziness) && + Objects.equals(prefixLength, other.prefixLength) && + Objects.equals(maxExpansions, other.maxExpansions) && + Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) && + Objects.equals(fuzzyRewrite, other.fuzzyRewrite); + } + + @Override + protected int doHashCode() { + return Objects.hash(fieldName, value, analyzer, operator, minimumShouldMatch, fuzziness, prefixLength, maxExpansions, + fuzzyTranspositions, fuzzyRewrite); + } + + @Override + public String getWriteableName() { + return NAME; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 9f2c85106de..ec8392c90c8 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -129,7 +129,12 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder 1) { // phrase @@ -433,7 +469,7 @@ public class MatchQuery { return analyzeBoolean(field, stream); } else { // complex case: multiple positions - return 
analyzeMultiBoolean(field, stream, operator); + return analyzeMultiBoolean(field, stream, operator, type == Type.BOOLEAN_PREFIX); } } } catch (IOException e) { @@ -462,13 +498,13 @@ public class MatchQuery { } } - private SpanQuery newSpanQuery(Term[] terms, boolean prefix) { + private SpanQuery newSpanQuery(Term[] terms, boolean isPrefix) { if (terms.length == 1) { - return prefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); + return isPrefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); } SpanQuery[] spanQueries = new SpanQuery[terms.length]; for (int i = 0; i < terms.length; i++) { - spanQueries[i] = prefix ? new SpanTermQuery(terms[i]) : + spanQueries[i] = isPrefix ? new SpanTermQuery(terms[i]) : /* NOTE(review): the branches here look inverted relative to the single-term case above, where isPrefix selects spanPrefixQuery and the non-prefix case selects SpanTermQuery - confirm before merging */ fieldType.spanPrefixQuery(terms[i].text(), spanRewriteMethod, context); } return new SpanOrQuery(spanQueries); @@ -479,7 +515,7 @@ public class MatchQuery { return createSpanQuery(in, field, false); } - private SpanQuery createSpanQuery(TokenStream in, String field, boolean prefix) throws IOException { + private SpanQuery createSpanQuery(TokenStream in, String field, boolean isPrefix) throws IOException { TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncAtt = in.getAttribute(PositionIncrementAttribute.class); if (termAtt == null) { @@ -498,7 +534,7 @@ public class MatchQuery { lastTerm = new Term(field, termAtt.getBytesRef()); } if (lastTerm != null) { - SpanQuery spanQuery = prefix ? + SpanQuery spanQuery = isPrefix ? fieldType.spanPrefixQuery(lastTerm.text(), spanRewriteMethod, context) : new SpanTermQuery(lastTerm); builder.addClause(spanQuery); } @@ -537,6 +573,74 @@ public class MatchQuery { } } + /** + * Builds a new prefix query instance.
+ */ + protected Query newPrefixQuery(String field, Term term) { + try { + return fieldType.prefixQuery(term.text(), null, context); + } catch (RuntimeException e) { + if (lenient) { + return newLenientFieldQuery(field, e); + } + throw e; + } + } + + private Query analyzeTerm(String field, TokenStream stream, boolean isPrefix) throws IOException { + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + + stream.reset(); + if (!stream.incrementToken()) { + throw new AssertionError(); + } + final Term term = new Term(field, termAtt.getBytesRef()); + int lastOffset = offsetAtt.endOffset(); + stream.end(); + return isPrefix && lastOffset == offsetAtt.endOffset() ? newPrefixQuery(field, term) : newTermQuery(term); + } + + private void add(BooleanQuery.Builder q, String field, List current, BooleanClause.Occur operator, boolean isPrefix) { + if (current.isEmpty()) { + return; + } + if (current.size() == 1) { + if (isPrefix) { + q.add(newPrefixQuery(field, current.get(0)), operator); + } else { + q.add(newTermQuery(current.get(0)), operator); + } + } else { + // We don't apply prefix on synonyms + q.add(newSynonymQuery(current.toArray(new Term[current.size()])), operator); + } + } + + private Query analyzeMultiBoolean(String field, TokenStream stream, + BooleanClause.Occur operator, boolean isPrefix) throws IOException { + BooleanQuery.Builder q = newBooleanQuery(); + List currentQuery = new ArrayList<>(); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + + stream.reset(); + int lastOffset = 0; + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + add(q, field, currentQuery, operator, false); + currentQuery.clear(); + } + 
currentQuery.add(new Term(field, termAtt.getBytesRef())); + lastOffset = offsetAtt.endOffset(); + } + stream.end(); + add(q, field, currentQuery, operator, isPrefix && lastOffset == offsetAtt.endOffset()); + return q.build(); + } + @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { @@ -577,6 +681,62 @@ public class MatchQuery { } } + private Query analyzeGraphBoolean(String field, TokenStream source, + BooleanClause.Occur operator, boolean isPrefix) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + final Query queryPos; + boolean usePrefix = isPrefix && end == -1; + if (graph.hasSidePath(start)) { + final Iterator it = graph.getFiniteStrings(start, end); + Iterator queries = new Iterator() { + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public Query next() { + TokenStream ts = it.next(); + final Type type; + if (getAutoGenerateMultiTermSynonymsPhraseQuery()) { + type = usePrefix + ? Type.PHRASE_PREFIX + : Type.PHRASE; + } else { + type = Type.BOOLEAN; + } + return createFieldQuery(ts, type, BooleanClause.Occur.MUST, field, 0); + } + }; + queryPos = newGraphSynonymQuery(queries); + } else { + Term[] terms = graph.getTerms(field, start); + assert terms.length > 0; + if (terms.length == 1) { + queryPos = usePrefix ? 
newPrefixQuery(field, terms[0]) : newTermQuery(terms[0]); + } else { + // We don't apply prefix on synonyms + queryPos = newSynonymQuery(terms); + } + } + if (queryPos != null) { + builder.add(queryPos, operator); + } + } + return builder.build(); + } + private Query analyzeGraphPhrase(TokenStream source, String field, Type type, int slop) throws IOException { assert type == Type.PHRASE_PREFIX || type == Type.PHRASE; @@ -615,13 +775,13 @@ public class MatchQuery { } lastState = end; final SpanQuery queryPos; - boolean endPrefix = end == -1 && type == Type.PHRASE_PREFIX; + boolean usePrefix = end == -1 && type == Type.PHRASE_PREFIX; if (graph.hasSidePath(start)) { List queries = new ArrayList<>(); Iterator it = graph.getFiniteStrings(start, end); while (it.hasNext()) { TokenStream ts = it.next(); - SpanQuery q = createSpanQuery(ts, field, endPrefix); + SpanQuery q = createSpanQuery(ts, field, usePrefix); if (q != null) { if (queries.size() >= maxClauseCount) { throw new BooleanQuery.TooManyClauses(); @@ -640,7 +800,7 @@ public class MatchQuery { if (terms.length >= maxClauseCount) { throw new BooleanQuery.TooManyClauses(); } - queryPos = newSpanQuery(terms, endPrefix); + queryPos = newSpanQuery(terms, usePrefix); } if (queryPos != null) { diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 88fd5293392..667d3a3823d 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -66,6 +66,7 @@ public class MultiMatchQuery extends MatchQuery { case PHRASE_PREFIX: case BEST_FIELDS: case MOST_FIELDS: + case BOOL_PREFIX: queries = buildFieldQueries(type, fieldNames, value, minimumShouldMatch); break; @@ -179,10 +180,23 @@ public class MultiMatchQuery extends MatchQuery { } @Override - public Query newTermQuery(Term term) { + protected Query 
newTermQuery(Term term) { return blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); } + @Override + protected Query newPrefixQuery(String field, Term term) { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.prefixQuery(term.text(), null, context); + if (fieldType.boost != 1f) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); + } + return new DisjunctionMaxQuery(disjunctions, tieBreaker); + } + @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index bd1bbb98281..8a4f4981605 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ParseFieldRegistry; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; @@ -826,6 +827,8 @@ public class SearchModule { registerQuery(new QuerySpec<>(IntervalQueryBuilder.NAME, IntervalQueryBuilder::new, IntervalQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(DistanceFeatureQueryBuilder.NAME, DistanceFeatureQueryBuilder::new, DistanceFeatureQueryBuilder::fromXContent)); + registerQuery( + new QuerySpec<>(MatchBoolPrefixQueryBuilder.NAME, MatchBoolPrefixQueryBuilder::new, MatchBoolPrefixQueryBuilder::fromXContent)); if 
(ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java new file mode 100644 index 00000000000..b3a3a2512a5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilderTests.java @@ -0,0 +1,284 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.MockSynonymAnalyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.index.search.MatchQuery; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.Arrays.asList; +import static org.hamcrest.CoreMatchers.anyOf; +import static org.hamcrest.CoreMatchers.everyItem; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.hasProperty; +import static org.hamcrest.Matchers.hasSize; + +public class MatchBoolPrefixQueryBuilderTests extends AbstractQueryTestCase { + + @Override + protected MatchBoolPrefixQueryBuilder doCreateTestQueryBuilder() { + final String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME); + final Object value = IntStream.rangeClosed(0, randomIntBetween(0, 3)) + .mapToObj(i -> randomAlphaOfLengthBetween(1, 10) + " ") + .collect(Collectors.joining()) + .trim(); + + final MatchBoolPrefixQueryBuilder queryBuilder = new MatchBoolPrefixQueryBuilder(fieldName, value); + + if (randomBoolean() && isTextField(fieldName)) { + 
queryBuilder.analyzer(randomFrom("simple", "keyword", "whitespace")); + } + + if (randomBoolean()) { + queryBuilder.operator(randomFrom(Operator.values())); + } + + if (randomBoolean()) { + queryBuilder.minimumShouldMatch(randomMinimumShouldMatch()); + } + + if (randomBoolean()) { + queryBuilder.fuzziness(randomFuzziness(fieldName)); + } + + if (randomBoolean()) { + queryBuilder.prefixLength(randomIntBetween(0, 10)); + } + + if (randomBoolean()) { + queryBuilder.maxExpansions(randomIntBetween(1, 1000)); + } + + if (randomBoolean()) { + queryBuilder.fuzzyTranspositions(randomBoolean()); + } + + if (randomBoolean()) { + queryBuilder.fuzzyRewrite(getRandomRewriteMethod()); + } + + return queryBuilder; + } + + @Override + protected void doAssertLuceneQuery(MatchBoolPrefixQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + assertThat(query, notNullValue()); + assertThat(query, anyOf(instanceOf(BooleanQuery.class), instanceOf(PrefixQuery.class))); + + if (query instanceof PrefixQuery) { + final PrefixQuery prefixQuery = (PrefixQuery) query; + assertThat(prefixQuery.getPrefix().text(), equalToIgnoringCase((String) queryBuilder.value())); + } else { + assertThat(query, instanceOf(BooleanQuery.class)); + final BooleanQuery booleanQuery = (BooleanQuery) query; + // all queries except the last should be TermQuery or SynonymQuery + final Set allQueriesExceptLast = IntStream.range(0, booleanQuery.clauses().size() - 1) + .mapToObj(booleanQuery.clauses()::get) + .map(BooleanClause::getQuery) + .collect(Collectors.toSet()); + assertThat(allQueriesExceptLast, anyOf( + everyItem(instanceOf(TermQuery.class)), + everyItem(instanceOf(SynonymQuery.class)), + everyItem(instanceOf(FuzzyQuery.class)) + )); + + if (allQueriesExceptLast.stream().anyMatch(subQuery -> subQuery instanceof FuzzyQuery)) { + assertThat(queryBuilder.fuzziness(), notNullValue()); + } + allQueriesExceptLast.stream().filter(subQuery -> subQuery instanceof FuzzyQuery).forEach(subQuery 
-> { + final FuzzyQuery fuzzyQuery = (FuzzyQuery) subQuery; + assertThat(fuzzyQuery.getPrefixLength(), equalTo(queryBuilder.prefixLength())); + assertThat(fuzzyQuery.getTranspositions(), equalTo(queryBuilder.fuzzyTranspositions())); + }); + + // the last query should be PrefixQuery + final Query shouldBePrefixQuery = booleanQuery.clauses().get(booleanQuery.clauses().size() - 1).getQuery(); + assertThat(shouldBePrefixQuery, instanceOf(PrefixQuery.class)); + + if (queryBuilder.minimumShouldMatch() != null) { + final int optionalClauses = + (int) booleanQuery.clauses().stream().filter(clause -> clause.getOccur() == BooleanClause.Occur.SHOULD).count(); + final int expected = Queries.calculateMinShouldMatch(optionalClauses, queryBuilder.minimumShouldMatch()); + assertThat(booleanQuery.getMinimumNumberShouldMatch(), equalTo(expected)); + } + } + } + + public void testIllegalValues() { + { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchBoolPrefixQueryBuilder(null, "value")); + assertEquals("[match_bool_prefix] requires fieldName", e.getMessage()); + } + + { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchBoolPrefixQueryBuilder("name", null)); + assertEquals("[match_bool_prefix] requires query value", e.getMessage()); + } + + { + final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder("name", "value"); + builder.analyzer("bogusAnalyzer"); + QueryShardException e = expectThrows(QueryShardException.class, () -> builder.toQuery(createShardContext())); + assertThat(e.getMessage(), containsString("analyzer [bogusAnalyzer] not found")); + } + } + + public void testFromSimpleJson() throws IOException { + final String simple = + "{" + + "\"match_bool_prefix\": {" + + "\"fieldName\": \"fieldValue\"" + + "}" + + "}"; + final String expected = + "{" + + "\"match_bool_prefix\": {" + + "\"fieldName\": {" + + "\"query\": \"fieldValue\"," + + "\"operator\": \"OR\"," + + 
"\"prefix_length\": 0," + + "\"max_expansions\": 50," + + "\"fuzzy_transpositions\": true," + + "\"boost\": 1.0" + + "}" + + "}" + + "}"; + + final MatchBoolPrefixQueryBuilder builder = (MatchBoolPrefixQueryBuilder) parseQuery(simple); + checkGeneratedJson(expected, builder); + } + + public void testFromJson() throws IOException { + final String expected = + "{" + + "\"match_bool_prefix\": {" + + "\"fieldName\": {" + + "\"query\": \"fieldValue\"," + + "\"analyzer\": \"simple\"," + + "\"operator\": \"AND\"," + + "\"minimum_should_match\": \"2\"," + + "\"fuzziness\": \"1\"," + + "\"prefix_length\": 1," + + "\"max_expansions\": 10," + + "\"fuzzy_transpositions\": false," + + "\"fuzzy_rewrite\": \"constant_score\"," + + "\"boost\": 2.0" + + "}" + + "}" + + "}"; + + final MatchBoolPrefixQueryBuilder builder = (MatchBoolPrefixQueryBuilder) parseQuery(expected); + checkGeneratedJson(expected, builder); + } + + public void testParseFailsWithMultipleFields() { + { + final String json = + "{" + + "\"match_bool_prefix\" : {" + + "\"field_name_1\" : {" + + "\"query\" : \"foo\"" + + "}," + + "\"field_name_2\" : {" + + "\"query\" : \"foo\"\n" + + "}" + + "}" + + "}"; + final ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(json)); + assertEquals( + "[match_bool_prefix] query doesn't support multiple fields, found [field_name_1] and [field_name_2]", e.getMessage()); + } + + { + final String simpleJson = + "{" + + "\"match_bool_prefix\" : {" + + "\"field_name_1\" : \"foo\"," + + "\"field_name_2\" : \"foo\"" + + "}" + + "}"; + final ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(simpleJson)); + assertEquals( + "[match_bool_prefix] query doesn't support multiple fields, found [field_name_1] and [field_name_2]", e.getMessage()); + } + } + + public void testAnalysis() throws Exception { + final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder(STRING_FIELD_NAME, "foo bar baz"); + final Query query = 
builder.toQuery(createShardContext()); + + assertBooleanQuery(query, asList( + new TermQuery(new Term(STRING_FIELD_NAME, "foo")), + new TermQuery(new Term(STRING_FIELD_NAME, "bar")), + new PrefixQuery(new Term(STRING_FIELD_NAME, "baz")) + )); + } + + public void testAnalysisSynonym() throws Exception { + final MatchQuery matchQuery = new MatchQuery(createShardContext()); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + final Query query = matchQuery.parse(MatchQuery.Type.BOOLEAN_PREFIX, STRING_FIELD_NAME, "fox dogs red"); + + assertBooleanQuery(query, asList( + new TermQuery(new Term(STRING_FIELD_NAME, "fox")), + new SynonymQuery(new Term(STRING_FIELD_NAME, "dogs"), new Term(STRING_FIELD_NAME, "dog")), + new PrefixQuery(new Term(STRING_FIELD_NAME, "red")) + )); + } + + public void testAnalysisSingleTerm() throws Exception { + final MatchBoolPrefixQueryBuilder builder = new MatchBoolPrefixQueryBuilder(STRING_FIELD_NAME, "foo"); + final Query query = builder.toQuery(createShardContext()); + assertThat(query, equalTo(new PrefixQuery(new Term(STRING_FIELD_NAME, "foo")))); + } + + private static void assertBooleanQuery(Query actual, List expectedClauseQueries) { + assertThat(actual, instanceOf(BooleanQuery.class)); + final BooleanQuery actualBooleanQuery = (BooleanQuery) actual; + assertThat(actualBooleanQuery.clauses(), hasSize(expectedClauseQueries.size())); + assertThat(actualBooleanQuery.clauses(), everyItem(hasProperty("occur", equalTo(BooleanClause.Occur.SHOULD)))); + + for (int i = 0; i < actualBooleanQuery.clauses().size(); i++) { + final Query clauseQuery = actualBooleanQuery.clauses().get(i).getQuery(); + assertThat(clauseQuery, equalTo(expectedClauseQueries.get(i))); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index c258cce6c7c..e9f2b447da1 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CannedBinaryTokenStream; +import org.apache.lucene.analysis.MockSynonymAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; @@ -28,6 +29,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -394,6 +396,76 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase { @@ -91,10 +93,11 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase type == Type.PHRASE_PREFIX || type == Type.BOOL_PREFIX, () -> randomFrom(MultiMatchQueryBuilder.Type.values()))); } } @@ -104,7 +107,7 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase