diff --git a/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index edd444aae43..243bf75660f 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; @@ -55,6 +56,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { public static final ParseField ANALYZER_FIELD = new ParseField("analyzer"); public static final ParseField TYPE_FIELD = new ParseField("type").withAllDeprecated("match_phrase and match_phrase_prefix query"); public static final ParseField QUERY_FIELD = new ParseField("query"); + public static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query"); /** The name for the match query */ public static final String NAME = "match"; @@ -98,6 +100,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { private Float cutoffFrequency = null; + private boolean autoGenerateSynonymsPhraseQuery = true; + /** * Constructs a new match query. 
*/ @@ -133,6 +137,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { fuzzyRewrite = in.readOptionalString(); fuzziness = in.readOptionalWriteable(Fuzziness::new); cutoffFrequency = in.readOptionalFloat(); + if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + autoGenerateSynonymsPhraseQuery = in.readBoolean(); + } } @Override @@ -153,6 +160,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { out.writeOptionalString(fuzzyRewrite); out.writeOptionalWriteable(fuzziness); out.writeOptionalFloat(cutoffFrequency); + if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + out.writeBoolean(autoGenerateSynonymsPhraseQuery); + } } /** Returns the field name used in this query. */ @@ -395,6 +405,20 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { return this.zeroTermsQuery; } + + public MatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) { + this.autoGenerateSynonymsPhraseQuery = enable; + return this; + } + + /** + * Whether phrase queries should be automatically generated for multi terms synonyms. + * Defaults to true. 
+ */ + public boolean autoGenerateSynonymsPhraseQuery() { + return autoGenerateSynonymsPhraseQuery; + } + @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -431,6 +455,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { if (cutoffFrequency != null) { builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency); } + builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery); printBoostAndQueryName(builder); builder.endObject(); builder.endObject(); @@ -457,6 +482,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { matchQuery.setLenient(lenient); matchQuery.setCommonTermsCutoff(cutoffFrequency); matchQuery.setZeroTermsQuery(zeroTermsQuery); + matchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); Query query = matchQuery.parse(type, fieldName, value); return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); @@ -478,14 +504,15 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { Objects.equals(lenient, other.lenient) && Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) && Objects.equals(zeroTermsQuery, other.zeroTermsQuery) && - Objects.equals(cutoffFrequency, other.cutoffFrequency); + Objects.equals(cutoffFrequency, other.cutoffFrequency) && + Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery); } @Override protected int doHashCode() { return Objects.hash(fieldName, value, type, operator, analyzer, slop, fuzziness, prefixLength, maxExpansions, minimumShouldMatch, - fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency); + fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency, autoGenerateSynonymsPhraseQuery); } @Override @@ -510,6 +537,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { boolean lenient = MatchQuery.DEFAULT_LENIENCY; Float cutOffFrequency = 
null; ZeroTermsQuery zeroTermsQuery = MatchQuery.DEFAULT_ZERO_TERMS_QUERY; + boolean autoGenerateSynonymsPhraseQuery = true; String queryName = null; String currentFieldName = null; XContentParser.Token token; @@ -572,6 +600,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { } } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) { queryName = parser.text(); + } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) { + autoGenerateSynonymsPhraseQuery = parser.booleanValue(); } else { throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + currentFieldName + "]"); @@ -610,6 +640,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { matchQuery.cutoffFrequency(cutOffFrequency); } matchQuery.zeroTermsQuery(zeroTermsQuery); + matchQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); matchQuery.queryName(queryName); matchQuery.boost(boost); return matchQuery; diff --git a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 89ad4f616ac..e91d18f26be 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -22,17 +22,16 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.search.MultiMatchQuery; @@ -74,6 +73,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder fieldsBoosts; @@ -91,6 +92,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuildertrue. + */ + public boolean autoGenerateSynonymsPhraseQuery() { + return autoGenerateSynonymsPhraseQuery; + } + @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -551,6 +572,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuildertrue. + */ + public boolean autoGenerateSynonymsPhraseQuery() { + return autoGenerateSynonymsPhraseQuery; + } + @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -682,6 +704,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder fieldsAndWeights = new HashMap<>(); + boolean autoGenerateSynonymsPhraseQuery = true; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); @@ -799,6 +823,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuildertrue. 
+ */ + public boolean autoGenerateSynonymsPhraseQuery() { + return autoGenerateSynonymsPhraseQuery; + } + @Override public int hashCode() { - return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix); + return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix, autoGenerateSynonymsPhraseQuery); } @Override @@ -363,8 +378,10 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp return false; } Settings other = (Settings) obj; - return Objects.equals(lenient, other.lenient) && Objects.equals(analyzeWildcard, other.analyzeWildcard) - && Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix); + return Objects.equals(lenient, other.lenient) && + Objects.equals(analyzeWildcard, other.analyzeWildcard) && + Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix) && + Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery); } } } diff --git a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java index 75db17f0e4a..c486749626a 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -105,6 +105,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuildertrue. 
+ */ + public boolean autoGenerateSynonymsPhraseQuery() { + return settings.autoGenerateSynonymsPhraseQuery(); + } + + @Override protected Query doToQuery(QueryShardContext context) throws IOException { // field names in builder can have wildcards etc, need to resolve them here @@ -459,7 +480,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder builders = getDocs(); + indexRandom(true, false, builders); + SearchResponse searchResponse = client().prepareSearch(INDEX) + .setQuery( + QueryBuilders.matchQuery("field", "wtf") + .analyzer("lower_graphsyns") + .operator(Operator.AND)) + .get(); + assertHitCount(searchResponse, 3L); + assertSearchHits(searchResponse, "1", "2", "3"); + } + public void testPhrasePrefix() throws ExecutionException, InterruptedException { List builders = getDocs(); builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!")); diff --git a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java index 9c4ff8d85b5..733a910527c 100644 --- a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java @@ -316,6 +316,7 @@ public class QueryStringIT extends ESIntegTestCase { QueryBuilders.queryStringQuery("say what the fudge") .defaultField("field") .defaultOperator(Operator.AND) + .autoGenerateSynonymsPhraseQuery(false) .analyzer("lower_graphsyns")).get(); assertHitCount(searchResponse, 1L); @@ -326,6 +327,7 @@ public class QueryStringIT extends ESIntegTestCase { QueryBuilders.queryStringQuery("three what the fudge foo") .defaultField("field") .defaultOperator(Operator.OR) + .autoGenerateSynonymsPhraseQuery(false) .analyzer("lower_graphsyns")).get(); assertHitCount(searchResponse, 6L); @@ -336,11 +338,22 @@ public class QueryStringIT extends ESIntegTestCase { QueryBuilders.queryStringQuery("three what the fudge foo") .defaultField("field") 
.defaultOperator(Operator.OR) + .autoGenerateSynonymsPhraseQuery(false) .analyzer("lower_graphsyns") .minimumShouldMatch("80%")).get(); assertHitCount(searchResponse, 3L); assertSearchHits(searchResponse, "1", "2", "6"); + + // multi terms synonyms phrase + searchResponse = client().prepareSearch(index).setQuery( + QueryBuilders.queryStringQuery("what the fudge") + .defaultField("field") + .defaultOperator(Operator.AND) + .analyzer("lower_graphsyns")) + .get(); + assertHitCount(searchResponse, 3L); + assertSearchHits(searchResponse, "1", "2", "3"); } private void assertHits(SearchHits hits, String... ids) { diff --git a/docs/reference/query-dsl/match-query.asciidoc b/docs/reference/query-dsl/match-query.asciidoc index c0081f1de9a..ed47a1c8f1a 100644 --- a/docs/reference/query-dsl/match-query.asciidoc +++ b/docs/reference/query-dsl/match-query.asciidoc @@ -143,6 +143,41 @@ IMPORTANT: The `cutoff_frequency` option operates on a per-shard-level. This mea that when trying it out on test indexes with low document numbers you should follow the advice in {defguide}/relevance-is-broken.html[Relevance is broken]. +[[query-dsl-match-query-synonyms]] +===== Synonyms + +The `match` query supports multi-term synonym expansion with the <<analysis-synonym-graph-tokenfilter, synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-term synonym. +For example, the following synonym: `"ny, new york"` would produce: + +`(ny OR ("new york"))` + +It is also possible to match multi-term synonyms with conjunctions instead: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "match" : { + "message": { + "query" : "ny city", + "auto_generate_synonyms_phrase_query" : false + } + } + } +} +-------------------------------------------------- +// CONSOLE + +The example above creates a boolean query: + +`(ny OR (new AND york)) city` + +that matches documents with the term `ny` or the conjunction `new AND york`.
+By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`. + + .Comparison to query_string / field ************************************************** diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 4ad75c6ca75..48c3f77d3cb 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -136,8 +136,8 @@ follows: * plus `tie_breaker * _score` for all other matching fields Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`, -`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query` -and `cutoff_frequency`, as explained in <>. +`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`, +`cutoff_frequency` and `auto_generate_synonyms_phrase_query`, as explained in <<query-dsl-match-query, match query>>. [IMPORTANT] [[operator-min]] diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index a7c5371169a..992c7f5e2e8 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -110,6 +110,9 @@ the query string. This allows to use a field that has a different analysis chain for exact matching. Look <> for a comprehensive example. +|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi-term synonyms. +Defaults to `true`. + |`all_fields` | deprecated[6.0.0, set `default_field` to `*` instead] Perform the query on all fields detected in the mapping that can be queried. Will be used by default when the `_all` field is disabled and no @@ -273,4 +276,37 @@ GET /_search -------------------------------------------------- // CONSOLE +[float] +==== Synonyms + +The `query_string` query supports multi-term synonym expansion with the <<analysis-synonym-graph-tokenfilter, synonym_graph>> token filter.
When this filter is used, the parser creates a phrase query for each multi-term synonym. +For example, the following synonym: `"ny, new york"` would produce: + +`(ny OR ("new york"))` + +It is also possible to match multi-term synonyms with conjunctions instead: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "query_string" : { + "default_field": "title", + "query" : "ny city", + "auto_generate_synonyms_phrase_query" : false + } + } +} +-------------------------------------------------- +// CONSOLE + +The example above creates a boolean query: + +`(ny OR (new AND york)) city` + +that matches documents with the term `ny` or the conjunction `new AND york`. +By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`. + include::query-string-syntax.asciidoc[] diff --git a/docs/reference/query-dsl/simple-query-string-query.asciidoc b/docs/reference/query-dsl/simple-query-string-query.asciidoc index 7ef7ba60cc5..1251803fca9 100644 --- a/docs/reference/query-dsl/simple-query-string-query.asciidoc +++ b/docs/reference/query-dsl/simple-query-string-query.asciidoc @@ -62,6 +62,9 @@ the query string. This allows to use a field that has a different analysis chain for exact matching. Look <> for a comprehensive example. +|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi-term synonyms. +Defaults to `true`. + |`all_fields` | Perform the query on all fields detected in the mapping that can be queried. Will be used by default when the `_all` field is disabled and no `default_field` is specified index settings, and no `fields` are specified. @@ -160,3 +163,36 @@ GET /_search The available flags are: `ALL`, `NONE`, `AND`, `OR`, `NOT`, `PREFIX`, `PHRASE`, `PRECEDENCE`, `ESCAPE`, `WHITESPACE`, `FUZZY`, `NEAR`, and `SLOP`. + +[float] +==== Synonyms + +The `simple_query_string` query supports multi-term synonym expansion with the <<analysis-synonym-graph-tokenfilter, synonym_graph>> token filter.
When this filter is used, the parser creates a phrase query for each multi-term synonym. +For example, the following synonym: `"ny, new york"` would produce: + +`(ny OR ("new york"))` + +It is also possible to match multi-term synonyms with conjunctions instead: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "simple_query_string" : { + "query" : "ny city", + "auto_generate_synonyms_phrase_query" : false + } + } +} +-------------------------------------------------- +// CONSOLE + +The example above creates a boolean query: + +`(ny OR (new AND york)) city` + +that matches documents with the term `ny` or the conjunction `new AND york`. +By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`. +