Add support for auto_generate_synonyms_phrase_query in match_query, multi_match_query, query_string and simple_query_string (#26097)

* Add support for auto_generate_synonyms_phrase_query in match_query, multi_match_query, query_string and simple_query_string

This change adds a new parameter called auto_generate_synonyms_phrase_query (defaults to true).
This option can be used in conjunction with synonym_graph token filter to generate phrase queries
when multi terms synonyms are encountered.
For example, a synonym like "ny, new york" would produce the following boolean query when "ny city" is parsed:
((ny OR "new york") AND city)

Note how the multi terms synonym "new york" produces a phrase query.
This commit is contained in:
Jim Ferenczi 2017-08-09 12:15:09 +02:00 committed by GitHub
parent 8cb1391f40
commit a7e1610134
18 changed files with 331 additions and 17 deletions

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.query;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
@ -55,6 +56,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
public static final ParseField ANALYZER_FIELD = new ParseField("analyzer");
public static final ParseField TYPE_FIELD = new ParseField("type").withAllDeprecated("match_phrase and match_phrase_prefix query");
public static final ParseField QUERY_FIELD = new ParseField("query");
public static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
/** The name for the match query */
public static final String NAME = "match";
@ -98,6 +100,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
private Float cutoffFrequency = null;
private boolean autoGenerateSynonymsPhraseQuery = true;
/**
* Constructs a new match query.
*/
@ -133,6 +137,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
fuzzyRewrite = in.readOptionalString();
fuzziness = in.readOptionalWriteable(Fuzziness::new);
cutoffFrequency = in.readOptionalFloat();
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
autoGenerateSynonymsPhraseQuery = in.readBoolean();
}
}
@Override
@ -153,6 +160,9 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
out.writeOptionalString(fuzzyRewrite);
out.writeOptionalWriteable(fuzziness);
out.writeOptionalFloat(cutoffFrequency);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(autoGenerateSynonymsPhraseQuery);
}
}
/** Returns the field name used in this query. */
@ -395,6 +405,20 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
return this.zeroTermsQuery;
}
/**
* Sets whether phrase queries should be automatically generated for multi-term synonyms.
* The flag is {@code true} on a freshly constructed builder.
*
* @param enable {@code true} to generate phrase queries, {@code false} to disable them
* @return this builder, so that calls can be chained
*/
public MatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) {
this.autoGenerateSynonymsPhraseQuery = enable;
return this;
}
/**
* Whether phrase queries should be automatically generated for multi-term synonyms.
* Defaults to {@code true}.
*
* @return the current value of the flag
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return autoGenerateSynonymsPhraseQuery;
}
@Override
public void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
@ -431,6 +455,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
if (cutoffFrequency != null) {
builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
}
builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
printBoostAndQueryName(builder);
builder.endObject();
builder.endObject();
@ -457,6 +482,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
matchQuery.setLenient(lenient);
matchQuery.setCommonTermsCutoff(cutoffFrequency);
matchQuery.setZeroTermsQuery(zeroTermsQuery);
matchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
Query query = matchQuery.parse(type, fieldName, value);
return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch);
@ -478,14 +504,15 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
Objects.equals(lenient, other.lenient) &&
Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) &&
Objects.equals(zeroTermsQuery, other.zeroTermsQuery) &&
Objects.equals(cutoffFrequency, other.cutoffFrequency);
Objects.equals(cutoffFrequency, other.cutoffFrequency) &&
Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
}
@Override
protected int doHashCode() {
return Objects.hash(fieldName, value, type, operator, analyzer, slop,
fuzziness, prefixLength, maxExpansions, minimumShouldMatch,
fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency);
fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency, autoGenerateSynonymsPhraseQuery);
}
@Override
@ -510,6 +537,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
boolean lenient = MatchQuery.DEFAULT_LENIENCY;
Float cutOffFrequency = null;
ZeroTermsQuery zeroTermsQuery = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;
boolean autoGenerateSynonymsPhraseQuery = true;
String queryName = null;
String currentFieldName = null;
XContentParser.Token token;
@ -572,6 +600,8 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
}
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
queryName = parser.text();
} else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
autoGenerateSynonymsPhraseQuery = parser.booleanValue();
} else {
throw new ParsingException(parser.getTokenLocation(),
"[" + NAME + "] query does not support [" + currentFieldName + "]");
@ -610,6 +640,7 @@ public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> {
matchQuery.cutoffFrequency(cutOffFrequency);
}
matchQuery.zeroTermsQuery(zeroTermsQuery);
matchQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
matchQuery.queryName(queryName);
matchQuery.boost(boost);
return matchQuery;

View File

@ -22,17 +22,16 @@ package org.elasticsearch.index.query;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.support.QueryParsers;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MultiMatchQuery;
@ -74,6 +73,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
private static final ParseField TYPE_FIELD = new ParseField("type");
private static final ParseField QUERY_FIELD = new ParseField("query");
private static final ParseField FIELDS_FIELD = new ParseField("fields");
private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
private final Object value;
private final Map<String, Float> fieldsBoosts;
@ -91,6 +92,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
private boolean lenient = DEFAULT_LENIENCY;
private Float cutoffFrequency = null;
private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
private boolean autoGenerateSynonymsPhraseQuery = true;
public enum Type implements Writeable {
@ -221,6 +223,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
lenient = in.readBoolean();
cutoffFrequency = in.readOptionalFloat();
zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
autoGenerateSynonymsPhraseQuery = in.readBoolean();
}
}
@Override
@ -245,6 +250,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
out.writeBoolean(lenient);
out.writeOptionalFloat(cutoffFrequency);
zeroTermsQuery.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(autoGenerateSynonymsPhraseQuery);
}
}
public Object value() {
@ -514,6 +522,19 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
return zeroTermsQuery;
}
/**
* Sets whether phrase queries should be automatically generated for multi-term synonyms.
* The flag is {@code true} on a freshly constructed builder.
*
* @param enable {@code true} to generate phrase queries, {@code false} to disable them
* @return this builder, so that calls can be chained
*/
public MultiMatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) {
this.autoGenerateSynonymsPhraseQuery = enable;
return this;
}
/**
* Whether phrase queries should be automatically generated for multi-term synonyms.
* Defaults to {@code true}.
*
* @return the current value of the flag
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return autoGenerateSynonymsPhraseQuery;
}
@Override
public void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
@ -551,6 +572,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
}
builder.field(ZERO_TERMS_QUERY_FIELD.getPreferredName(), zeroTermsQuery.toString());
builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
printBoostAndQueryName(builder);
builder.endObject();
}
@ -572,6 +594,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
Float cutoffFrequency = null;
boolean lenient = DEFAULT_LENIENCY;
MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
boolean autoGenerateSynonymsPhraseQuery = true;
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
String queryName = null;
@ -634,6 +657,8 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
}
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
queryName = parser.text();
} else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
autoGenerateSynonymsPhraseQuery = parser.booleanValue();
} else {
throw new ParsingException(parser.getTokenLocation(),
"[" + NAME + "] query does not support [" + currentFieldName + "]");
@ -673,6 +698,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
.slop(slop)
.tieBreaker(tieBreaker)
.zeroTermsQuery(zeroTermsQuery)
.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery)
.boost(boost)
.queryName(queryName);
}
@ -728,6 +754,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
}
multiMatchQuery.setLenient(lenient);
multiMatchQuery.setZeroTermsQuery(zeroTermsQuery);
multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
if (useDisMax != null) { // backwards foobar
boolean typeUsesDismax = type.tieBreaker() != 1.0f;
@ -748,7 +775,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
protected int doHashCode() {
return Objects.hash(value, fieldsBoosts, type, operator, analyzer, slop, fuzziness,
prefixLength, maxExpansions, minimumShouldMatch, fuzzyRewrite, useDisMax, tieBreaker, lenient,
cutoffFrequency, zeroTermsQuery);
cutoffFrequency, zeroTermsQuery, autoGenerateSynonymsPhraseQuery);
}
@Override
@ -768,6 +795,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
Objects.equals(tieBreaker, other.tieBreaker) &&
Objects.equals(lenient, other.lenient) &&
Objects.equals(cutoffFrequency, other.cutoffFrequency) &&
Objects.equals(zeroTermsQuery, other.zeroTermsQuery);
Objects.equals(zeroTermsQuery, other.zeroTermsQuery) &&
Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
}
}

View File

@ -102,6 +102,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields")
.withAllDeprecated("Set [default_field] to `*` instead");
private static final ParseField TYPE_FIELD = new ParseField("type");
private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
private final String queryString;
@ -157,6 +158,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
/** To limit effort spent determinizing regexp queries. */
private int maxDeterminizedStates = DEFAULT_MAX_DETERMINED_STATES;
private boolean autoGenerateSynonymsPhraseQuery = true;
public QueryStringQueryBuilder(String queryString) {
if (queryString == null) {
throw new IllegalArgumentException("query text missing");
@ -219,6 +222,9 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
}
}
}
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
autoGenerateSynonymsPhraseQuery = in.readBoolean();
}
}
@Override
@ -271,6 +277,9 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
out.writeOptionalBoolean(useAllFields);
}
}
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(autoGenerateSynonymsPhraseQuery);
}
}
public String queryString() {
@ -625,6 +634,19 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
return false;
}
/**
* Sets whether phrase queries should be automatically generated for multi-term synonyms.
* The flag is {@code true} on a freshly constructed builder.
*
* @param value {@code true} to generate phrase queries, {@code false} to disable them
* @return this builder, so that calls can be chained
*/
public QueryStringQueryBuilder autoGenerateSynonymsPhraseQuery(boolean value) {
this.autoGenerateSynonymsPhraseQuery = value;
return this;
}
/**
* Whether phrase queries should be automatically generated for multi-term synonyms.
* Defaults to {@code true}.
*
* @return the current value of the flag
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return autoGenerateSynonymsPhraseQuery;
}
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
@ -682,6 +704,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
builder.field(TIME_ZONE_FIELD.getPreferredName(), this.timeZone.getID());
}
builder.field(ESCAPE_FIELD.getPreferredName(), this.escape);
builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery);
printBoostAndQueryName(builder);
builder.endObject();
}
@ -714,6 +737,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
String fuzzyRewrite = null;
String rewrite = null;
Map<String, Float> fieldsAndWeights = new HashMap<>();
boolean autoGenerateSynonymsPhraseQuery = true;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
@ -799,6 +823,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
}
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
queryName = parser.text();
} else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
autoGenerateSynonymsPhraseQuery = parser.booleanValue();
} else if (AUTO_GENERATE_PHRASE_QUERIES_FIELD.match(currentFieldName)) {
// ignore, deprecated setting
} else if (LOWERCASE_EXPANDED_TERMS_FIELD.match(currentFieldName)) {
@ -849,6 +875,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
queryStringQuery.timeZone(timeZone);
queryStringQuery.boost(boost);
queryStringQuery.queryName(queryName);
queryStringQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
return queryStringQuery;
}
@ -882,7 +909,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
timeZone == null ? other.timeZone == null : other.timeZone != null &&
Objects.equals(timeZone.getID(), other.timeZone.getID()) &&
Objects.equals(escape, other.escape) &&
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates);
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) &&
Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
}
@Override
@ -891,7 +919,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
quoteFieldSuffix, allowLeadingWildcard, analyzeWildcard,
enablePositionIncrements, fuzziness, fuzzyPrefixLength,
fuzzyMaxExpansions, fuzzyRewrite, phraseSlop, type, tieBreaker, rewrite, minimumShouldMatch, lenient,
timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates);
timeZone == null ? 0 : timeZone.getID(), escape, maxDeterminizedStates, autoGenerateSynonymsPhraseQuery);
}
@Override
@ -963,6 +991,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue
queryParser.setMultiTermRewriteMethod(QueryParsers.parseRewriteMethod(this.rewrite));
queryParser.setTimeZone(timeZone);
queryParser.setMaxDeterminizedStates(maxDeterminizedStates);
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
Query query;
try {

View File

@ -300,6 +300,8 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
private boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
/** Specifies a suffix, if any, to apply to field names for phrase matching. */
private String quoteFieldSuffix = null;
/** Whether phrase queries should be automatically generated for multi terms synonyms. */
private boolean autoGenerateSynonymsPhraseQuery = true;
/**
* Generates default {@link Settings} object (uses ROOT locale, does
@ -312,6 +314,7 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
this.lenient = other.lenient;
this.analyzeWildcard = other.analyzeWildcard;
this.quoteFieldSuffix = other.quoteFieldSuffix;
this.autoGenerateSynonymsPhraseQuery = other.autoGenerateSynonymsPhraseQuery;
}
/** Specifies whether to use lenient parsing, defaults to false. */
@ -349,9 +352,21 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
return quoteFieldSuffix;
}
/**
* Specifies whether phrase queries should be automatically generated for multi-term synonyms.
* The flag is {@code true} unless changed through this method.
*
* @param value {@code true} to generate phrase queries, {@code false} to disable them
*/
public void autoGenerateSynonymsPhraseQuery(boolean value) {
this.autoGenerateSynonymsPhraseQuery = value;
}
/**
* Whether phrase queries should be automatically generated for multi-term synonyms.
* Defaults to {@code true}.
*
* @return the current value of the flag
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return autoGenerateSynonymsPhraseQuery;
}
@Override
public int hashCode() {
return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix);
return Objects.hash(lenient, analyzeWildcard, quoteFieldSuffix, autoGenerateSynonymsPhraseQuery);
}
@Override
@ -363,8 +378,10 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp
return false;
}
Settings other = (Settings) obj;
return Objects.equals(lenient, other.lenient) && Objects.equals(analyzeWildcard, other.analyzeWildcard)
&& Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix);
return Objects.equals(lenient, other.lenient) &&
Objects.equals(analyzeWildcard, other.analyzeWildcard) &&
Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix) &&
Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery);
}
}
}

View File

@ -105,6 +105,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
private static final ParseField FIELDS_FIELD = new ParseField("fields");
private static final ParseField QUOTE_FIELD_SUFFIX_FIELD = new ParseField("quote_field_suffix");
private static final ParseField ALL_FIELDS_FIELD = new ParseField("all_fields");
private static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");
/** Query text to parse. */
private final String queryText;
@ -174,6 +175,9 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
settings.quoteFieldSuffix(in.readOptionalString());
useAllFields = in.readOptionalBoolean();
}
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
settings.autoGenerateSynonymsPhraseQuery(in.readBoolean());
}
}
@Override
@ -203,6 +207,9 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
out.writeOptionalString(settings.quoteFieldSuffix());
out.writeOptionalBoolean(useAllFields);
}
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(settings.autoGenerateSynonymsPhraseQuery());
}
}
/** Returns the text to parse the query from. */
@ -358,6 +365,20 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
return minimumShouldMatch;
}
/**
* Sets whether phrase queries should be automatically generated for multi-term synonyms.
* The value is not kept on the builder itself; it is stored on the {@code settings} object.
*
* @param value {@code true} to generate phrase queries, {@code false} to disable them
* @return this builder, so that calls can be chained
*/
public SimpleQueryStringBuilder autoGenerateSynonymsPhraseQuery(boolean value) {
this.settings.autoGenerateSynonymsPhraseQuery(value);
return this;
}
/**
* Whether phrase queries should be automatically generated for multi-term synonyms.
* Defaults to {@code true}. The value is read from the {@code settings} object.
*
* @return the current value of the flag
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return settings.autoGenerateSynonymsPhraseQuery();
}
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
// field names in builder can have wildcards etc, need to resolve them here
@ -459,7 +480,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
if (useAllFields != null) {
builder.field(ALL_FIELDS_FIELD.getPreferredName(), useAllFields);
}
builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), settings.autoGenerateSynonymsPhraseQuery());
printBoostAndQueryName(builder);
builder.endObject();
}
@ -478,6 +499,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
String quoteFieldSuffix = null;
Boolean useAllFields = null;
boolean autoGenerateSynonymsPhraseQuery = true;
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@ -543,6 +565,8 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
quoteFieldSuffix = parser.textOrNull();
} else if (ALL_FIELDS_FIELD.match(currentFieldName)) {
useAllFields = parser.booleanValue();
} else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName)) {
autoGenerateSynonymsPhraseQuery = parser.booleanValue();
} else {
throw new ParsingException(parser.getTokenLocation(), "[" + SimpleQueryStringBuilder.NAME +
"] unsupported field [" + parser.currentName() + "]");
@ -571,6 +595,7 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder<SimpleQuerySt
}
qb.analyzeWildcard(analyzeWildcard).boost(boost).quoteFieldSuffix(quoteFieldSuffix);
qb.useAllFields(useAllFields);
qb.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
return qb;
}

View File

@ -167,6 +167,8 @@ public class MatchQuery {
protected Float commonTermsCutoff = null;
protected boolean autoGenerateSynonymsPhraseQuery = true;
public MatchQuery(QueryShardContext context) {
this.context = context;
}
@ -226,6 +228,10 @@ public class MatchQuery {
this.zeroTermsQuery = zeroTermsQuery;
}
/**
* Sets whether phrase queries should be automatically generated for multi-term synonyms.
* The flag is {@code true} unless changed through this method.
*
* @param enabled {@code true} to generate phrase queries, {@code false} to disable them
*/
public void setAutoGenerateSynonymsPhraseQuery(boolean enabled) {
this.autoGenerateSynonymsPhraseQuery = enabled;
}
protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) {
if (analyzer == null) {
return quoted ? context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType);
@ -258,6 +264,7 @@ public class MatchQuery {
assert analyzer != null;
MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType);
builder.setEnablePositionIncrements(this.enablePositionIncrements);
builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery);
Query query = null;
switch (type) {

View File

@ -321,6 +321,11 @@ public class QueryStringQueryParser extends XQueryParser {
this.groupTieBreaker = groupTieBreaker;
}
/**
* Forwards the multi-term synonyms phrase query flag to {@code queryBuilder}.
* The override does not call the superclass implementation; only {@code queryBuilder} is updated.
*
* @param enable {@code true} to generate phrase queries for multi-term synonyms
*/
@Override
public void setAutoGenerateMultiTermSynonymsPhraseQuery(boolean enable) {
queryBuilder.setAutoGenerateSynonymsPhraseQuery(enable);
}
private Query applyBoost(Query q, Float boost) {
if (boost != null && boost != 1f) {
return new BoostQuery(q, boost);

View File

@ -119,6 +119,10 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
if (randomBoolean()) {
matchQuery.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
}
if (randomBoolean()) {
matchQuery.autoGenerateSynonymsPhraseQuery(randomBoolean());
}
return matchQuery;
}
@ -274,6 +278,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
" \"fuzzy_transpositions\" : true,\n" +
" \"lenient\" : false,\n" +
" \"zero_terms_query\" : \"ALL\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
" }\n" +
@ -302,6 +307,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
" \"fuzzy_transpositions\" : true,\n" +
" \"lenient\" : false,\n" +
" \"zero_terms_query\" : \"NONE\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
" }\n" +
@ -333,6 +339,7 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
" \"fuzzy_transpositions\" : true,\n" +
" \"lenient\" : false,\n" +
" \"zero_terms_query\" : \"NONE\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
" }\n" +

View File

@ -121,6 +121,9 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
if (randomBoolean()) {
query.zeroTermsQuery(randomFrom(MatchQuery.ZeroTermsQuery.values()));
}
if (randomBoolean()) {
query.autoGenerateSynonymsPhraseQuery(randomBoolean());
}
// test with fields with boost and patterns delegated to the tests further below
return query;
}
@ -238,6 +241,7 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
" \"max_expansions\" : 50,\n" +
" \"lenient\" : false,\n" +
" \"zero_terms_query\" : \"NONE\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
"}";

View File

@ -157,6 +157,7 @@ public class NestedQueryBuilderTests extends AbstractQueryTestCase<NestedQueryBu
" \"fuzzy_transpositions\" : true,\n" +
" \"lenient\" : false,\n" +
" \"zero_terms_query\" : \"NONE\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
" }\n" +

View File

@ -160,6 +160,9 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
if (randomBoolean()) {
queryStringQueryBuilder.timeZone(randomDateTimeZone().getID());
}
if (randomBoolean()) {
queryStringQueryBuilder.autoGenerateSynonymsPhraseQuery(randomBoolean());
}
queryStringQueryBuilder.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
return queryStringQueryBuilder;
}
@ -375,6 +378,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
queryParser.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
queryParser.setDefaultOperator(op.toQueryParserOperator());
queryParser.setForceAnalyzer(new MockSynonymAnalyzer());
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(false);
// simple multi-term
Query query = queryParser.parse("guinea pig");
@ -393,6 +397,21 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
defaultOp).build();
assertThat(query, Matchers.equalTo(expectedQuery));
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
// simple multi-term with phrase query
query = queryParser.parse("guinea pig");
expectedQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(new PhraseQuery.Builder()
.add(new Term(STRING_FIELD_NAME, "guinea"))
.add(new Term(STRING_FIELD_NAME, "pig"))
.build(), Occur.SHOULD)
.add(new TermQuery(new Term(STRING_FIELD_NAME, "cavy")), Occur.SHOULD)
.build(), defaultOp)
.build();
assertThat(query, Matchers.equalTo(expectedQuery));
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(false);
// simple with additional tokens
query = queryParser.parse("that guinea pig smells");
expectedQuery = new BooleanQuery.Builder()
@ -850,6 +869,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
" \"fuzzy_max_expansions\" : 50,\n" +
" \"phrase_slop\" : 0,\n" +
" \"escape\" : false,\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
"}";

View File

@ -95,7 +95,9 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
}
}
result.fields(fields);
if (randomBoolean()) {
result.autoGenerateSynonymsPhraseQuery(randomBoolean());
}
return result;
}
@ -340,6 +342,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
" \"lenient\" : false,\n" +
" \"analyze_wildcard\" : false,\n" +
" \"quote_field_suffix\" : \".quote\",\n" +
" \"auto_generate_synonyms_phrase_query\" : true,\n" +
" \"boost\" : 1.0\n" +
" }\n" +
"}";

View File

@ -141,8 +141,12 @@ public class MatchQueryIT extends ESIntegTestCase {
indexRandom(true, false, getDocs());
// no min should match
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(
QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns").autoGenerateSynonymsPhraseQuery(false)
)
.get();
assertHitCount(searchResponse, 6L);
assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6");
@ -159,6 +163,19 @@ public class MatchQueryIT extends ESIntegTestCase {
assertSearchHits(searchResponse, "1", "2", "6");
}
/**
* Checks a match query for "wtf" analyzed with "lower_graphsyns" and the AND operator,
* leaving auto_generate_synonyms_phrase_query at its default.
* Exactly the documents with ids 1, 2 and 3 are expected to match.
*/
public void testMultiTermsSynonymsPhrase() throws ExecutionException, InterruptedException {
List<IndexRequestBuilder> builders = getDocs();
indexRandom(true, false, builders);
// autoGenerateSynonymsPhraseQuery is deliberately not set here, so the builder default applies.
// NOTE(review): presumably "lower_graphsyns" expands "wtf" to a multi-term synonym
// that is then matched as a phrase; confirm against the index settings of this test class.
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(
QueryBuilders.matchQuery("field", "wtf")
.analyzer("lower_graphsyns")
.operator(Operator.AND))
.get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "3");
}
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
List<IndexRequestBuilder> builders = getDocs();
builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));

View File

@ -316,6 +316,7 @@ public class QueryStringIT extends ESIntegTestCase {
QueryBuilders.queryStringQuery("say what the fudge")
.defaultField("field")
.defaultOperator(Operator.AND)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
@ -326,6 +327,7 @@ public class QueryStringIT extends ESIntegTestCase {
QueryBuilders.queryStringQuery("three what the fudge foo")
.defaultField("field")
.defaultOperator(Operator.OR)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 6L);
@ -336,11 +338,22 @@ public class QueryStringIT extends ESIntegTestCase {
QueryBuilders.queryStringQuery("three what the fudge foo")
.defaultField("field")
.defaultOperator(Operator.OR)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")
.minimumShouldMatch("80%")).get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "6");
// multi terms synonyms phrase
searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("what the fudge")
.defaultField("field")
.defaultOperator(Operator.AND)
.analyzer("lower_graphsyns"))
.get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "3");
}
private void assertHits(SearchHits hits, String... ids) {

View File

@ -143,6 +143,41 @@ IMPORTANT: The `cutoff_frequency` option operates on a per-shard-level. This mea
that when trying it out on test indexes with low document numbers you
should follow the advice in {defguide}/relevance-is-broken.html[Relevance is broken].
[[query-dsl-match-query-synonyms]]
===== Synonyms
The `match` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-terms synonym.
For example, the following synonym: `"ny, new york"` would produce:
`(ny OR ("new york"))`
It is also possible to match multi terms synonyms with conjunctions instead:
[source,js]
--------------------------------------------------
GET /_search
{
"query": {
"match" : {
"message": {
"query" : "ny city",
"auto_generate_synonyms_phrase_query" : false
}
}
}
}
--------------------------------------------------
// CONSOLE
The example above creates a boolean query:
`(ny OR (new AND york)) city`
that matches documents with the term `ny` or the conjunction `new AND york`.
By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.
.Comparison to query_string / field
**************************************************

View File

@ -136,8 +136,8 @@ follows:
* plus `tie_breaker * _score` for all other matching fields
Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`
and `cutoff_frequency`, as explained in <<query-dsl-match-query, match query>>.
`fuzziness`, `lenient`, `prefix_length`, `max_expansions`, `rewrite`, `zero_terms_query`,
`cutoff_frequency` and `auto_generate_synonyms_phrase_query`, as explained in <<query-dsl-match-query, match query>>.
[IMPORTANT]
[[operator-min]]

View File

@ -110,6 +110,9 @@ the query string. This allows to use a field that has a different analysis chain
for exact matching. Look <<mixing-exact-search-with-stemming,here>> for a
comprehensive example.
|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi terms synonyms.
Defaults to `true`.
|`all_fields` | deprecated[6.0.0, set `default_field` to `*` instead]
Perform the query on all fields detected in the mapping that can
be queried. Will be used by default when the `_all` field is disabled and no
@ -273,4 +276,37 @@ GET /_search
--------------------------------------------------
// CONSOLE
[float]
==== Synonyms
The `query_string` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-terms synonym.
For example, the following synonym: `"ny, new york"` would produce:
`(ny OR ("new york"))`
It is also possible to match multi terms synonyms with conjunctions instead:
[source,js]
--------------------------------------------------
GET /_search
{
"query": {
"query_string" : {
"default_field": "title",
"query" : "ny city",
"auto_generate_synonyms_phrase_query" : false
}
}
}
--------------------------------------------------
// CONSOLE
The example above creates a boolean query:
`(ny OR (new AND york)) city`
that matches documents with the term `ny` or the conjunction `new AND york`.
By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.
include::query-string-syntax.asciidoc[]

View File

@ -62,6 +62,9 @@ the query string. This allows to use a field that has a different analysis chain
for exact matching. Look <<mixing-exact-search-with-stemming,here>> for a
comprehensive example.
|`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi terms synonyms.
Defaults to `true`.
|`all_fields` | Perform the query on all fields detected in the mapping that can
be queried. Will be used by default when the `_all` field is disabled and no
`default_field` is specified in index settings, and no `fields` are specified.
@ -160,3 +163,36 @@ GET /_search
The available flags are: `ALL`, `NONE`, `AND`, `OR`, `NOT`, `PREFIX`, `PHRASE`,
`PRECEDENCE`, `ESCAPE`, `WHITESPACE`, `FUZZY`, `NEAR`, and `SLOP`.
[float]
==== Synonyms
The `simple_query_string` query supports multi-terms synonym expansion with the <<analysis-synonym-graph-tokenfilter,
synonym_graph>> token filter. When this filter is used, the parser creates a phrase query for each multi-terms synonym.
For example, the following synonym: `"ny, new york"` would produce:
`(ny OR ("new york"))`
It is also possible to match multi terms synonyms with conjunctions instead:
[source,js]
--------------------------------------------------
GET /_search
{
"query": {
"simple_query_string" : {
"query" : "ny city",
"auto_generate_synonyms_phrase_query" : false
}
}
}
--------------------------------------------------
// CONSOLE
The example above creates a boolean query:
`(ny OR (new AND york)) city`
that matches documents with the term `ny` or the conjunction `new AND york`.
By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`.