Make fields optional in multi_match query and rely on index.query.default_field by default (#27380)

* Make fields optional in multi_match query and rely on index.query.default_field by default

This commit adds the ability to send `multi_match` query without providing any `fields`.
When no fields are provided the `multi_match` query will use the fields defined in the index setting `index.query.default_field`
(which in turn defaults to `*`).
The same behavior is already implemented in `query_string` and `simple_query_string` so this change just applies
the heuristic to `multi_match` queries.
Relying on `index.query.default_field` rather than `*` is safer for big mappings that break the 1024 field expansion limit added in 7.0 for all
text queries. For these kinds of mappings the admin can change the `index.query.default_field` setting in order to make sure that exploratory queries using
`multi_match`, `query_string` or `simple_query_string` do not throw an exception.
This commit is contained in:
Jim Ferenczi 2017-11-17 10:25:21 +01:00 committed by GitHub
parent f761a0e0e4
commit 53462f6499
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 117 additions and 25 deletions

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
@ -36,9 +37,11 @@ import org.elasticsearch.index.query.support.QueryParsers;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MultiMatchQuery;
import org.elasticsearch.index.search.QueryParserHelper;
import org.elasticsearch.index.search.QueryStringQueryParser;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
@ -55,7 +58,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
public static final int DEFAULT_PHRASE_SLOP = MatchQuery.DEFAULT_PHRASE_SLOP;
public static final int DEFAULT_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength;
public static final int DEFAULT_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions;
public static final boolean DEFAULT_LENIENCY = MatchQuery.DEFAULT_LENIENCY;
public static final MatchQuery.ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;
public static final boolean DEFAULT_FUZZY_TRANSPOSITIONS = FuzzyQuery.defaultTranspositions;
@ -91,7 +93,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
private String fuzzyRewrite = null;
private Boolean useDisMax;
private Float tieBreaker;
private boolean lenient = DEFAULT_LENIENCY;
private Boolean lenient;
private Float cutoffFrequency = null;
private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
private boolean autoGenerateSynonymsPhraseQuery = true;
@ -223,7 +225,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
fuzzyRewrite = in.readOptionalString();
useDisMax = in.readOptionalBoolean();
tieBreaker = in.readOptionalFloat();
lenient = in.readBoolean();
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
lenient = in.readOptionalBoolean();
} else {
lenient = in.readBoolean();
}
cutoffFrequency = in.readOptionalFloat();
zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
if (in.getVersion().onOrAfter(Version.V_6_1_0)) {
@ -251,7 +257,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
out.writeOptionalString(fuzzyRewrite);
out.writeOptionalBoolean(useDisMax);
out.writeOptionalFloat(tieBreaker);
out.writeBoolean(lenient);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeOptionalBoolean(lenient);
} else {
out.writeBoolean(lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient);
}
out.writeOptionalFloat(cutoffFrequency);
zeroTermsQuery.writeTo(out);
if (out.getVersion().onOrAfter(Version.V_6_1_0)) {
@ -488,7 +498,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
}
public boolean lenient() {
return lenient;
return lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient;
}
/**
@ -588,7 +598,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
if (tieBreaker != null) {
builder.field(TIE_BREAKER_FIELD.getPreferredName(), tieBreaker);
}
builder.field(LENIENT_FIELD.getPreferredName(), lenient);
if (lenient != null) {
builder.field(LENIENT_FIELD.getPreferredName(), lenient);
}
if (cutoffFrequency != null) {
builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
}
@ -614,7 +626,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
Boolean useDisMax = null;
Float tieBreaker = null;
Float cutoffFrequency = null;
boolean lenient = DEFAULT_LENIENCY;
Boolean lenient = null;
MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
boolean autoGenerateSynonymsPhraseQuery = true;
boolean fuzzyTranspositions = DEFAULT_FUZZY_TRANSPOSITIONS;
@ -698,16 +710,12 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
throw new ParsingException(parser.getTokenLocation(), "No text specified for multi_match query");
}
if (fieldsBoosts.isEmpty()) {
throw new ParsingException(parser.getTokenLocation(), "No fields specified for multi_match query");
}
if (fuzziness != null && (type == Type.CROSS_FIELDS || type == Type.PHRASE || type == Type.PHRASE_PREFIX)) {
throw new ParsingException(parser.getTokenLocation(),
"Fuzziness not allowed for type [" + type.parseField.getPreferredName() + "]");
}
return new MultiMatchQueryBuilder(value)
MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(value)
.fields(fieldsBoosts)
.type(type)
.analyzer(analyzer)
@ -715,7 +723,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
.fuzziness(fuzziness)
.fuzzyRewrite(fuzzyRewrite)
.useDisMax(useDisMax)
.lenient(lenient)
.maxExpansions(maxExpansions)
.minimumShouldMatch(minimumShouldMatch)
.operator(operator)
@ -727,6 +734,10 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
.boost(boost)
.queryName(queryName)
.fuzzyTranspositions(fuzzyTranspositions);
if (lenient != null) {
builder.lenient(lenient);
}
return builder;
}
private static void parseFieldAndBoost(XContentParser parser, Map<String, Float> fieldsBoosts) throws IOException {
@ -778,7 +789,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
if (cutoffFrequency != null) {
multiMatchQuery.setCommonTermsCutoff(cutoffFrequency);
}
multiMatchQuery.setLenient(lenient);
if (lenient != null) {
multiMatchQuery.setLenient(lenient);
}
multiMatchQuery.setZeroTermsQuery(zeroTermsQuery);
multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
multiMatchQuery.setTranspositions(fuzzyTranspositions);
@ -793,8 +806,20 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
}
}
}
Map<String, Float> newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
Map<String, Float> newFieldsBoosts;
if (fieldsBoosts.isEmpty()) {
// no fields provided, defaults to index.query.default_field
List<String> defaultFields = context.defaultFields();
boolean isAllField = defaultFields.size() == 1 && Regex.isMatchAllPattern(defaultFields.get(0));
if (isAllField && lenient == null) {
// Sets leniency to true if not explicitly
// set in the request
multiMatchQuery.setLenient(true);
}
newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, QueryParserHelper.parseFieldsAndWeights(defaultFields));
} else {
newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
}
return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch);
}

View File

@ -32,8 +32,10 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type;
import org.elasticsearch.index.search.MatchQuery;
@ -41,6 +43,7 @@ import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.test.AbstractQueryTestCase;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -66,18 +69,28 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
assumeTrue("test with date fields runs only when at least a type is registered", getCurrentTypes().length > 0);
}
// creates the query with random value and field name
Object value;
final Object value;
if (fieldName.equals(STRING_FIELD_NAME)) {
value = getRandomQueryText();
} else {
value = getRandomValueForFieldName(fieldName);
}
MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(value, fieldName);
// field with random boost
if (randomBoolean()) {
query.field(fieldName, randomFloat() * 10);
final MultiMatchQueryBuilder query;
if (rarely()) {
query = new MultiMatchQueryBuilder(value, fieldName);
if (randomBoolean()) {
query.lenient(randomBoolean());
}
// field with random boost
if (randomBoolean()) {
query.field(fieldName, randomFloat() * 10);
}
} else {
query = new MultiMatchQueryBuilder(value);
query.lenient(true);
}
// sets other parameters of the multi match query
if (randomBoolean()) {
query.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
@ -112,9 +125,6 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
if (randomBoolean()) {
query.tieBreaker(randomFloat());
}
if (randomBoolean()) {
query.lenient(randomBoolean());
}
if (randomBoolean()) {
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
}
@ -338,4 +348,56 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
assertEquals(expected, query);
}
/**
 * Checks how a {@code multi_match} query built without any explicit field resolves its
 * target fields from the {@code index.query.default_field} index setting:
 * <ul>
 *   <li>with the setting left untouched the query builds a {@link DisjunctionMaxQuery};</li>
 *   <li>with two text fields (one boosted) the query targets exactly those fields;</li>
 *   <li>with an additional numeric field the query fails on the non-numeric text unless
 *       {@code lenient} is explicitly enabled, in which case the numeric clause becomes a
 *       {@link MatchNoDocsQuery}.</li>
 * </ul>
 */
public void testDefaultField() throws Exception {
    assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
    final QueryShardContext shardContext = createShardContext();

    // Step 1: default setting. Should pass because lenient is set to true when the
    // default field is `*` and the request did not set it explicitly.
    final Query allFieldsQuery = new MultiMatchQueryBuilder("hello").toQuery(shardContext);
    assertThat(allFieldsQuery, instanceOf(DisjunctionMaxQuery.class));

    // Step 2: restrict the default fields to two text fields, the second one boosted.
    final Settings textFieldsOnly = Settings.builder()
        .putList("index.query.default_field", STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5")
        .build();
    shardContext.getIndexSettings().updateIndexMetaData(
        newIndexMeta("index", shardContext.getIndexSettings().getSettings(), textFieldsOnly)
    );

    final MultiMatchQueryBuilder noFieldsBuilder = new MultiMatchQueryBuilder("hello");
    final Query firstFieldClause = new TermQuery(new Term(STRING_FIELD_NAME, "hello"));
    final Query boostedSecondFieldClause = new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f);

    final Query textFieldsQuery = noFieldsBuilder.toQuery(shardContext);
    final DisjunctionMaxQuery expectedTextFields =
        new DisjunctionMaxQuery(Arrays.asList(firstFieldClause, boostedSecondFieldClause), 0.0f);
    assertEquals(expectedTextFields, textFieldsQuery);

    // Step 3: add an integer field to the default fields.
    final Settings textAndIntFields = Settings.builder()
        .putList("index.query.default_field", STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5", INT_FIELD_NAME)
        .build();
    shardContext.getIndexSettings().updateIndexMetaData(
        newIndexMeta("index", shardContext.getIndexSettings().getSettings(), textAndIntFields)
    );

    // should fail because lenient defaults to false
    final IllegalArgumentException failure =
        expectThrows(IllegalArgumentException.class, () -> noFieldsBuilder.toQuery(shardContext));
    assertThat(failure, instanceOf(NumberFormatException.class));
    assertThat(failure.getMessage(), equalTo("For input string: \"hello\""));

    // explicitly sets lenient
    noFieldsBuilder.lenient(true);
    final Query lenientQuery = noFieldsBuilder.toQuery(shardContext);
    final Query failedIntClause =
        new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]");
    final DisjunctionMaxQuery expectedLenient =
        new DisjunctionMaxQuery(Arrays.asList(firstFieldClause, boostedSecondFieldClause, failedIntClause), 0.0f);
    assertEquals(expectedLenient, lenientQuery);
}
/**
 * Creates index metadata for {@code name} whose settings are {@code oldIndexSettings}
 * with {@code indexSettings} applied on top of them.
 *
 * @param name             name of the index the metadata is built for
 * @param oldIndexSettings settings copied into the builder first
 * @param indexSettings    settings put into the builder after {@code oldIndexSettings}
 * @return the metadata built from the combined settings
 */
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
    final Settings.Builder combined = Settings.builder();
    combined.put(oldIndexSettings);
    combined.put(indexSettings);
    final Settings merged = combined.build();
    return IndexMetaData.builder(name).settings(merged).build();
}
}

View File

@ -58,6 +58,11 @@ GET /_search
<1> The `subject` field is three times as important as the `message` field.
If no `fields` are provided, the `multi_match` query defaults to the `index.query.default_field`
index setting, which in turn defaults to `*`. `*` extracts all fields in the mapping that
are eligible for term queries and filters out the metadata fields. All extracted fields are then
combined to build a query.
WARNING: There is a limit of no more than 1024 fields being queried at once.
[[multi-match-types]]