From 53462f6499617c6152718a5b7b2e618a8d093dba Mon Sep 17 00:00:00 2001 From: Jim Ferenczi <jim.ferenczi@elastic.co> Date: Fri, 17 Nov 2017 10:25:21 +0100 Subject: [PATCH] Make fields optional in multi_match query and rely on index.query.default_field by default (#27380) * Make fields optional in multi_match query and rely on index.query.default_field by default This commit adds the ability to send a `multi_match` query without providing any `fields`. When no fields are provided the `multi_match` query will use the fields defined in the index setting `index.query.default_field` (which in turn defaults to `*`). The same behavior is already implemented in `query_string` and `simple_query_string` so this change just applies the heuristic to `multi_match` queries. Relying on `index.query.default_field` rather than `*` is safer for big mappings that break the 1024 field expansion limit added in 7.0 for all text queries. For this kind of mapping the admin can change the `index.query.default_field` in order to make sure that exploratory queries using `multi_match`, `query_string` or `simple_query_string` do not throw an exception. 
--- .../index/query/MultiMatchQueryBuilder.java | 57 +++++++++---- .../query/MultiMatchQueryBuilderTests.java | 80 ++++++++++++++++--- .../query-dsl/multi-match-query.asciidoc | 5 ++ 3 files changed, 117 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index 6063b8a1204..c9a5e7e52e9 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -36,9 +37,11 @@ import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.search.MultiMatchQuery; import org.elasticsearch.index.search.QueryParserHelper; +import org.elasticsearch.index.search.QueryStringQueryParser; import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -55,7 +58,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery public static final int DEFAULT_PHRASE_SLOP = MatchQuery.DEFAULT_PHRASE_SLOP; public static final int DEFAULT_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength; public static final int DEFAULT_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions; - public static final boolean DEFAULT_LENIENCY = MatchQuery.DEFAULT_LENIENCY; public static final MatchQuery.ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = 
MatchQuery.DEFAULT_ZERO_TERMS_QUERY; public static final boolean DEFAULT_FUZZY_TRANSPOSITIONS = FuzzyQuery.defaultTranspositions; @@ -91,7 +93,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery private String fuzzyRewrite = null; private Boolean useDisMax; private Float tieBreaker; - private boolean lenient = DEFAULT_LENIENCY; + private Boolean lenient; private Float cutoffFrequency = null; private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY; private boolean autoGenerateSynonymsPhraseQuery = true; @@ -223,7 +225,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery fuzzyRewrite = in.readOptionalString(); useDisMax = in.readOptionalBoolean(); tieBreaker = in.readOptionalFloat(); - lenient = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + lenient = in.readOptionalBoolean(); + } else { + lenient = in.readBoolean(); + } cutoffFrequency = in.readOptionalFloat(); zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in); if (in.getVersion().onOrAfter(Version.V_6_1_0)) { @@ -251,7 +257,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery out.writeOptionalString(fuzzyRewrite); out.writeOptionalBoolean(useDisMax); out.writeOptionalFloat(tieBreaker); - out.writeBoolean(lenient); + if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { + out.writeOptionalBoolean(lenient); + } else { + out.writeBoolean(lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient); + } out.writeOptionalFloat(cutoffFrequency); zeroTermsQuery.writeTo(out); if (out.getVersion().onOrAfter(Version.V_6_1_0)) { @@ -488,7 +498,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery } public boolean lenient() { - return lenient; + return lenient == null ? 
MatchQuery.DEFAULT_LENIENCY : lenient; } /** @@ -588,7 +598,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery if (tieBreaker != null) { builder.field(TIE_BREAKER_FIELD.getPreferredName(), tieBreaker); } - builder.field(LENIENT_FIELD.getPreferredName(), lenient); + if (lenient != null) { + builder.field(LENIENT_FIELD.getPreferredName(), lenient); + } if (cutoffFrequency != null) { builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency); } @@ -614,7 +626,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery Boolean useDisMax = null; Float tieBreaker = null; Float cutoffFrequency = null; - boolean lenient = DEFAULT_LENIENCY; + Boolean lenient = null; MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY; boolean autoGenerateSynonymsPhraseQuery = true; boolean fuzzyTranspositions = DEFAULT_FUZZY_TRANSPOSITIONS; @@ -698,16 +710,12 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery throw new ParsingException(parser.getTokenLocation(), "No text specified for multi_match query"); } - if (fieldsBoosts.isEmpty()) { - throw new ParsingException(parser.getTokenLocation(), "No fields specified for multi_match query"); - } - if (fuzziness != null && (type == Type.CROSS_FIELDS || type == Type.PHRASE || type == Type.PHRASE_PREFIX)) { throw new ParsingException(parser.getTokenLocation(), "Fuzziness not allowed for type [" + type.parseField.getPreferredName() + "]"); } - return new MultiMatchQueryBuilder(value) + MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(value) .fields(fieldsBoosts) .type(type) .analyzer(analyzer) @@ -715,7 +723,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery .fuzziness(fuzziness) .fuzzyRewrite(fuzzyRewrite) .useDisMax(useDisMax) - .lenient(lenient) .maxExpansions(maxExpansions) .minimumShouldMatch(minimumShouldMatch) .operator(operator) @@ -727,6 +734,10 @@ public class 
MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery .boost(boost) .queryName(queryName) .fuzzyTranspositions(fuzzyTranspositions); + if (lenient != null) { + builder.lenient(lenient); + } + return builder; } private static void parseFieldAndBoost(XContentParser parser, Map<String, Float> fieldsBoosts) throws IOException { @@ -778,7 +789,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery if (cutoffFrequency != null) { multiMatchQuery.setCommonTermsCutoff(cutoffFrequency); } - multiMatchQuery.setLenient(lenient); + if (lenient != null) { + multiMatchQuery.setLenient(lenient); + } multiMatchQuery.setZeroTermsQuery(zeroTermsQuery); multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); multiMatchQuery.setTranspositions(fuzzyTranspositions); @@ -793,8 +806,20 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery } } } - - Map<String, Float> newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts); + Map<String, Float> newFieldsBoosts; + if (fieldsBoosts.isEmpty()) { + // no fields provided, defaults to index.query.default_field + List<String> defaultFields = context.defaultFields(); + boolean isAllField = defaultFields.size() == 1 && Regex.isMatchAllPattern(defaultFields.get(0)); + if (isAllField && lenient == null) { + // Sets leniency to true if not explicitly + // set in the request + multiMatchQuery.setLenient(true); + } + newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, QueryParserHelper.parseFieldsAndWeights(defaultFields)); + } else { + newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts); + } return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch); } diff --git a/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index a0afe28a17b..e81edb7dcf9 100644 
--- a/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -32,8 +32,10 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type; import org.elasticsearch.index.search.MatchQuery; @@ -41,6 +43,7 @@ import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -66,18 +69,28 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc assumeTrue("test with date fields runs only when at least a type is registered", getCurrentTypes().length > 0); } - // creates the query with random value and field name - Object value; + final Object value; if (fieldName.equals(STRING_FIELD_NAME)) { value = getRandomQueryText(); } else { value = getRandomValueForFieldName(fieldName); } - MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(value, fieldName); - // field with random boost - if (randomBoolean()) { - query.field(fieldName, randomFloat() * 10); + + final MultiMatchQueryBuilder query; + if (rarely()) { + query = new MultiMatchQueryBuilder(value, fieldName); + if (randomBoolean()) { + query.lenient(randomBoolean()); + } + // field with random boost + if (randomBoolean()) { + query.field(fieldName, randomFloat() * 10); + } + } else { + query = new MultiMatchQueryBuilder(value); + query.lenient(true); } + 
// sets other parameters of the multi match query if (randomBoolean()) { query.type(randomFrom(MultiMatchQueryBuilder.Type.values())); @@ -112,9 +125,6 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc if (randomBoolean()) { query.tieBreaker(randomFloat()); } - if (randomBoolean()) { - query.lenient(randomBoolean()); - } if (randomBoolean()) { query.cutoffFrequency((float) 10 / randomIntBetween(1, 100)); } @@ -338,4 +348,56 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc assertEquals(expected, query); } + + public void testDefaultField() throws Exception { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + QueryShardContext context = createShardContext(); + MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("hello"); + // should pass because we set lenient to true when default field is `*` + Query query = builder.toQuery(context); + assertThat(query, instanceOf(DisjunctionMaxQuery.class)); + + context.getIndexSettings().updateIndexMetaData( + newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field", + STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5").build()) + ); + + MultiMatchQueryBuilder qb = new MultiMatchQueryBuilder("hello"); + query = qb.toQuery(context); + DisjunctionMaxQuery expected = new DisjunctionMaxQuery( + Arrays.asList( + new TermQuery(new Term(STRING_FIELD_NAME, "hello")), + new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f) + ), 0.0f + ); + assertEquals(expected, query); + + context.getIndexSettings().updateIndexMetaData( + newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field", + STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5", INT_FIELD_NAME).build()) + ); + // should fail because lenient defaults to false + IllegalArgumentException exc = 
expectThrows(IllegalArgumentException.class, () -> qb.toQuery(context)); + assertThat(exc, instanceOf(NumberFormatException.class)); + assertThat(exc.getMessage(), equalTo("For input string: \"hello\"")); + + // explicitly sets lenient + qb.lenient(true); + query = qb.toQuery(context); + expected = new DisjunctionMaxQuery( + Arrays.asList( + new TermQuery(new Term(STRING_FIELD_NAME, "hello")), + new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f), + new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]") + ), 0.0f + ); + assertEquals(expected, query); + } + + private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) { + Settings build = Settings.builder().put(oldIndexSettings) + .put(indexSettings) + .build(); + return IndexMetaData.builder(name).settings(build).build(); + } } diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 217d3a7d211..edb6ff11da7 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -58,6 +58,11 @@ GET /_search <1> The `subject` field is three times as important as the `message` field. +If no `fields` are provided, the `multi_match` query defaults to the `index.query.default_field` +index setting, which in turn defaults to `*`. `*` extracts all fields in the mapping that +are eligible for term queries and filters out the metadata fields. All extracted fields are then +combined to build a query. + WARNING: There is a limit of no more than 1024 fields being queried at once. [[multi-match-types]]