From 53462f6499617c6152718a5b7b2e618a8d093dba Mon Sep 17 00:00:00 2001
From: Jim Ferenczi <jim.ferenczi@elastic.co>
Date: Fri, 17 Nov 2017 10:25:21 +0100
Subject: [PATCH] Make fields optional in multi_match query and rely on
 index.query.default_field by default (#27380)

* Make fields optional in multi_match query and rely on index.query.default_field by default

This commit adds the ability to send `multi_match` query without providing any `fields`.
When no fields are provided, the `multi_match` query will use the fields defined in the index setting `index.query.default_field`
(which in turn defaults to `*`).
The same behavior is already implemented in `query_string` and `simple_query_string` so this change just applies
the heuristic to `multi_match` queries.
Relying on `index.query.default_field` rather than `*` is safer for big mappings that break the 1024 field expansion limit added in 7.0 for all
text queries. For these kinds of mappings, the admin can change the `index.query.default_field` in order to make sure that exploratory queries using
`multi_match`, `query_string` or `simple_query_string` do not throw an exception.
---
 .../index/query/MultiMatchQueryBuilder.java   | 57 +++++++++----
 .../query/MultiMatchQueryBuilderTests.java    | 80 ++++++++++++++++---
 .../query-dsl/multi-match-query.asciidoc      |  5 ++
 3 files changed, 117 insertions(+), 25 deletions(-)

diff --git a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java
index 6063b8a1204..c9a5e7e52e9 100644
--- a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java
+++ b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java
@@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -36,9 +37,11 @@ import org.elasticsearch.index.query.support.QueryParsers;
 import org.elasticsearch.index.search.MatchQuery;
 import org.elasticsearch.index.search.MultiMatchQuery;
 import org.elasticsearch.index.search.QueryParserHelper;
+import org.elasticsearch.index.search.QueryStringQueryParser;
 
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
@@ -55,7 +58,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     public static final int DEFAULT_PHRASE_SLOP = MatchQuery.DEFAULT_PHRASE_SLOP;
     public static final int DEFAULT_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength;
     public static final int DEFAULT_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions;
-    public static final boolean DEFAULT_LENIENCY = MatchQuery.DEFAULT_LENIENCY;
     public static final MatchQuery.ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;
     public static final boolean DEFAULT_FUZZY_TRANSPOSITIONS = FuzzyQuery.defaultTranspositions;
 
@@ -91,7 +93,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     private String fuzzyRewrite = null;
     private Boolean useDisMax;
     private Float tieBreaker;
-    private boolean lenient = DEFAULT_LENIENCY;
+    private Boolean lenient;
     private Float cutoffFrequency = null;
     private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
     private boolean autoGenerateSynonymsPhraseQuery = true;
@@ -223,7 +225,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         fuzzyRewrite = in.readOptionalString();
         useDisMax = in.readOptionalBoolean();
         tieBreaker = in.readOptionalFloat();
-        lenient = in.readBoolean();
+        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            lenient = in.readOptionalBoolean();
+        } else {
+            lenient = in.readBoolean();
+        }
         cutoffFrequency = in.readOptionalFloat();
         zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
         if (in.getVersion().onOrAfter(Version.V_6_1_0)) {
@@ -251,7 +257,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         out.writeOptionalString(fuzzyRewrite);
         out.writeOptionalBoolean(useDisMax);
         out.writeOptionalFloat(tieBreaker);
-        out.writeBoolean(lenient);
+        if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+            out.writeOptionalBoolean(lenient);
+        } else {
+            out.writeBoolean(lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient);
+        }
         out.writeOptionalFloat(cutoffFrequency);
         zeroTermsQuery.writeTo(out);
         if (out.getVersion().onOrAfter(Version.V_6_1_0)) {
@@ -488,7 +498,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
     }
 
     public boolean lenient() {
-        return lenient;
+        return lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient;
     }
 
     /**
@@ -588,7 +598,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         if (tieBreaker != null) {
             builder.field(TIE_BREAKER_FIELD.getPreferredName(), tieBreaker);
         }
-        builder.field(LENIENT_FIELD.getPreferredName(), lenient);
+        if (lenient != null) {
+            builder.field(LENIENT_FIELD.getPreferredName(), lenient);
+        }
         if (cutoffFrequency != null) {
             builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
         }
@@ -614,7 +626,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         Boolean useDisMax = null;
         Float tieBreaker = null;
         Float cutoffFrequency = null;
-        boolean lenient = DEFAULT_LENIENCY;
+        Boolean lenient = null;
         MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
         boolean autoGenerateSynonymsPhraseQuery = true;
         boolean fuzzyTranspositions = DEFAULT_FUZZY_TRANSPOSITIONS;
@@ -698,16 +710,12 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
             throw new ParsingException(parser.getTokenLocation(), "No text specified for multi_match query");
         }
 
-        if (fieldsBoosts.isEmpty()) {
-            throw new ParsingException(parser.getTokenLocation(), "No fields specified for multi_match query");
-        }
-
         if (fuzziness != null && (type == Type.CROSS_FIELDS || type == Type.PHRASE || type == Type.PHRASE_PREFIX)) {
             throw new ParsingException(parser.getTokenLocation(),
                     "Fuzziness not allowed for type [" + type.parseField.getPreferredName() + "]");
         }
 
-        return new MultiMatchQueryBuilder(value)
+        MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(value)
                 .fields(fieldsBoosts)
                 .type(type)
                 .analyzer(analyzer)
@@ -715,7 +723,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                 .fuzziness(fuzziness)
                 .fuzzyRewrite(fuzzyRewrite)
                 .useDisMax(useDisMax)
-                .lenient(lenient)
                 .maxExpansions(maxExpansions)
                 .minimumShouldMatch(minimumShouldMatch)
                 .operator(operator)
@@ -727,6 +734,10 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                 .boost(boost)
                 .queryName(queryName)
                 .fuzzyTranspositions(fuzzyTranspositions);
+        if (lenient != null) {
+            builder.lenient(lenient);
+        }
+        return builder;
     }
 
     private static void parseFieldAndBoost(XContentParser parser, Map<String, Float> fieldsBoosts) throws IOException {
@@ -778,7 +789,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
         if (cutoffFrequency != null) {
             multiMatchQuery.setCommonTermsCutoff(cutoffFrequency);
         }
-        multiMatchQuery.setLenient(lenient);
+        if (lenient != null) {
+            multiMatchQuery.setLenient(lenient);
+        }
         multiMatchQuery.setZeroTermsQuery(zeroTermsQuery);
         multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
         multiMatchQuery.setTranspositions(fuzzyTranspositions);
@@ -793,8 +806,20 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
                 }
             }
         }
-
-        Map<String, Float> newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
+        Map<String, Float> newFieldsBoosts;
+        if (fieldsBoosts.isEmpty()) {
+            // no fields provided, defaults to index.query.default_field
+            List<String> defaultFields = context.defaultFields();
+            boolean isAllField = defaultFields.size() == 1 && Regex.isMatchAllPattern(defaultFields.get(0));
+            if (isAllField && lenient == null) {
+                // Sets leniency to true if not explicitly
+                // set in the request
+                multiMatchQuery.setLenient(true);
+            }
+            newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, QueryParserHelper.parseFieldsAndWeights(defaultFields));
+        } else {
+            newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
+        }
         return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch);
     }
 
diff --git a/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
index a0afe28a17b..e81edb7dcf9 100644
--- a/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
+++ b/core/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java
@@ -32,8 +32,10 @@ import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PointRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type;
 import org.elasticsearch.index.search.MatchQuery;
@@ -41,6 +43,7 @@ import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.test.AbstractQueryTestCase;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -66,18 +69,28 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
             assumeTrue("test with date fields runs only when at least a type is registered", getCurrentTypes().length > 0);
         }
 
-        // creates the query with random value and field name
-        Object value;
+        final Object value;
         if (fieldName.equals(STRING_FIELD_NAME)) {
             value = getRandomQueryText();
         } else {
             value = getRandomValueForFieldName(fieldName);
         }
-        MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(value, fieldName);
-        // field with random boost
-        if (randomBoolean()) {
-            query.field(fieldName, randomFloat() * 10);
+
+        final MultiMatchQueryBuilder query;
+        if (rarely()) {
+            query = new MultiMatchQueryBuilder(value, fieldName);
+            if (randomBoolean()) {
+                query.lenient(randomBoolean());
+            }
+            // field with random boost
+            if (randomBoolean()) {
+                query.field(fieldName, randomFloat() * 10);
+            }
+        } else {
+            query = new MultiMatchQueryBuilder(value);
+            query.lenient(true);
         }
+
         // sets other parameters of the multi match query
         if (randomBoolean()) {
             query.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
@@ -112,9 +125,6 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
         if (randomBoolean()) {
             query.tieBreaker(randomFloat());
         }
-        if (randomBoolean()) {
-            query.lenient(randomBoolean());
-        }
         if (randomBoolean()) {
             query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
         }
@@ -338,4 +348,56 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
 
         assertEquals(expected, query);
     }
+
+    public void testDefaultField() throws Exception {
+        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
+        QueryShardContext context = createShardContext();
+        MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("hello");
+        // should pass because we set lenient to true when default field is `*`
+        Query query = builder.toQuery(context);
+        assertThat(query, instanceOf(DisjunctionMaxQuery.class));
+
+        context.getIndexSettings().updateIndexMetaData(
+            newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field",
+                STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5").build())
+        );
+
+        MultiMatchQueryBuilder qb = new MultiMatchQueryBuilder("hello");
+        query = qb.toQuery(context);
+        DisjunctionMaxQuery expected = new DisjunctionMaxQuery(
+            Arrays.asList(
+                new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
+                new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f)
+            ), 0.0f
+        );
+        assertEquals(expected, query);
+
+        context.getIndexSettings().updateIndexMetaData(
+            newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field",
+                STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5", INT_FIELD_NAME).build())
+        );
+        // should fail because lenient defaults to false
+        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> qb.toQuery(context));
+        assertThat(exc, instanceOf(NumberFormatException.class));
+        assertThat(exc.getMessage(), equalTo("For input string: \"hello\""));
+
+        // explicitly sets lenient
+        qb.lenient(true);
+        query = qb.toQuery(context);
+        expected = new DisjunctionMaxQuery(
+            Arrays.asList(
+                new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
+                new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f),
+                new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]")
+            ), 0.0f
+        );
+        assertEquals(expected, query);
+    }
+
+    private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
+        Settings build = Settings.builder().put(oldIndexSettings)
+            .put(indexSettings)
+            .build();
+        return IndexMetaData.builder(name).settings(build).build();
+    }
 }
diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc
index 217d3a7d211..edb6ff11da7 100644
--- a/docs/reference/query-dsl/multi-match-query.asciidoc
+++ b/docs/reference/query-dsl/multi-match-query.asciidoc
@@ -58,6 +58,11 @@ GET /_search
 
 <1> The `subject` field is three times as important as the `message` field.
 
+If no `fields` are provided, the `multi_match` query defaults to the `index.query.default_field`
+index setting, which in turn defaults to `*`. `*` extracts all fields in the mapping that
+are eligible for term queries and filters out the metadata fields. All extracted fields are then
+combined to build a query.
+
 WARNING: There is a limit of no more than 1024 fields being queried at once.
 
 [[multi-match-types]]