mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-25 01:19:02 +00:00
Make fields optional in multi_match query and rely on index.query.default_field by default (#27380)
* Make fields optional in multi_match query and rely on index.query.default_field by default This commit adds the ability to send a `multi_match` query without providing any `fields`. When no fields are provided, the `multi_match` query will use the fields defined in the index setting `index.query.default_field` (which in turn defaults to `*`). The same behavior is already implemented in `query_string` and `simple_query_string` so this change just applies the heuristic to `multi_match` queries. Relying on `index.query.default_field` rather than `*` is safer for big mappings that break the 1024 field expansion limit added in 7.0 for all text queries. For these kinds of mappings the admin can change the `index.query.default_field` in order to make sure that exploratory queries using `multi_match`, `query_string` or `simple_query_string` do not throw an exception.
This commit is contained in:
parent
f761a0e0e4
commit
53462f6499
@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
@ -36,9 +37,11 @@ import org.elasticsearch.index.query.support.QueryParsers;
|
||||
import org.elasticsearch.index.search.MatchQuery;
|
||||
import org.elasticsearch.index.search.MultiMatchQuery;
|
||||
import org.elasticsearch.index.search.QueryParserHelper;
|
||||
import org.elasticsearch.index.search.QueryStringQueryParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
@ -55,7 +58,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
public static final int DEFAULT_PHRASE_SLOP = MatchQuery.DEFAULT_PHRASE_SLOP;
|
||||
public static final int DEFAULT_PREFIX_LENGTH = FuzzyQuery.defaultPrefixLength;
|
||||
public static final int DEFAULT_MAX_EXPANSIONS = FuzzyQuery.defaultMaxExpansions;
|
||||
public static final boolean DEFAULT_LENIENCY = MatchQuery.DEFAULT_LENIENCY;
|
||||
public static final MatchQuery.ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;
|
||||
public static final boolean DEFAULT_FUZZY_TRANSPOSITIONS = FuzzyQuery.defaultTranspositions;
|
||||
|
||||
@ -91,7 +93,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
private String fuzzyRewrite = null;
|
||||
private Boolean useDisMax;
|
||||
private Float tieBreaker;
|
||||
private boolean lenient = DEFAULT_LENIENCY;
|
||||
private Boolean lenient;
|
||||
private Float cutoffFrequency = null;
|
||||
private MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
|
||||
private boolean autoGenerateSynonymsPhraseQuery = true;
|
||||
@ -223,7 +225,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
fuzzyRewrite = in.readOptionalString();
|
||||
useDisMax = in.readOptionalBoolean();
|
||||
tieBreaker = in.readOptionalFloat();
|
||||
lenient = in.readBoolean();
|
||||
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
lenient = in.readOptionalBoolean();
|
||||
} else {
|
||||
lenient = in.readBoolean();
|
||||
}
|
||||
cutoffFrequency = in.readOptionalFloat();
|
||||
zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
|
||||
if (in.getVersion().onOrAfter(Version.V_6_1_0)) {
|
||||
@ -251,7 +257,11 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
out.writeOptionalString(fuzzyRewrite);
|
||||
out.writeOptionalBoolean(useDisMax);
|
||||
out.writeOptionalFloat(tieBreaker);
|
||||
out.writeBoolean(lenient);
|
||||
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
out.writeOptionalBoolean(lenient);
|
||||
} else {
|
||||
out.writeBoolean(lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient);
|
||||
}
|
||||
out.writeOptionalFloat(cutoffFrequency);
|
||||
zeroTermsQuery.writeTo(out);
|
||||
if (out.getVersion().onOrAfter(Version.V_6_1_0)) {
|
||||
@ -488,7 +498,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
}
|
||||
|
||||
public boolean lenient() {
|
||||
return lenient;
|
||||
return lenient == null ? MatchQuery.DEFAULT_LENIENCY : lenient;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -588,7 +598,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
if (tieBreaker != null) {
|
||||
builder.field(TIE_BREAKER_FIELD.getPreferredName(), tieBreaker);
|
||||
}
|
||||
builder.field(LENIENT_FIELD.getPreferredName(), lenient);
|
||||
if (lenient != null) {
|
||||
builder.field(LENIENT_FIELD.getPreferredName(), lenient);
|
||||
}
|
||||
if (cutoffFrequency != null) {
|
||||
builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency);
|
||||
}
|
||||
@ -614,7 +626,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
Boolean useDisMax = null;
|
||||
Float tieBreaker = null;
|
||||
Float cutoffFrequency = null;
|
||||
boolean lenient = DEFAULT_LENIENCY;
|
||||
Boolean lenient = null;
|
||||
MatchQuery.ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;
|
||||
boolean autoGenerateSynonymsPhraseQuery = true;
|
||||
boolean fuzzyTranspositions = DEFAULT_FUZZY_TRANSPOSITIONS;
|
||||
@ -698,16 +710,12 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
throw new ParsingException(parser.getTokenLocation(), "No text specified for multi_match query");
|
||||
}
|
||||
|
||||
if (fieldsBoosts.isEmpty()) {
|
||||
throw new ParsingException(parser.getTokenLocation(), "No fields specified for multi_match query");
|
||||
}
|
||||
|
||||
if (fuzziness != null && (type == Type.CROSS_FIELDS || type == Type.PHRASE || type == Type.PHRASE_PREFIX)) {
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
"Fuzziness not allowed for type [" + type.parseField.getPreferredName() + "]");
|
||||
}
|
||||
|
||||
return new MultiMatchQueryBuilder(value)
|
||||
MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder(value)
|
||||
.fields(fieldsBoosts)
|
||||
.type(type)
|
||||
.analyzer(analyzer)
|
||||
@ -715,7 +723,6 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
.fuzziness(fuzziness)
|
||||
.fuzzyRewrite(fuzzyRewrite)
|
||||
.useDisMax(useDisMax)
|
||||
.lenient(lenient)
|
||||
.maxExpansions(maxExpansions)
|
||||
.minimumShouldMatch(minimumShouldMatch)
|
||||
.operator(operator)
|
||||
@ -727,6 +734,10 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
.boost(boost)
|
||||
.queryName(queryName)
|
||||
.fuzzyTranspositions(fuzzyTranspositions);
|
||||
if (lenient != null) {
|
||||
builder.lenient(lenient);
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
|
||||
private static void parseFieldAndBoost(XContentParser parser, Map<String, Float> fieldsBoosts) throws IOException {
|
||||
@ -778,7 +789,9 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
if (cutoffFrequency != null) {
|
||||
multiMatchQuery.setCommonTermsCutoff(cutoffFrequency);
|
||||
}
|
||||
multiMatchQuery.setLenient(lenient);
|
||||
if (lenient != null) {
|
||||
multiMatchQuery.setLenient(lenient);
|
||||
}
|
||||
multiMatchQuery.setZeroTermsQuery(zeroTermsQuery);
|
||||
multiMatchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
|
||||
multiMatchQuery.setTranspositions(fuzzyTranspositions);
|
||||
@ -793,8 +806,20 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder<MultiMatchQuery
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Float> newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
|
||||
Map<String, Float> newFieldsBoosts;
|
||||
if (fieldsBoosts.isEmpty()) {
|
||||
// no fields provided, defaults to index.query.default_field
|
||||
List<String> defaultFields = context.defaultFields();
|
||||
boolean isAllField = defaultFields.size() == 1 && Regex.isMatchAllPattern(defaultFields.get(0));
|
||||
if (isAllField && lenient == null) {
|
||||
// Sets leniency to true if not explicitly
|
||||
// set in the request
|
||||
multiMatchQuery.setLenient(true);
|
||||
}
|
||||
newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, QueryParserHelper.parseFieldsAndWeights(defaultFields));
|
||||
} else {
|
||||
newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts);
|
||||
}
|
||||
return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch);
|
||||
}
|
||||
|
||||
|
@ -32,8 +32,10 @@ import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PointRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type;
|
||||
import org.elasticsearch.index.search.MatchQuery;
|
||||
@ -41,6 +43,7 @@ import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.test.AbstractQueryTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -66,18 +69,28 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
|
||||
assumeTrue("test with date fields runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||
}
|
||||
|
||||
// creates the query with random value and field name
|
||||
Object value;
|
||||
final Object value;
|
||||
if (fieldName.equals(STRING_FIELD_NAME)) {
|
||||
value = getRandomQueryText();
|
||||
} else {
|
||||
value = getRandomValueForFieldName(fieldName);
|
||||
}
|
||||
MultiMatchQueryBuilder query = new MultiMatchQueryBuilder(value, fieldName);
|
||||
// field with random boost
|
||||
if (randomBoolean()) {
|
||||
query.field(fieldName, randomFloat() * 10);
|
||||
|
||||
final MultiMatchQueryBuilder query;
|
||||
if (rarely()) {
|
||||
query = new MultiMatchQueryBuilder(value, fieldName);
|
||||
if (randomBoolean()) {
|
||||
query.lenient(randomBoolean());
|
||||
}
|
||||
// field with random boost
|
||||
if (randomBoolean()) {
|
||||
query.field(fieldName, randomFloat() * 10);
|
||||
}
|
||||
} else {
|
||||
query = new MultiMatchQueryBuilder(value);
|
||||
query.lenient(true);
|
||||
}
|
||||
|
||||
// sets other parameters of the multi match query
|
||||
if (randomBoolean()) {
|
||||
query.type(randomFrom(MultiMatchQueryBuilder.Type.values()));
|
||||
@ -112,9 +125,6 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
|
||||
if (randomBoolean()) {
|
||||
query.tieBreaker(randomFloat());
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
query.lenient(randomBoolean());
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
query.cutoffFrequency((float) 10 / randomIntBetween(1, 100));
|
||||
}
|
||||
@ -338,4 +348,56 @@ public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase<MultiMatc
|
||||
|
||||
assertEquals(expected, query);
|
||||
}
|
||||
|
||||
/**
 * Checks how a {@code multi_match} query built without any explicit fields resolves its
 * target fields from the {@code index.query.default_field} index setting.
 *
 * The test runs four steps against the same shard context:
 * 1. with the unmodified index settings the query builds and yields a DisjunctionMaxQuery;
 * 2. with the setting listing two string fields (the second one boosted with "^5") the
 *    query is a disjunction of one term query per field, the second wrapped in a boost;
 * 3. with INT_FIELD_NAME added to the setting, building the query throws a
 *    NumberFormatException for the text "hello";
 * 4. once lenient is explicitly set to true, the same settings build successfully and the
 *    failing field is represented by a MatchNoDocsQuery.
 */
public void testDefaultField() throws Exception {
|
||||
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
|
||||
QueryShardContext context = createShardContext();
|
||||
MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("hello");
|
||||
// should pass because we set lenient to true when default field is `*`
|
||||
Query query = builder.toQuery(context);
|
||||
assertThat(query, instanceOf(DisjunctionMaxQuery.class));
|
||||
|
||||
// restrict the default fields to the two string fields, boosting the second one by 5
context.getIndexSettings().updateIndexMetaData(
|
||||
newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field",
|
||||
STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5").build())
|
||||
);
|
||||
|
||||
MultiMatchQueryBuilder qb = new MultiMatchQueryBuilder("hello");
|
||||
query = qb.toQuery(context);
|
||||
DisjunctionMaxQuery expected = new DisjunctionMaxQuery(
|
||||
Arrays.asList(
|
||||
new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
|
||||
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f)
|
||||
), 0.0f
|
||||
);
|
||||
assertEquals(expected, query);
|
||||
|
||||
// add INT_FIELD_NAME to the default fields; the text "hello" cannot be parsed for it
context.getIndexSettings().updateIndexMetaData(
|
||||
newIndexMeta("index", context.getIndexSettings().getSettings(), Settings.builder().putList("index.query.default_field",
|
||||
STRING_FIELD_NAME, STRING_FIELD_NAME_2 + "^5", INT_FIELD_NAME).build())
|
||||
);
|
||||
// should fail because lenient defaults to false
|
||||
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> qb.toQuery(context));
|
||||
assertThat(exc, instanceOf(NumberFormatException.class));
|
||||
assertThat(exc.getMessage(), equalTo("For input string: \"hello\""));
|
||||
|
||||
// explicitly sets lenient
|
||||
qb.lenient(true);
|
||||
query = qb.toQuery(context);
|
||||
expected = new DisjunctionMaxQuery(
|
||||
Arrays.asList(
|
||||
new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
|
||||
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f),
|
||||
new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]")
|
||||
), 0.0f
|
||||
);
|
||||
assertEquals(expected, query);
|
||||
}
|
||||
|
||||
/**
 * Builds an IndexMetaData named {@code name} from two sets of settings.
 *
 * The combined settings are assembled by putting {@code oldIndexSettings} into a fresh
 * builder first and {@code indexSettings} second.
 * NOTE(review): presumably entries in {@code indexSettings} win over entries with the same
 * key in {@code oldIndexSettings}; confirm against Settings.Builder#put.
 *
 * @param name             the index name passed to IndexMetaData.builder
 * @param oldIndexSettings the settings copied first
 * @param indexSettings    the settings copied second
 * @return the index metadata built from the combined settings
 */
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
|
||||
Settings build = Settings.builder().put(oldIndexSettings)
|
||||
.put(indexSettings)
|
||||
.build();
|
||||
return IndexMetaData.builder(name).settings(build).build();
|
||||
}
|
||||
}
|
||||
|
@ -58,6 +58,11 @@ GET /_search
|
||||
|
||||
<1> The `subject` field is three times as important as the `message` field.
|
||||
|
||||
If no `fields` are provided, the `multi_match` query defaults to the `index.query.default_field`
|
||||
index setting, which in turn defaults to `*`. `*` extracts all fields in the mapping that
|
||||
are eligible for term queries and filters out the metadata fields. All extracted fields are then
|
||||
combined to build a query.
|
||||
|
||||
WARNING: There is a limit of no more than 1024 fields being queried at once.
|
||||
|
||||
[[multi-match-types]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user