Deduplicate alias and concrete fields in query field expansion (#42328)

The full-text query parsers accept field pattern that are expanded using the mapping.
Alias field are also detected during the expansion but they are not deduplicated with the
concrete fields that are found from other patterns (or the same). This change ensures
that we deduplicate the target fields of the full-text query parsers in order to avoid
adding the same clause multiple times. Boolean queries are already able to deduplicate
clauses during rewrite but since we also use DisjunctionMaxQuery it is preferable to detect
 these duplicates early on.
This commit is contained in:
Jim Ferenczi 2019-06-05 08:47:33 +02:00 committed by jimczi
parent 41a9f3ae3b
commit de0ea4bbf7
5 changed files with 40 additions and 32 deletions

View File

@ -55,6 +55,10 @@ public final class QueryParserHelper {
} else {
fieldName = field;
}
// handle duplicates
if (fieldsAndWeights.containsKey(field)) {
boost *= fieldsAndWeights.get(field);
}
fieldsAndWeights.put(fieldName, boost);
}
return fieldsAndWeights;
@ -84,7 +88,13 @@ public final class QueryParserHelper {
float weight = fieldEntry.getValue() == null ? 1.0f : fieldEntry.getValue();
Map<String, Float> fieldMap = resolveMappingField(context, fieldEntry.getKey(), weight,
!multiField, !allField, fieldSuffix);
resolvedFields.putAll(fieldMap);
for (Map.Entry<String, Float> field : fieldMap.entrySet()) {
float boost = field.getValue();
if (resolvedFields.containsKey(field.getKey())) {
boost *= resolvedFields.get(field.getKey());
}
resolvedFields.put(field.getKey(), boost);
}
}
checkForTooManyFields(resolvedFields, context);
return resolvedFields;
@ -149,7 +159,12 @@ public final class QueryParserHelper {
// other exceptions are parsing errors or not indexed fields: keep
}
}
fields.put(fieldName, weight);
// handle duplicates
float w = weight;
if (fields.containsKey(fieldType.name())) {
w *= fields.get(fieldType.name());
}
fields.put(fieldType.name(), w);
}
checkForTooManyFields(fields, context);
return fields;

View File

@ -241,10 +241,9 @@ public class MultiMatchQueryBuilderTests extends FullTextQueryTestCase<MultiMatc
assertThat(query, instanceOf(DisjunctionMaxQuery.class));
DisjunctionMaxQuery dQuery = (DisjunctionMaxQuery) query;
assertThat(dQuery.getTieBreakerMultiplier(), equalTo(1.0f));
assertThat(dQuery.getDisjuncts().size(), equalTo(3));
assertThat(dQuery.getDisjuncts().size(), equalTo(2));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 0).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 1).getTerm(), equalTo(new Term(STRING_FIELD_NAME_2, "test")));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 2).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "test")));
}
public void testToQueryFieldMissing() throws Exception {
@ -266,23 +265,6 @@ public class MultiMatchQueryBuilderTests extends FullTextQueryTestCase<MultiMatc
}
public void testToQueryBooleanPrefixMultipleFields() throws IOException {
{
final MultiMatchQueryBuilder builder = new MultiMatchQueryBuilder("foo bar", STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME);
builder.type(Type.BOOL_PREFIX);
final Query query = builder.toQuery(createShardContext());
assertThat(query, instanceOf(DisjunctionMaxQuery.class));
final DisjunctionMaxQuery disMaxQuery = (DisjunctionMaxQuery) query;
assertThat(disMaxQuery.getDisjuncts(), hasSize(2));
for (Query disjunctQuery : disMaxQuery.getDisjuncts()) {
assertThat(disjunctQuery, instanceOf(BooleanQuery.class));
final BooleanQuery booleanQuery = (BooleanQuery) disjunctQuery;
assertThat(booleanQuery.clauses(), hasSize(2));
assertThat(assertBooleanSubQuery(booleanQuery, TermQuery.class, 0).getTerm(), equalTo(new Term(STRING_FIELD_NAME, "foo")));
assertThat(assertBooleanSubQuery(booleanQuery, PrefixQuery.class, 1).getPrefix(),
equalTo(new Term(STRING_FIELD_NAME, "bar")));
}
}
{
// STRING_FIELD_NAME_2 is a keyword field
final MultiMatchQueryBuilder queryBuilder = new MultiMatchQueryBuilder("foo bar", STRING_FIELD_NAME, STRING_FIELD_NAME_2);
@ -451,9 +433,9 @@ public class MultiMatchQueryBuilderTests extends FullTextQueryTestCase<MultiMatc
query = qb.toQuery(context);
expected = new DisjunctionMaxQuery(
Arrays.asList(
new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]"),
new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f),
new MatchNoDocsQuery("failed [mapped_int] query, caused by number_format_exception:[For input string: \"hello\"]")
new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "hello")), 5.0f)
), 0.0f
);
assertEquals(expected, query);
@ -571,7 +553,7 @@ public class MultiMatchQueryBuilderTests extends FullTextQueryTestCase<MultiMatc
noMatchNoDocsQueries++;
}
}
assertEquals(11, noMatchNoDocsQueries);
assertEquals(9, noMatchNoDocsQueries);
assertThat(disjunctionMaxQuery.getDisjuncts(), hasItems(new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
new TermQuery(new Term(STRING_FIELD_NAME_2, "hello"))));
}

View File

@ -519,13 +519,11 @@ public class QueryStringQueryBuilderTests extends FullTextQueryTestCase<QueryStr
Query query = queryStringQuery("test").field("mapped_str*").toQuery(createShardContext());
assertThat(query, instanceOf(DisjunctionMaxQuery.class));
DisjunctionMaxQuery dQuery = (DisjunctionMaxQuery) query;
assertThat(dQuery.getDisjuncts().size(), equalTo(3));
assertThat(dQuery.getDisjuncts().size(), equalTo(2));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 0).getTerm(),
equalTo(new Term(STRING_FIELD_NAME, "test")));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 1).getTerm(),
equalTo(new Term(STRING_FIELD_NAME_2, "test")));
assertThat(assertDisjunctionSubQuery(query, TermQuery.class, 2).getTerm(),
equalTo(new Term(STRING_FIELD_NAME, "test")));
}
public void testToQueryDisMaxQuery() throws Exception {
@ -1541,6 +1539,19 @@ public class QueryStringQueryBuilderTests extends FullTextQueryTestCase<QueryStr
assertThat(exc.getMessage(), CoreMatchers.containsString("negative [boost]"));
}
public void testMergeBoosts() throws IOException {
Query query = new QueryStringQueryBuilder("first")
.type(MultiMatchQueryBuilder.Type.MOST_FIELDS)
.field(STRING_FIELD_NAME, 0.3f)
.field(STRING_FIELD_NAME.substring(0, STRING_FIELD_NAME.length()-2) + "*", 0.5f)
.toQuery(createShardContext());
List<Query> terms = new ArrayList<>();
terms.add(new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME, "first")), 0.075f));
terms.add(new BoostQuery(new TermQuery(new Term(STRING_FIELD_NAME_2, "first")), 0.5f));
Query expected = new DisjunctionMaxQuery(terms, 1.0f);
assertEquals(expected, query);
}
private static IndexMetaData newIndexMeta(String name, Settings oldIndexSettings, Settings indexSettings) {
Settings build = Settings.builder().put(oldIndexSettings)
.put(indexSettings)
@ -1557,7 +1568,7 @@ public class QueryStringQueryBuilderTests extends FullTextQueryTestCase<QueryStr
noMatchNoDocsQueries++;
}
}
assertEquals(11, noMatchNoDocsQueries);
assertEquals(9, noMatchNoDocsQueries);
assertThat(disjunctionMaxQuery.getDisjuncts(), hasItems(new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
new TermQuery(new Term(STRING_FIELD_NAME_2, "hello"))));
}

View File

@ -104,7 +104,7 @@ public class SimpleQueryStringBuilderTests extends FullTextQueryTestCase<SimpleQ
int fieldCount = randomIntBetween(0, 2);
Map<String, Float> fields = new HashMap<>();
for (int i = 0; i < fieldCount; i++) {
if (randomBoolean()) {
if (i == 0) {
String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME);
fields.put(fieldName, AbstractQueryBuilder.DEFAULT_BOOST);
} else {
@ -786,7 +786,7 @@ public class SimpleQueryStringBuilderTests extends FullTextQueryTestCase<SimpleQ
noMatchNoDocsQueries++;
}
}
assertEquals(11, noMatchNoDocsQueries);
assertEquals(9, noMatchNoDocsQueries);
assertThat(disjunctionMaxQuery.getDisjuncts(), hasItems(new TermQuery(new Term(STRING_FIELD_NAME, "hello")),
new TermQuery(new Term(STRING_FIELD_NAME_2, "hello"))));
}

View File

@ -111,8 +111,8 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
.toQuery(queryShardContext);
try (Engine.Searcher searcher = indexService.getShard(0).acquireSearcher("test")) {
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
Query tq1 = new BoostQuery(new TermQuery(new Term("name.first", "banon")), 2);
Query tq2 = new BoostQuery(new TermQuery(new Term("name.last", "banon")), 3);
Query tq1 = new BoostQuery(new TermQuery(new Term("name.last", "banon")), 3);
Query tq2 = new BoostQuery(new TermQuery(new Term("name.first", "banon")), 2);
Query expected = new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker);
assertEquals(expected, rewrittenQuery);
}