Correctly handle MSM for nested disjunctions (#50669)

With the rewrite of the percolator's QueryAnalyzer to use lucene's QueryVisitor API,
term queries that are direct children of a boolean query are handled separately from
other children. This works fine for conjunctions, but for disjunctions we need to
treat the extracted terms from these direct descendents along with extractions from
more deeply nested children to ensure that minimum-should-match requirements
are met correctly.

This commit changes the logic in QueryAnalyzer#getResult() to bundle child term
results with all other results before handling them.

Fixes #50305
This commit is contained in:
Alan Woodward 2020-01-07 09:31:22 +00:00
parent a2ef0e8830
commit a3ab7eb95d
2 changed files with 38 additions and 2 deletions

View File

@ -130,8 +130,7 @@ final class QueryAnalyzer {
Result getResult() { Result getResult() {
List<Result> partialResults = new ArrayList<>(); List<Result> partialResults = new ArrayList<>();
if (terms.size() > 0) { if (terms.size() > 0) {
partialResults.add(conjunction ? handleConjunction(terms, version) : partialResults.addAll(terms);
handleDisjunction(terms, minimumShouldMatch, version));
} }
if (children.isEmpty() == false) { if (children.isEmpty() == false) {
List<Result> childResults = children.stream().map(ResultBuilder::getResult).collect(Collectors.toList()); List<Result> childResults = children.stream().map(ResultBuilder::getResult).collect(Collectors.toList());

View File

@ -1507,6 +1507,43 @@ public class QueryAnalyzerTests extends ESTestCase {
assertTermsEqual(result.extractions, new Term("field", "a")); assertTermsEqual(result.extractions, new Term("field", "a"));
} }
public void testRangeAndTermWithNestedMSM() {
Query q1 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("f", "v3")), Occur.SHOULD)
.add(new BooleanQuery.Builder()
.add(new TermQuery(new Term("f", "n1")), Occur.SHOULD)
.build(), Occur.SHOULD)
.add(new TermQuery(new Term("f", "v4")), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
Result r1 = analyze(q1, Version.CURRENT);
assertEquals(2, r1.minimumShouldMatch);
assertThat(r1.extractions, hasSize(3));
assertFalse(r1.matchAllDocs);
assertTrue(r1.verified);
Query q = new BooleanQuery.Builder()
.add(IntPoint.newRangeQuery("i", 0, 10), Occur.FILTER)
.add(new TermQuery(new Term("f", "v1")), Occur.MUST)
.add(new TermQuery(new Term("f", "v2")), Occur.MUST)
.add(IntPoint.newRangeQuery("i", 2, 20), Occur.FILTER)
.add(new TermQuery(new Term("f", "v3")), Occur.SHOULD)
.add(new BooleanQuery.Builder()
.add(new TermQuery(new Term("f", "n1")), Occur.SHOULD)
.build(), Occur.SHOULD)
.add(new TermQuery(new Term("f", "v4")), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
Result r = analyze(q, Version.CURRENT);
assertThat(r.minimumShouldMatch, equalTo(5));
assertThat(r.extractions, hasSize(7));
assertFalse(r.matchAllDocs);
assertFalse(r.verified);
}
public void testCombinedRangeAndTermWithMinimumShouldMatch() { public void testCombinedRangeAndTermWithMinimumShouldMatch() {
Query disj = new BooleanQuery.Builder() Query disj = new BooleanQuery.Builder()