LUCENE-7386: Flatten nested disjunctions.

This commit is contained in:
Adrien Grand 2019-04-18 11:24:50 +02:00
parent 48a68365bb
commit 61d7569f78
3 changed files with 107 additions and 1 deletions

View File

@ -145,6 +145,10 @@ Changes in Runtime Behavior
the only difference in output between the two is in the position length
attribute. (Alan Woodward, Jim Ferenczi)
* LUCENE-7386: Disjunctions nested in disjunctions are now flattened. This might
trigger changes in the produced scores due to changes to the order in which
scores of sub clauses are summed up. (Adrien Grand)
Other
* LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera)

View File

@ -173,6 +173,15 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return clauseSets.get(occur);
}
/**
* Whether this query is a pure disjunction, ie. it only has SHOULD clauses
* and it is enough for a single clause to match for this boolean query to match.
*/
boolean isPureDisjunction() {
return clauses.size() == getClauses(Occur.SHOULD).size()
&& minimumNumberShouldMatch <= 1;
}
/** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
* make it possible to do:
* <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
@ -245,9 +254,13 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
Query query = clause.getQuery();
Query rewritten = query.rewrite(reader);
if (rewritten != query) {
// rewrite clause
actuallyRewritten = true;
builder.add(rewritten, clause.getOccur());
} else {
// leave as-is
builder.add(clause);
}
builder.add(rewritten, clause.getOccur());
}
if (actuallyRewritten) {
return builder.build();
@ -448,6 +461,31 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
}
// Flatten nested disjunctions, this is important for block-max WAND to perform well
if (minimumNumberShouldMatch <= 1) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) {
if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) {
BooleanQuery innerQuery = (BooleanQuery) clause.getQuery();
if (innerQuery.isPureDisjunction()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery.clauses()) {
builder.add(innerClause);
}
} else {
builder.add(clause);
}
} else {
builder.add(clause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
return super.rewrite(reader);
}

View File

@ -480,4 +480,68 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build();
assertEquals(expected, searcher.rewrite(query));
}
public void testFlattenInnerDisjunctions() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
Query inner = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.build();
Query query = new BooleanQuery.Builder()
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build();
Query expectedRewritten = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(0)
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
expectedRewritten = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(0)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(1)
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
expectedRewritten = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(1)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));
query = new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertSame(query, searcher.rewrite(query));
inner = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
query = new BooleanQuery.Builder()
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build();
assertSame(query, searcher.rewrite(query));
}
}