diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dae291c8c9d..b4416fa40ff 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -145,6 +145,10 @@ Changes in Runtime Behavior the only difference in output between the two is in the position length attribute. (Alan Woodward, Jim Ferenczi) +* LUCENE-7386: Disjunctions nested in disjunctions are now flattened. This might + trigger changes in the produced scores due to changes to the order in which + scores of sub clauses are summed up. (Adrien Grand) + Other * LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 87105b0e823..ff264e78095 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -173,6 +173,15 @@ public class BooleanQuery extends Query implements Iterable { return clauseSets.get(occur); } + /** + * Whether this query is a pure disjunction, ie. it only has SHOULD clauses + * and it is enough for a single clause to match for this boolean query to match. + */ + boolean isPureDisjunction() { + return clauses.size() == getClauses(Occur.SHOULD).size() + && minimumNumberShouldMatch <= 1; + } + /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to * make it possible to do: *
for (BooleanClause clause : booleanQuery) {}
@@ -245,9 +254,13 @@ public class BooleanQuery extends Query implements Iterable { Query query = clause.getQuery(); Query rewritten = query.rewrite(reader); if (rewritten != query) { + // rewrite clause actuallyRewritten = true; + builder.add(rewritten, clause.getOccur()); + } else { + // leave as-is + builder.add(clause); } - builder.add(rewritten, clause.getOccur()); } if (actuallyRewritten) { return builder.build(); @@ -448,6 +461,31 @@ public class BooleanQuery extends Query implements Iterable { } } + // Flatten nested disjunctions, this is important for block-max WAND to perform well + if (minimumNumberShouldMatch <= 1) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); + boolean actuallyRewritten = false; + for (BooleanClause clause : clauses) { + if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) { + BooleanQuery innerQuery = (BooleanQuery) clause.getQuery(); + if (innerQuery.isPureDisjunction()) { + actuallyRewritten = true; + for (BooleanClause innerClause : innerQuery.clauses()) { + builder.add(innerClause); + } + } else { + builder.add(clause); + } + } else { + builder.add(clause); + } + } + if (actuallyRewritten) { + return builder.build(); + } + } + return super.rewrite(reader); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 56c5f8a476d..2120c516a25 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -480,4 +480,68 @@ public class TestBooleanRewrites extends LuceneTestCase { .build(); assertEquals(expected, searcher.rewrite(query)); } + + public void testFlattenInnerDisjunctions() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + Query inner = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .build(); + Query query = new BooleanQuery.Builder() + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(); + Query expectedRewritten = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(); + assertEquals(expectedRewritten, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(0) + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .build(); + expectedRewritten = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(0) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .build(); + assertEquals(expectedRewritten, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(1) + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .build(); + expectedRewritten = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(1) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .build(); + assertEquals(expectedRewritten, searcher.rewrite(query)); + + query = new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(2) + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .build(); + assertSame(query, searcher.rewrite(query)); + + inner = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + query = new BooleanQuery.Builder() + .add(inner, Occur.SHOULD) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .build(); + assertSame(query, searcher.rewrite(query)); + } }