mirror of https://github.com/apache/lucene.git
LUCENE-7386: Flatten nested disjunctions.
This commit is contained in:
parent
48a68365bb
commit
61d7569f78
|
@ -145,6 +145,10 @@ Changes in Runtime Behavior
|
|||
the only difference in output between the two is in the position length
|
||||
attribute. (Alan Woodward, Jim Ferenczi)
|
||||
|
||||
* LUCENE-7386: Disjunctions nested in disjunctions are now flattened. This might
|
||||
trigger changes in the produced scores due to changes to the order in which
|
||||
scores of sub clauses are summed up. (Adrien Grand)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera)
|
||||
|
|
|
@ -173,6 +173,15 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
return clauseSets.get(occur);
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this query is a pure disjunction, ie. it only has SHOULD clauses
|
||||
* and it is enough for a single clause to match for this boolean query to match.
|
||||
*/
|
||||
boolean isPureDisjunction() {
|
||||
return clauses.size() == getClauses(Occur.SHOULD).size()
|
||||
&& minimumNumberShouldMatch <= 1;
|
||||
}
|
||||
|
||||
/** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
|
||||
* make it possible to do:
|
||||
* <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
|
||||
|
@ -245,9 +254,13 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
Query query = clause.getQuery();
|
||||
Query rewritten = query.rewrite(reader);
|
||||
if (rewritten != query) {
|
||||
// rewrite clause
|
||||
actuallyRewritten = true;
|
||||
builder.add(rewritten, clause.getOccur());
|
||||
} else {
|
||||
// leave as-is
|
||||
builder.add(clause);
|
||||
}
|
||||
builder.add(rewritten, clause.getOccur());
|
||||
}
|
||||
if (actuallyRewritten) {
|
||||
return builder.build();
|
||||
|
@ -448,6 +461,31 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
}
|
||||
}
|
||||
|
||||
// Flatten nested disjunctions, this is important for block-max WAND to perform well
|
||||
if (minimumNumberShouldMatch <= 1) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
||||
boolean actuallyRewritten = false;
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) {
|
||||
BooleanQuery innerQuery = (BooleanQuery) clause.getQuery();
|
||||
if (innerQuery.isPureDisjunction()) {
|
||||
actuallyRewritten = true;
|
||||
for (BooleanClause innerClause : innerQuery.clauses()) {
|
||||
builder.add(innerClause);
|
||||
}
|
||||
} else {
|
||||
builder.add(clause);
|
||||
}
|
||||
} else {
|
||||
builder.add(clause);
|
||||
}
|
||||
}
|
||||
if (actuallyRewritten) {
|
||||
return builder.build();
|
||||
}
|
||||
}
|
||||
|
||||
return super.rewrite(reader);
|
||||
}
|
||||
|
||||
|
|
|
@ -480,4 +480,68 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
.build();
|
||||
assertEquals(expected, searcher.rewrite(query));
|
||||
}
|
||||
|
||||
public void testFlattenInnerDisjunctions() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
|
||||
Query inner = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
|
||||
.build();
|
||||
Query query = new BooleanQuery.Builder()
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.build();
|
||||
Query expectedRewritten = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(0)
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
expectedRewritten = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(0)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(1)
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
expectedRewritten = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(1)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertSame(query, searcher.rewrite(query));
|
||||
|
||||
inner = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
query = new BooleanQuery.Builder()
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.build();
|
||||
assertSame(query, searcher.rewrite(query));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue