Fix exponential runtime for Boolean#rewrite (#12072)

When #672 was introduced, it added many nice rewrite optimizations. However, in the case when there are many multiple nested Boolean queries under a top level Boolean#filter clause, its runtime grows exponentially.

The key issue was how the BooleanQuery#rewriteNoScoring redirected yet again to the ConstantScoreQuery#rewrite. This causes BooleanQuery#rewrite to be called again recursively , even though it was previously called in ConstantScoreQuery#rewrite, and THEN BooleanQuery#rewriteNoScoring is called again, recursively.

This causes exponential growth in rewrite time based on query depth. The change here hopes to short-circuit that and only grow (near) linearly by calling BooleanQuery#rewriteNoScoring directly, instead if attempting to redirect through ConstantScoreQuery#rewrite.

closes: #12069
This commit is contained in:
Benjamin Trent 2023-01-12 10:50:05 -05:00 committed by GitHub
parent 9ab324d2be
commit 59b17452aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 115 additions and 3 deletions

View File

@ -234,6 +234,9 @@ Bug Fixes
* GITHUB#12046: Out of boundary in CombinedFieldQuery#addTerm. (Lu Xugang)
* GITHUB#12072: Fix exponential runtime for nested BooleanQuery#rewrite when a
BooleanClause is non-scoring. (Ben Trent)
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all

View File

@ -192,7 +192,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
// Utility method for rewriting BooleanQuery when scores are not needed.
// This is called from ConstantScoreQuery#rewrite
BooleanQuery rewriteNoScoring(IndexSearcher indexSearcher) throws IOException {
BooleanQuery rewriteNoScoring() {
boolean actuallyRewritten = false;
BooleanQuery.Builder newQuery =
new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
@ -203,10 +203,19 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
for (BooleanClause clause : clauses) {
Query query = clause.getQuery();
Query rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
// NOTE: rewritingNoScoring() should not call rewrite(), otherwise this
// method could run in exponential time with the depth of the query as
// every new level would rewrite 2x more than its parent level.
Query rewritten = query;
if (rewritten instanceof BoostQuery) {
rewritten = ((BoostQuery) rewritten).getQuery();
}
if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
}
if (rewritten instanceof BooleanQuery) {
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
}
BooleanClause.Occur occur = clause.getOccur();
if (occur == Occur.SHOULD && keepShould == false) {
// ignore clause

View File

@ -50,7 +50,7 @@ public final class ConstantScoreQuery extends Query {
} else if (rewritten instanceof ConstantScoreQuery) {
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
} else if (rewritten instanceof BooleanQuery) {
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(indexSearcher);
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
}
if (rewritten.getClass() == MatchNoDocsQuery.class) {

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -322,6 +323,75 @@ public class TestBooleanRewrites extends LuceneTestCase {
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
}
public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
Function<Integer, TermQuery> termQueryFunction =
(i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
Query deepBuilder =
new BooleanQuery.Builder()
.add(rewriteQuery, Occur.SHOULD)
.setMinimumNumberShouldMatch(1)
.build();
for (int i = depth; i > 0; i--) {
TermQuery tq = termQueryFunction.apply(i);
BooleanQuery.Builder bq =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(tq, Occur.SHOULD)
.add(deepBuilder, Occur.SHOULD);
deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
}
expectedQuery = expectedBq.build();
}
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten);
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
// `FILTER`
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
}
public void testDeeplyNestedBooleanRewrite() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
Function<Integer, TermQuery> termQueryFunction =
(i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
for (int i = depth; i > 0; i--) {
TermQuery tq = termQueryFunction.apply(i);
BooleanQuery.Builder bq =
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
}
expectedQuery = expectedBq.build();
}
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten);
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
}
public void testRemoveMatchAllFilter() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
@ -864,4 +934,34 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
}
// Test query to count number of rewrites for its lifetime.
private static final class TestRewriteQuery extends Query {
private int numRewrites = 0;
@Override
public String toString(String field) {
return "TestRewriteQuery{rewrites=" + numRewrites + "}";
}
@Override
public Query rewrite(IndexSearcher indexSearcher) {
numRewrites++;
return this;
}
@Override
public void visit(QueryVisitor visitor) {}
@Override
public boolean equals(Object obj) {
return obj instanceof TestRewriteQuery;
}
@Override
public int hashCode() {
return TestRewriteQuery.class.hashCode();
}
}
}