mirror of https://github.com/apache/lucene.git
Fix exponential runtime for Boolean#rewrite (#12072)
When #672 was introduced, it added many nice rewrite optimizations. However, in the case when there are many multiple nested Boolean queries under a top level Boolean#filter clause, its runtime grows exponentially. The key issue was how the BooleanQuery#rewriteNoScoring redirected yet again to the ConstantScoreQuery#rewrite. This causes BooleanQuery#rewrite to be called again recursively , even though it was previously called in ConstantScoreQuery#rewrite, and THEN BooleanQuery#rewriteNoScoring is called again, recursively. This causes exponential growth in rewrite time based on query depth. The change here hopes to short-circuit that and only grow (near) linearly by calling BooleanQuery#rewriteNoScoring directly, instead if attempting to redirect through ConstantScoreQuery#rewrite. closes: #12069
This commit is contained in:
parent
9ab324d2be
commit
59b17452aa
|
@ -234,6 +234,9 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12046: Out of boundary in CombinedFieldQuery#addTerm. (Lu Xugang)
|
||||
|
||||
* GITHUB#12072: Fix exponential runtime for nested BooleanQuery#rewrite when a
|
||||
BooleanClause is non-scoring. (Ben Trent)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
|
||||
|
|
|
@ -192,7 +192,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
|
||||
// Utility method for rewriting BooleanQuery when scores are not needed.
|
||||
// This is called from ConstantScoreQuery#rewrite
|
||||
BooleanQuery rewriteNoScoring(IndexSearcher indexSearcher) throws IOException {
|
||||
BooleanQuery rewriteNoScoring() {
|
||||
boolean actuallyRewritten = false;
|
||||
BooleanQuery.Builder newQuery =
|
||||
new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
|
||||
|
@ -203,10 +203,19 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
|
||||
for (BooleanClause clause : clauses) {
|
||||
Query query = clause.getQuery();
|
||||
Query rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher);
|
||||
// NOTE: rewritingNoScoring() should not call rewrite(), otherwise this
|
||||
// method could run in exponential time with the depth of the query as
|
||||
// every new level would rewrite 2x more than its parent level.
|
||||
Query rewritten = query;
|
||||
if (rewritten instanceof BoostQuery) {
|
||||
rewritten = ((BoostQuery) rewritten).getQuery();
|
||||
}
|
||||
if (rewritten instanceof ConstantScoreQuery) {
|
||||
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
|
||||
}
|
||||
if (rewritten instanceof BooleanQuery) {
|
||||
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
|
||||
}
|
||||
BooleanClause.Occur occur = clause.getOccur();
|
||||
if (occur == Occur.SHOULD && keepShould == false) {
|
||||
// ignore clause
|
||||
|
|
|
@ -50,7 +50,7 @@ public final class ConstantScoreQuery extends Query {
|
|||
} else if (rewritten instanceof ConstantScoreQuery) {
|
||||
rewritten = ((ConstantScoreQuery) rewritten).getQuery();
|
||||
} else if (rewritten instanceof BooleanQuery) {
|
||||
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(indexSearcher);
|
||||
rewritten = ((BooleanQuery) rewritten).rewriteNoScoring();
|
||||
}
|
||||
|
||||
if (rewritten.getClass() == MatchNoDocsQuery.class) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.Arrays;
|
|||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -322,6 +323,75 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
|
||||
}
|
||||
|
||||
public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
Function<Integer, TermQuery> termQueryFunction =
|
||||
(i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
|
||||
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||
Query expectedQuery =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
||||
Query deepBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(rewriteQuery, Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(1)
|
||||
.build();
|
||||
for (int i = depth; i > 0; i--) {
|
||||
TermQuery tq = termQueryFunction.apply(i);
|
||||
BooleanQuery.Builder bq =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.add(tq, Occur.SHOULD)
|
||||
.add(deepBuilder, Occur.SHOULD);
|
||||
deepBuilder = bq.build();
|
||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
||||
if (i == depth) {
|
||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
||||
} else {
|
||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
||||
}
|
||||
expectedQuery = expectedBq.build();
|
||||
}
|
||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
||||
Query rewritten = searcher.rewrite(bq);
|
||||
assertEquals(expectedQuery, rewritten);
|
||||
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
|
||||
// `FILTER`
|
||||
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
|
||||
}
|
||||
|
||||
public void testDeeplyNestedBooleanRewrite() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
Function<Integer, TermQuery> termQueryFunction =
|
||||
(i) -> new TermQuery(new Term("layer[" + i + "]", "foo"));
|
||||
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||
Query expectedQuery =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
||||
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
|
||||
for (int i = depth; i > 0; i--) {
|
||||
TermQuery tq = termQueryFunction.apply(i);
|
||||
BooleanQuery.Builder bq =
|
||||
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
|
||||
deepBuilder = bq.build();
|
||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
||||
if (i == depth) {
|
||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
||||
} else {
|
||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
||||
}
|
||||
expectedQuery = expectedBq.build();
|
||||
}
|
||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
||||
Query rewritten = searcher.rewrite(bq);
|
||||
assertEquals(expectedQuery, rewritten);
|
||||
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
|
||||
}
|
||||
|
||||
public void testRemoveMatchAllFilter() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
|
||||
|
@ -864,4 +934,34 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
.build();
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
}
|
||||
|
||||
// Test query to count number of rewrites for its lifetime.
|
||||
private static final class TestRewriteQuery extends Query {
|
||||
|
||||
private int numRewrites = 0;
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "TestRewriteQuery{rewrites=" + numRewrites + "}";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexSearcher indexSearcher) {
|
||||
numRewrites++;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(QueryVisitor visitor) {}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return obj instanceof TestRewriteQuery;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return TestRewriteQuery.class.hashCode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue