From 59b17452aaf04fc6a6f54f70cc4d1307f077e875 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 12 Jan 2023 10:50:05 -0500 Subject: [PATCH] Fix exponential runtime for Boolean#rewrite (#12072) When #672 was introduced, it added many nice rewrite optimizations. However, in the case when there are many multiple nested Boolean queries under a top level Boolean#filter clause, its runtime grows exponentially. The key issue was how the BooleanQuery#rewriteNoScoring redirected yet again to the ConstantScoreQuery#rewrite. This causes BooleanQuery#rewrite to be called again recursively , even though it was previously called in ConstantScoreQuery#rewrite, and THEN BooleanQuery#rewriteNoScoring is called again, recursively. This causes exponential growth in rewrite time based on query depth. The change here hopes to short-circuit that and only grow (near) linearly by calling BooleanQuery#rewriteNoScoring directly, instead if attempting to redirect through ConstantScoreQuery#rewrite. closes: #12069 --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/BooleanQuery.java | 13 ++- .../lucene/search/ConstantScoreQuery.java | 2 +- .../lucene/search/TestBooleanRewrites.java | 100 ++++++++++++++++++ 4 files changed, 115 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fdc9d05734b..fd7c80c3b0e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -234,6 +234,9 @@ Bug Fixes * GITHUB#12046: Out of boundary in CombinedFieldQuery#addTerm. (Lu Xugang) +* GITHUB#12072: Fix exponential runtime for nested BooleanQuery#rewrite when a + BooleanClause is non-scoring. (Ben Trent) + Optimizations --------------------- * GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 0354280eb01..07823ae5c4d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -192,7 +192,7 @@ public class BooleanQuery extends Query implements Iterable { // Utility method for rewriting BooleanQuery when scores are not needed. // This is called from ConstantScoreQuery#rewrite - BooleanQuery rewriteNoScoring(IndexSearcher indexSearcher) throws IOException { + BooleanQuery rewriteNoScoring() { boolean actuallyRewritten = false; BooleanQuery.Builder newQuery = new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); @@ -203,10 +203,19 @@ public class BooleanQuery extends Query implements Iterable { for (BooleanClause clause : clauses) { Query query = clause.getQuery(); - Query rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher); + // NOTE: rewritingNoScoring() should not call rewrite(), otherwise this + // method could run in exponential time with the depth of the query as + // every new level would rewrite 2x more than its parent level. + Query rewritten = query; + if (rewritten instanceof BoostQuery) { + rewritten = ((BoostQuery) rewritten).getQuery(); + } if (rewritten instanceof ConstantScoreQuery) { rewritten = ((ConstantScoreQuery) rewritten).getQuery(); } + if (rewritten instanceof BooleanQuery) { + rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(); + } BooleanClause.Occur occur = clause.getOccur(); if (occur == Occur.SHOULD && keepShould == false) { // ignore clause diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 1984cacccbe..48f763e1646 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -50,7 +50,7 @@ public final class ConstantScoreQuery extends Query { } else if (rewritten instanceof ConstantScoreQuery) { rewritten = ((ConstantScoreQuery) rewritten).getQuery(); } else if (rewritten instanceof BooleanQuery) { - rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(indexSearcher); + rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(); } if (rewritten.getClass() == MatchNoDocsQuery.class) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 4f2e3a829b8..87b8068d2f1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Random; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -322,6 +323,75 @@ public class TestBooleanRewrites extends LuceneTestCase { assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2)); } + public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + Function termQueryFunction = + (i) -> new TermQuery(new Term("layer[" + i + "]", "foo")); + int depth = TestUtil.nextInt(random(), 10, 30); + TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery(); + TestRewriteQuery rewriteQuery = new TestRewriteQuery(); + Query expectedQuery = + new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build(); + Query deepBuilder = + new BooleanQuery.Builder() + .add(rewriteQuery, Occur.SHOULD) + .setMinimumNumberShouldMatch(1) + .build(); + for (int i = depth; i > 0; i--) { + TermQuery tq = termQueryFunction.apply(i); + BooleanQuery.Builder bq = + new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(2) + .add(tq, Occur.SHOULD) + .add(deepBuilder, Occur.SHOULD); + deepBuilder = bq.build(); + BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER); + if (i == depth) { + expectedBq.add(rewriteQuery, Occur.FILTER); + } else { + expectedBq.add(expectedQuery, Occur.FILTER); + } + expectedQuery = expectedBq.build(); + } + BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build(); + expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f); + Query rewritten = searcher.rewrite(bq); + assertEquals(expectedQuery, rewritten); + // the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then + // `FILTER` + assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites); + } + + public void testDeeplyNestedBooleanRewrite() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + Function termQueryFunction = + (i) -> new TermQuery(new Term("layer[" + i + "]", "foo")); + int depth = TestUtil.nextInt(random(), 10, 30); + TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery(); + TestRewriteQuery rewriteQuery = new TestRewriteQuery(); + Query expectedQuery = + new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build(); + Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build(); + for (int i = depth; i > 0; i--) { + TermQuery tq = termQueryFunction.apply(i); + BooleanQuery.Builder bq = + new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST); + deepBuilder = bq.build(); + BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER); + if (i == depth) { + expectedBq.add(rewriteQuery, Occur.FILTER); + } else { + expectedBq.add(expectedQuery, Occur.FILTER); + } + expectedQuery = expectedBq.build(); + } + BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build(); + expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f); + Query rewritten = searcher.rewrite(bq); + assertEquals(expectedQuery, rewritten); + assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites); + } + public void testRemoveMatchAllFilter() throws IOException { IndexSearcher searcher = newSearcher(new MultiReader()); @@ -864,4 +934,34 @@ public class TestBooleanRewrites extends LuceneTestCase { .build(); assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); } + + // Test query to count number of rewrites for its lifetime. + private static final class TestRewriteQuery extends Query { + + private int numRewrites = 0; + + @Override + public String toString(String field) { + return "TestRewriteQuery{rewrites=" + numRewrites + "}"; + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) { + numRewrites++; + return this; + } + + @Override + public void visit(QueryVisitor visitor) {} + + @Override + public boolean equals(Object obj) { + return obj instanceof TestRewriteQuery; + } + + @Override + public int hashCode() { + return TestRewriteQuery.class.hashCode(); + } + } }