Speed up top-k retrieval on filtered conjunctions. (#13994)

A while back we added an optimized bulk scorer that implements block-max AND,
which yielded a good speedup on nightly benchmarks (see annotation `FP` at
https://benchmarks.mikemccandless.com/AndHighHigh.html). With this PR, filtered
conjunctions now also run through this optimized bulk scorer, thanks to two
changes:
 - It flattens inner conjunctions, so that a query initially written as
   something like `+(+term1 +term2) #filter` gets rewritten to
   `+term1 +term2 #filter`.
 - It evaluates queries that have a mix of MUST and FILTER clauses
   through `BlockMaxConjunctionBulkScorer` by treating FILTER clauses as
   scoring clauses that produce a score of 0.
This commit is contained in:
Adrien Grand 2024-11-18 08:51:35 +01:00
parent a5d44d89eb
commit cf27af1416
4 changed files with 175 additions and 24 deletions

View File

@ -67,6 +67,9 @@ Optimizations
longs that would pack two integers. We are now moving back to integers to be longs that would pack two integers. We are now moving back to integers to be
able to take advantage of 2x more lanes with the vector API. (Adrien Grand) able to take advantage of 2x more lanes with the vector API. (Adrien Grand)
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
(Adrien Grand)
Bug Fixes Bug Fixes
--------------------- ---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

View File

@ -268,6 +268,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return new MatchNoDocsQuery("empty BooleanQuery"); return new MatchNoDocsQuery("empty BooleanQuery");
} }
// Queries with no positive clauses have no matches
if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) {
return new MatchNoDocsQuery("pure negative BooleanQuery");
}
// optimize 1-clause queries // optimize 1-clause queries
if (clauses.size() == 1) { if (clauses.size() == 1) {
BooleanClause c = clauses.get(0); BooleanClause c = clauses.get(0);
@ -283,8 +288,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
// no scoring clauses, so return a score of 0 // no scoring clauses, so return a score of 0
return new BoostQuery(new ConstantScoreQuery(query), 0); return new BoostQuery(new ConstantScoreQuery(query), 0);
case MUST_NOT: case MUST_NOT:
// no positive clauses
return new MatchNoDocsQuery("pure negative BooleanQuery");
default: default:
throw new AssertionError(); throw new AssertionError();
} }
@ -539,8 +542,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false; boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) { for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery) { if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
BooleanQuery innerQuery = (BooleanQuery) clause.query();
if (innerQuery.isPureDisjunction()) { if (innerQuery.isPureDisjunction()) {
actuallyRewritten = true; actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery.clauses()) { for (BooleanClause innerClause : innerQuery.clauses()) {
@ -558,6 +560,46 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} }
} }
// Inline required / prohibited clauses. This helps run filtered conjunctive queries more
// efficiently by providing all clauses to the block-max AND scorer.
// For example, the clauses of an inner BooleanQuery that is itself a required clause of this
// query are copied into a single flat BooleanQuery.
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
// Tracks whether at least one inner query was inlined; the new query is only returned then.
boolean actuallyRewritten = false;
for (BooleanClause outerClause : clauses) {
if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) {
// Inlining prohibited clauses is not legal if the query is a pure negation, since pure
// negations have no matches. It works because the inner BooleanQuery would have first
// rewritten to a MatchNoDocsQuery if it only had prohibited clauses.
assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size();
// Only inner queries that have no SHOULD clauses and no minimum-should-match constraint
// are inlined; any other inner query is kept as a single clause.
if (innerQuery.getMinimumNumberShouldMatch() == 0
&& innerQuery.getClauses(Occur.SHOULD).isEmpty()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery) {
Occur innerOccur = innerClause.occur();
// FILTER and MUST_NOT inner clauses keep their occur whatever the outer occur is, and
// under an outer MUST every inner clause keeps its occur as well.
if (innerOccur == Occur.FILTER
|| innerOccur == Occur.MUST_NOT
|| outerClause.occur() == Occur.MUST) {
builder.add(innerClause);
} else {
assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST;
// In this case we need to change the occur of the inner query from MUST to FILTER.
builder.add(innerClause.query(), Occur.FILTER);
}
}
} else {
builder.add(outerClause);
}
} else {
// Clauses that are not required, or whose query is not a BooleanQuery, are copied as-is.
builder.add(outerClause);
}
}
if (actuallyRewritten) {
return builder.build();
}
}
// SHOULD clause count less than or equal to minimumNumberShouldMatch // SHOULD clause count less than or equal to minimumNumberShouldMatch
// Important(this can only be processed after nested clauses have been flattened) // Important(this can only be processed after nested clauses have been flattened)
{ {

View File

@ -333,10 +333,15 @@ final class BooleanScorerSupplier extends ScorerSupplier {
requiredScoring.add(ss.get(leadCost)); requiredScoring.add(ss.get(leadCost));
} }
if (scoreMode == ScoreMode.TOP_SCORES if (scoreMode == ScoreMode.TOP_SCORES
&& requiredNoScoring.isEmpty()
&& requiredScoring.size() > 1 && requiredScoring.size() > 1
// Only specialize top-level conjunctions for clauses that don't have a two-phase iterator. // Only specialize top-level conjunctions for clauses that don't have a two-phase iterator.
&& requiredNoScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)
&& requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) { && requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) {
// Turn all filters into scoring clauses with a score of zero, so that
// BlockMaxConjunctionBulkScorer is applicable.
for (Scorer filter : requiredNoScoring) {
requiredScoring.add(new ConstantScoreScorer(0f, ScoreMode.COMPLETE, filter.iterator()));
}
return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring); return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring);
} }
if (scoreMode != ScoreMode.TOP_SCORES if (scoreMode != ScoreMode.TOP_SCORES

View File

@ -330,8 +330,8 @@ public class TestBooleanRewrites extends LuceneTestCase {
int depth = TestUtil.nextInt(random(), 10, 30); int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery(); TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery(); TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery = BooleanQuery.Builder expectedQueryBuilder =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build(); new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
Query deepBuilder = Query deepBuilder =
new BooleanQuery.Builder() new BooleanQuery.Builder()
.add(rewriteQuery, Occur.SHOULD) .add(rewriteQuery, Occur.SHOULD)
@ -345,21 +345,19 @@ public class TestBooleanRewrites extends LuceneTestCase {
.add(tq, Occur.SHOULD) .add(tq, Occur.SHOULD)
.add(deepBuilder, Occur.SHOULD); .add(deepBuilder, Occur.SHOULD);
deepBuilder = bq.build(); deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER); expectedQueryBuilder.add(tq, Occur.FILTER);
if (i == depth) { if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER); expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
} }
expectedQuery = expectedBq.build();
} }
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build(); BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f); Query expectedQuery =
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
Query rewritten = searcher.rewrite(bq); Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten); assertEquals(expectedQuery, rewritten);
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then // the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
// `FILTER` // `FILTER`, plus the flattening of required clauses
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites); assertEquals("Depth=" + depth, depth * 2, rewriteQuery.numRewrites);
} }
public void testDeeplyNestedBooleanRewrite() throws IOException { public void testDeeplyNestedBooleanRewrite() throws IOException {
@ -369,27 +367,26 @@ public class TestBooleanRewrites extends LuceneTestCase {
int depth = TestUtil.nextInt(random(), 10, 30); int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery(); TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery(); TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery = BooleanQuery.Builder expectedQueryBuilder =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build(); new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build(); Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
for (int i = depth; i > 0; i--) { for (int i = depth; i > 0; i--) {
TermQuery tq = termQueryFunction.apply(i); TermQuery tq = termQueryFunction.apply(i);
BooleanQuery.Builder bq = BooleanQuery.Builder bq =
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST); new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
deepBuilder = bq.build(); deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER); expectedQueryBuilder.add(tq, Occur.FILTER);
if (i == depth) { if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER); expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
} }
expectedQuery = expectedBq.build();
} }
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build(); BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f); Query expectedQuery =
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
Query rewritten = searcher.rewrite(bq); Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten); assertEquals(expectedQuery, rewritten);
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites); // `depth` rewrites because of the flattening
assertEquals("Depth=" + depth, depth, rewriteQuery.numRewrites);
} }
public void testRemoveMatchAllFilter() throws IOException { public void testRemoveMatchAllFilter() throws IOException {
@ -691,6 +688,110 @@ public class TestBooleanRewrites extends LuceneTestCase {
assertSame(query, searcher.rewrite(query)); assertSame(query, searcher.rewrite(query));
} }
// Checks that rewriting inlines the clauses of a required inner conjunction into the enclosing
// BooleanQuery, for each combination of outer and inner occurs exercised below.
public void testFlattenInnerConjunctions() throws IOException {
  IndexSearcher indexSearcher = newSearcher(new MultiReader());
  TermQuery bar = new TermQuery(new Term("foo", "bar"));
  TermQuery quux = new TermQuery(new Term("foo", "quux"));
  TermQuery baz = new TermQuery(new Term("foo", "baz"));

  // Inner conjunction made of two MUST clauses, shared by the first three cases.
  Query conjunction =
      new BooleanQuery.Builder().add(bar, Occur.MUST).add(quux, Occur.MUST).build();

  // Outer MUST next to a FILTER clause: the inner MUST clauses are inlined unchanged.
  Query original =
      new BooleanQuery.Builder().add(conjunction, Occur.MUST).add(baz, Occur.FILTER).build();
  Query flattened =
      new BooleanQuery.Builder()
          .add(bar, Occur.MUST)
          .add(quux, Occur.MUST)
          .add(baz, Occur.FILTER)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));

  // Outer MUST next to an optional SHOULD clause.
  original =
      new BooleanQuery.Builder()
          .setMinimumNumberShouldMatch(0)
          .add(conjunction, Occur.MUST)
          .add(baz, Occur.SHOULD)
          .build();
  flattened =
      new BooleanQuery.Builder()
          .setMinimumNumberShouldMatch(0)
          .add(bar, Occur.MUST)
          .add(quux, Occur.MUST)
          .add(baz, Occur.SHOULD)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));

  // Outer MUST next to a MUST_NOT clause.
  original =
      new BooleanQuery.Builder().add(conjunction, Occur.MUST).add(baz, Occur.MUST_NOT).build();
  flattened =
      new BooleanQuery.Builder()
          .add(bar, Occur.MUST)
          .add(quux, Occur.MUST)
          .add(baz, Occur.MUST_NOT)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));

  // Inner query mixing MUST and FILTER under an outer MUST: both inner occurs are preserved.
  Query mustAndFilter =
      new BooleanQuery.Builder().add(bar, Occur.MUST).add(quux, Occur.FILTER).build();
  original =
      new BooleanQuery.Builder().add(mustAndFilter, Occur.MUST).add(baz, Occur.MUST).build();
  flattened =
      new BooleanQuery.Builder()
          .add(bar, Occur.MUST)
          .add(quux, Occur.FILTER)
          .add(baz, Occur.MUST)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));

  // Same inner query under an outer FILTER: the inner MUST clause is turned into a FILTER.
  mustAndFilter =
      new BooleanQuery.Builder().add(bar, Occur.MUST).add(quux, Occur.FILTER).build();
  original =
      new BooleanQuery.Builder().add(mustAndFilter, Occur.FILTER).add(baz, Occur.MUST).build();
  flattened =
      new BooleanQuery.Builder()
          .add(bar, Occur.FILTER)
          .add(quux, Occur.FILTER)
          .add(baz, Occur.MUST)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));

  // Inner query mixing MUST and MUST_NOT under an outer FILTER: MUST becomes FILTER while the
  // prohibited clause keeps its occur.
  Query mustAndMustNot =
      new BooleanQuery.Builder().add(bar, Occur.MUST).add(quux, Occur.MUST_NOT).build();
  original =
      new BooleanQuery.Builder().add(mustAndMustNot, Occur.FILTER).add(baz, Occur.MUST).build();
  flattened =
      new BooleanQuery.Builder()
          .add(bar, Occur.FILTER)
          .add(quux, Occur.MUST_NOT)
          .add(baz, Occur.MUST)
          .build();
  assertEquals(flattened, indexSearcher.rewrite(original));
}
public void testDiscardShouldClauses() throws IOException { public void testDiscardShouldClauses() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader()); IndexSearcher searcher = newSearcher(new MultiReader());