mirror of https://github.com/apache/lucene.git
Speed up top-k retrieval on filtered conjunctions. (#13994)
A while back we added an optimized bulk scorer that implements block-max AND, this yielded a good speedup on nightly benchmarks, see annotation `FP` at https://benchmarks.mikemccandless.com/AndHighHigh.html. With this PR, filtered conjunctions now also run through this optimized bulk scorer by doing two things: - It flattens inner conjunctions. This makes queries initially written as something like `+(+term1 +term2) #filter` rewritten to `+term1 +term2 #filter`. - It evaluates queries that have a mix of MUST and FILTER clauses evaluated through `BlockMaxConjunctionBulkScorer` by treating FILTER clauses as scoring clauses that produce a score of 0.
This commit is contained in:
parent
a0e1eeefeb
commit
4400d55297
|
@ -93,6 +93,9 @@ Optimizations
|
|||
longs that would pack two integers. We are now moving back to integers to be
|
||||
able to take advantage of 2x more lanes with the vector API. (Adrien Grand)
|
||||
|
||||
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||
|
|
|
@ -268,6 +268,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
return new MatchNoDocsQuery("empty BooleanQuery");
|
||||
}
|
||||
|
||||
// Queries with no positive clauses have no matches
|
||||
if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) {
|
||||
return new MatchNoDocsQuery("pure negative BooleanQuery");
|
||||
}
|
||||
|
||||
// optimize 1-clause queries
|
||||
if (clauses.size() == 1) {
|
||||
BooleanClause c = clauses.get(0);
|
||||
|
@ -283,8 +288,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
// no scoring clauses, so return a score of 0
|
||||
return new BoostQuery(new ConstantScoreQuery(query), 0);
|
||||
case MUST_NOT:
|
||||
// no positive clauses
|
||||
return new MatchNoDocsQuery("pure negative BooleanQuery");
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
@ -539,8 +542,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
||||
boolean actuallyRewritten = false;
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery) {
|
||||
BooleanQuery innerQuery = (BooleanQuery) clause.query();
|
||||
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
|
||||
if (innerQuery.isPureDisjunction()) {
|
||||
actuallyRewritten = true;
|
||||
for (BooleanClause innerClause : innerQuery.clauses()) {
|
||||
|
@ -558,6 +560,46 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
}
|
||||
}
|
||||
|
||||
// Inline required / prohibited clauses. This helps run filtered conjunctive queries more
|
||||
// efficiently by providing all clauses to the block-max AND scorer.
|
||||
{
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
||||
boolean actuallyRewritten = false;
|
||||
for (BooleanClause outerClause : clauses) {
|
||||
if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) {
|
||||
// Inlining prohibited clauses is not legal if the query is a pure negation, since pure
|
||||
// negations have no matches. It works because the inner BooleanQuery would have first
|
||||
// rewritten to a MatchNoDocsQuery if it only had prohibited clauses.
|
||||
assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size();
|
||||
if (innerQuery.getMinimumNumberShouldMatch() == 0
|
||||
&& innerQuery.getClauses(Occur.SHOULD).isEmpty()) {
|
||||
|
||||
actuallyRewritten = true;
|
||||
for (BooleanClause innerClause : innerQuery) {
|
||||
Occur innerOccur = innerClause.occur();
|
||||
if (innerOccur == Occur.FILTER
|
||||
|| innerOccur == Occur.MUST_NOT
|
||||
|| outerClause.occur() == Occur.MUST) {
|
||||
builder.add(innerClause);
|
||||
} else {
|
||||
assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST;
|
||||
// In this case we need to change the occur of the inner query from MUST to FILTER.
|
||||
builder.add(innerClause.query(), Occur.FILTER);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
builder.add(outerClause);
|
||||
}
|
||||
} else {
|
||||
builder.add(outerClause);
|
||||
}
|
||||
}
|
||||
if (actuallyRewritten) {
|
||||
return builder.build();
|
||||
}
|
||||
}
|
||||
|
||||
// SHOULD clause count less than or equal to minimumNumberShouldMatch
|
||||
// Important(this can only be processed after nested clauses have been flattened)
|
||||
{
|
||||
|
|
|
@ -333,10 +333,15 @@ final class BooleanScorerSupplier extends ScorerSupplier {
|
|||
requiredScoring.add(ss.get(leadCost));
|
||||
}
|
||||
if (scoreMode == ScoreMode.TOP_SCORES
|
||||
&& requiredNoScoring.isEmpty()
|
||||
&& requiredScoring.size() > 1
|
||||
// Only specialize top-level conjunctions for clauses that don't have a two-phase iterator.
|
||||
&& requiredNoScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)
|
||||
&& requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) {
|
||||
// Turn all filters into scoring clauses with a score of zero, so that
|
||||
// BlockMaxConjunctionBulkScorer is applicable.
|
||||
for (Scorer filter : requiredNoScoring) {
|
||||
requiredScoring.add(new ConstantScoreScorer(0f, ScoreMode.COMPLETE, filter.iterator()));
|
||||
}
|
||||
return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring);
|
||||
}
|
||||
if (scoreMode != ScoreMode.TOP_SCORES
|
||||
|
|
|
@ -330,8 +330,8 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||
Query expectedQuery =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
||||
BooleanQuery.Builder expectedQueryBuilder =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
|
||||
Query deepBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(rewriteQuery, Occur.SHOULD)
|
||||
|
@ -345,21 +345,19 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
.add(tq, Occur.SHOULD)
|
||||
.add(deepBuilder, Occur.SHOULD);
|
||||
deepBuilder = bq.build();
|
||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
||||
expectedQueryBuilder.add(tq, Occur.FILTER);
|
||||
if (i == depth) {
|
||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
||||
} else {
|
||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
||||
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
|
||||
}
|
||||
expectedQuery = expectedBq.build();
|
||||
}
|
||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
||||
Query expectedQuery =
|
||||
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
|
||||
Query rewritten = searcher.rewrite(bq);
|
||||
assertEquals(expectedQuery, rewritten);
|
||||
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
|
||||
// `FILTER`
|
||||
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
|
||||
// `FILTER`, plus the flattening of required clauses
|
||||
assertEquals("Depth=" + depth, depth * 2, rewriteQuery.numRewrites);
|
||||
}
|
||||
|
||||
public void testDeeplyNestedBooleanRewrite() throws IOException {
|
||||
|
@ -369,27 +367,26 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||
Query expectedQuery =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
||||
BooleanQuery.Builder expectedQueryBuilder =
|
||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
|
||||
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
|
||||
for (int i = depth; i > 0; i--) {
|
||||
TermQuery tq = termQueryFunction.apply(i);
|
||||
BooleanQuery.Builder bq =
|
||||
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
|
||||
deepBuilder = bq.build();
|
||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
||||
expectedQueryBuilder.add(tq, Occur.FILTER);
|
||||
if (i == depth) {
|
||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
||||
} else {
|
||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
||||
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
|
||||
}
|
||||
expectedQuery = expectedBq.build();
|
||||
}
|
||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
||||
Query expectedQuery =
|
||||
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
|
||||
Query rewritten = searcher.rewrite(bq);
|
||||
assertEquals(expectedQuery, rewritten);
|
||||
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
|
||||
// `depth` rewrites because of the flattening
|
||||
assertEquals("Depth=" + depth, depth, rewriteQuery.numRewrites);
|
||||
}
|
||||
|
||||
public void testRemoveMatchAllFilter() throws IOException {
|
||||
|
@ -691,6 +688,110 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
assertSame(query, searcher.rewrite(query));
|
||||
}
|
||||
|
||||
public void testFlattenInnerConjunctions() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
|
||||
Query inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||
.build();
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
|
||||
.build();
|
||||
Query expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(0)
|
||||
.add(inner, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.build();
|
||||
expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(0)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
|
||||
.build();
|
||||
expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||
.build();
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||
.build();
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
|
||||
inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
|
||||
.build();
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
expectedRewritten =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||
}
|
||||
|
||||
public void testDiscardShouldClauses() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
|
||||
|
|
Loading…
Reference in New Issue