mirror of https://github.com/apache/lucene.git
Speed up top-k retrieval on filtered conjunctions. (#13994)
A while back we added an optimized bulk scorer that implements block-max AND, this yielded a good speedup on nightly benchmarks, see annotation `FP` at https://benchmarks.mikemccandless.com/AndHighHigh.html. With this PR, filtered conjunctions now also run through this optimized bulk scorer by doing two things: - It flattens inner conjunctions. This makes queries initially written as something like `+(+term1 +term2) #filter` rewritten to `+term1 +term2 #filter`. - It evaluates queries that have a mix of MUST and FILTER clauses evaluated through `BlockMaxConjunctionBulkScorer` by treating FILTER clauses as scoring clauses that produce a score of 0.
This commit is contained in:
parent
a5d44d89eb
commit
cf27af1416
|
@ -67,6 +67,9 @@ Optimizations
|
||||||
longs that would pack two integers. We are now moving back to integers to be
|
longs that would pack two integers. We are now moving back to integers to be
|
||||||
able to take advantage of 2x more lanes with the vector API. (Adrien Grand)
|
able to take advantage of 2x more lanes with the vector API. (Adrien Grand)
|
||||||
|
|
||||||
|
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||||
|
|
|
@ -268,6 +268,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
return new MatchNoDocsQuery("empty BooleanQuery");
|
return new MatchNoDocsQuery("empty BooleanQuery");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Queries with no positive clauses have no matches
|
||||||
|
if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) {
|
||||||
|
return new MatchNoDocsQuery("pure negative BooleanQuery");
|
||||||
|
}
|
||||||
|
|
||||||
// optimize 1-clause queries
|
// optimize 1-clause queries
|
||||||
if (clauses.size() == 1) {
|
if (clauses.size() == 1) {
|
||||||
BooleanClause c = clauses.get(0);
|
BooleanClause c = clauses.get(0);
|
||||||
|
@ -283,8 +288,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
// no scoring clauses, so return a score of 0
|
// no scoring clauses, so return a score of 0
|
||||||
return new BoostQuery(new ConstantScoreQuery(query), 0);
|
return new BoostQuery(new ConstantScoreQuery(query), 0);
|
||||||
case MUST_NOT:
|
case MUST_NOT:
|
||||||
// no positive clauses
|
|
||||||
return new MatchNoDocsQuery("pure negative BooleanQuery");
|
|
||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
@ -539,8 +542,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
||||||
boolean actuallyRewritten = false;
|
boolean actuallyRewritten = false;
|
||||||
for (BooleanClause clause : clauses) {
|
for (BooleanClause clause : clauses) {
|
||||||
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery) {
|
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
|
||||||
BooleanQuery innerQuery = (BooleanQuery) clause.query();
|
|
||||||
if (innerQuery.isPureDisjunction()) {
|
if (innerQuery.isPureDisjunction()) {
|
||||||
actuallyRewritten = true;
|
actuallyRewritten = true;
|
||||||
for (BooleanClause innerClause : innerQuery.clauses()) {
|
for (BooleanClause innerClause : innerQuery.clauses()) {
|
||||||
|
@ -558,6 +560,46 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inline required / prohibited clauses. This helps run filtered conjunctive queries more
|
||||||
|
// efficiently by providing all clauses to the block-max AND scorer.
|
||||||
|
{
|
||||||
|
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||||
|
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
|
||||||
|
boolean actuallyRewritten = false;
|
||||||
|
for (BooleanClause outerClause : clauses) {
|
||||||
|
if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) {
|
||||||
|
// Inlining prohibited clauses is not legal if the query is a pure negation, since pure
|
||||||
|
// negations have no matches. It works because the inner BooleanQuery would have first
|
||||||
|
// rewritten to a MatchNoDocsQuery if it only had prohibited clauses.
|
||||||
|
assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size();
|
||||||
|
if (innerQuery.getMinimumNumberShouldMatch() == 0
|
||||||
|
&& innerQuery.getClauses(Occur.SHOULD).isEmpty()) {
|
||||||
|
|
||||||
|
actuallyRewritten = true;
|
||||||
|
for (BooleanClause innerClause : innerQuery) {
|
||||||
|
Occur innerOccur = innerClause.occur();
|
||||||
|
if (innerOccur == Occur.FILTER
|
||||||
|
|| innerOccur == Occur.MUST_NOT
|
||||||
|
|| outerClause.occur() == Occur.MUST) {
|
||||||
|
builder.add(innerClause);
|
||||||
|
} else {
|
||||||
|
assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST;
|
||||||
|
// In this case we need to change the occur of the inner query from MUST to FILTER.
|
||||||
|
builder.add(innerClause.query(), Occur.FILTER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder.add(outerClause);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
builder.add(outerClause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (actuallyRewritten) {
|
||||||
|
return builder.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SHOULD clause count less than or equal to minimumNumberShouldMatch
|
// SHOULD clause count less than or equal to minimumNumberShouldMatch
|
||||||
// Important(this can only be processed after nested clauses have been flattened)
|
// Important(this can only be processed after nested clauses have been flattened)
|
||||||
{
|
{
|
||||||
|
|
|
@ -333,10 +333,15 @@ final class BooleanScorerSupplier extends ScorerSupplier {
|
||||||
requiredScoring.add(ss.get(leadCost));
|
requiredScoring.add(ss.get(leadCost));
|
||||||
}
|
}
|
||||||
if (scoreMode == ScoreMode.TOP_SCORES
|
if (scoreMode == ScoreMode.TOP_SCORES
|
||||||
&& requiredNoScoring.isEmpty()
|
|
||||||
&& requiredScoring.size() > 1
|
&& requiredScoring.size() > 1
|
||||||
// Only specialize top-level conjunctions for clauses that don't have a two-phase iterator.
|
// Only specialize top-level conjunctions for clauses that don't have a two-phase iterator.
|
||||||
|
&& requiredNoScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)
|
||||||
&& requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) {
|
&& requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) {
|
||||||
|
// Turn all filters into scoring clauses with a score of zero, so that
|
||||||
|
// BlockMaxConjunctionBulkScorer is applicable.
|
||||||
|
for (Scorer filter : requiredNoScoring) {
|
||||||
|
requiredScoring.add(new ConstantScoreScorer(0f, ScoreMode.COMPLETE, filter.iterator()));
|
||||||
|
}
|
||||||
return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring);
|
return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring);
|
||||||
}
|
}
|
||||||
if (scoreMode != ScoreMode.TOP_SCORES
|
if (scoreMode != ScoreMode.TOP_SCORES
|
||||||
|
|
|
@ -330,8 +330,8 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
||||||
int depth = TestUtil.nextInt(random(), 10, 30);
|
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||||
Query expectedQuery =
|
BooleanQuery.Builder expectedQueryBuilder =
|
||||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
|
||||||
Query deepBuilder =
|
Query deepBuilder =
|
||||||
new BooleanQuery.Builder()
|
new BooleanQuery.Builder()
|
||||||
.add(rewriteQuery, Occur.SHOULD)
|
.add(rewriteQuery, Occur.SHOULD)
|
||||||
|
@ -345,21 +345,19 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
||||||
.add(tq, Occur.SHOULD)
|
.add(tq, Occur.SHOULD)
|
||||||
.add(deepBuilder, Occur.SHOULD);
|
.add(deepBuilder, Occur.SHOULD);
|
||||||
deepBuilder = bq.build();
|
deepBuilder = bq.build();
|
||||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
expectedQueryBuilder.add(tq, Occur.FILTER);
|
||||||
if (i == depth) {
|
if (i == depth) {
|
||||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
|
||||||
} else {
|
|
||||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
|
||||||
}
|
}
|
||||||
expectedQuery = expectedBq.build();
|
|
||||||
}
|
}
|
||||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
Query expectedQuery =
|
||||||
|
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
|
||||||
Query rewritten = searcher.rewrite(bq);
|
Query rewritten = searcher.rewrite(bq);
|
||||||
assertEquals(expectedQuery, rewritten);
|
assertEquals(expectedQuery, rewritten);
|
||||||
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
|
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
|
||||||
// `FILTER`
|
// `FILTER`, plus the flattening of required clauses
|
||||||
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
|
assertEquals("Depth=" + depth, depth * 2, rewriteQuery.numRewrites);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDeeplyNestedBooleanRewrite() throws IOException {
|
public void testDeeplyNestedBooleanRewrite() throws IOException {
|
||||||
|
@ -369,27 +367,26 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
||||||
int depth = TestUtil.nextInt(random(), 10, 30);
|
int depth = TestUtil.nextInt(random(), 10, 30);
|
||||||
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
|
||||||
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
|
||||||
Query expectedQuery =
|
BooleanQuery.Builder expectedQueryBuilder =
|
||||||
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
|
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
|
||||||
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
|
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
|
||||||
for (int i = depth; i > 0; i--) {
|
for (int i = depth; i > 0; i--) {
|
||||||
TermQuery tq = termQueryFunction.apply(i);
|
TermQuery tq = termQueryFunction.apply(i);
|
||||||
BooleanQuery.Builder bq =
|
BooleanQuery.Builder bq =
|
||||||
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
|
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
|
||||||
deepBuilder = bq.build();
|
deepBuilder = bq.build();
|
||||||
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
|
expectedQueryBuilder.add(tq, Occur.FILTER);
|
||||||
if (i == depth) {
|
if (i == depth) {
|
||||||
expectedBq.add(rewriteQuery, Occur.FILTER);
|
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
|
||||||
} else {
|
|
||||||
expectedBq.add(expectedQuery, Occur.FILTER);
|
|
||||||
}
|
}
|
||||||
expectedQuery = expectedBq.build();
|
|
||||||
}
|
}
|
||||||
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
|
||||||
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
|
Query expectedQuery =
|
||||||
|
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
|
||||||
Query rewritten = searcher.rewrite(bq);
|
Query rewritten = searcher.rewrite(bq);
|
||||||
assertEquals(expectedQuery, rewritten);
|
assertEquals(expectedQuery, rewritten);
|
||||||
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
|
// `depth` rewrites because of the flattening
|
||||||
|
assertEquals("Depth=" + depth, depth, rewriteQuery.numRewrites);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRemoveMatchAllFilter() throws IOException {
|
public void testRemoveMatchAllFilter() throws IOException {
|
||||||
|
@ -691,6 +688,110 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
||||||
assertSame(query, searcher.rewrite(query));
|
assertSame(query, searcher.rewrite(query));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFlattenInnerConjunctions() throws IOException {
|
||||||
|
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||||
|
|
||||||
|
Query inner =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(inner, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
Query expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
|
||||||
|
query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.setMinimumNumberShouldMatch(0)
|
||||||
|
.add(inner, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.setMinimumNumberShouldMatch(0)
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
|
||||||
|
query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(inner, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
|
||||||
|
inner =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(inner, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
|
||||||
|
inner =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(inner, Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
|
||||||
|
inner =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(inner, Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
expectedRewritten =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
|
||||||
|
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
|
||||||
|
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||||
|
.build();
|
||||||
|
assertEquals(expectedRewritten, searcher.rewrite(query));
|
||||||
|
}
|
||||||
|
|
||||||
public void testDiscardShouldClauses() throws IOException {
|
public void testDiscardShouldClauses() throws IOException {
|
||||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue