Propagate minimum competitive score in ReqOptSumScorer. (#13026)

If the required clause doesn't contribute scores, which typically happens if
the required clause is a `FILTER` clause, then the minimum competitive score
can be propagated directly to the optional clause.
This commit is contained in:
Adrien Grand 2024-01-26 15:50:22 +01:00 committed by GitHub
parent 082d318abd
commit 7d35ae4858
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 58 additions and 20 deletions

View File

@ -221,6 +221,11 @@ Optimizations
clause of a boolean query, this helps save work on other required clauses of
the same boolean query. (Adrien Grand)
* GITHUB#13026: ReqOptSumScorer will now propagate minimum competitive scores
to the optional clause if the required clause doesn't score. In practice,
this will help boolean queries that consist of a mix OF FILTER clauses and
SHOULD clauses. (Adrien Grand)
Bug Fixes
---------------------
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)

View File

@ -36,7 +36,7 @@ class ReqOptSumScorer extends Scorer {
private final TwoPhaseIterator twoPhase;
private float minScore = 0;
private float reqMaxScore;
private final float reqMaxScore;
private boolean optIsRequired;
/**
@ -295,6 +295,15 @@ class ReqOptSumScorer extends Scorer {
// Potentially move to a conjunction
if (reqMaxScore < minScore) {
optIsRequired = true;
if (reqMaxScore == 0) {
// If the required clause doesn't contribute scores, we can propagate the minimum
// competitive score to the optional clause. This happens when the required clause is a
// FILTER clause.
// In theory we could generalize this and set minScore - reqMaxScore as a minimum
// competitive score, but it's unlikely to help in practice unless reqMaxScore is much
// smaller than typical scores of the optional clause.
optScorer.setMinCompetitiveScore(minScore);
}
}
}

View File

@ -41,7 +41,15 @@ import org.apache.lucene.tests.util.LuceneTestCase;
public class TestReqOptSumScorer extends LuceneTestCase {
public void testBasics() throws IOException {
public void testBasicsMust() throws IOException {
doTestBasics(Occur.MUST);
}
public void testBasicsFilter() throws IOException {
doTestBasics(Occur.FILTER);
}
private void doTestBasics(Occur reqOccur) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w =
new RandomIndexWriter(
@ -75,7 +83,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), Occur.MUST)
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "foo"))), reqOccur)
.add(new ConstantScoreQuery(new TermQuery(new Term("f", "bar"))), Occur.SHOULD)
.build();
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1);
@ -92,9 +100,13 @@ public class TestReqOptSumScorer extends LuceneTestCase {
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
scorer.setMinCompetitiveScore(Math.nextDown(1f));
assertEquals(0, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(0, scorer.iterator().nextDoc());
}
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(2, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(2, scorer.iterator().nextDoc());
}
assertEquals(4, scorer.iterator().nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
@ -102,8 +114,10 @@ public class TestReqOptSumScorer extends LuceneTestCase {
ss.setTopLevelScoringClause();
scorer = ss.get(Long.MAX_VALUE);
scorer.setMinCompetitiveScore(Math.nextUp(1f));
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
ss = weight.scorerSupplier(context);
@ -111,8 +125,10 @@ public class TestReqOptSumScorer extends LuceneTestCase {
scorer = ss.get(Long.MAX_VALUE);
assertEquals(0, scorer.iterator().nextDoc());
scorer.setMinCompetitiveScore(Math.nextUp(1f));
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
if (reqOccur == Occur.MUST) {
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
reader.close();
@ -235,15 +251,23 @@ public class TestReqOptSumScorer extends LuceneTestCase {
dir.close();
}
public void testRandomFrequentOpt() throws IOException {
doTestRandom(0.5);
public void testMustRandomFrequentOpt() throws IOException {
doTestRandom(Occur.MUST, 0.5);
}
public void testRandomRareOpt() throws IOException {
doTestRandom(0.05);
public void testMustRandomRareOpt() throws IOException {
doTestRandom(Occur.MUST, 0.05);
}
private void doTestRandom(double optFreq) throws IOException {
public void testFilterRandomFrequentOpt() throws IOException {
doTestRandom(Occur.FILTER, 0.5);
}
public void testFilterRandomRareOpt() throws IOException {
doTestRandom(Occur.FILTER, 0.05);
}
private void doTestRandom(Occur reqOccur, double optFreq) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
int numDocs = atLeast(1000);
@ -269,7 +293,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
Query mustTerm = new TermQuery(new Term("f", "A"));
Query shouldTerm = new TermQuery(new Term("f", "B"));
Query query =
new BooleanQuery.Builder().add(mustTerm, Occur.MUST).add(shouldTerm, Occur.SHOULD).build();
new BooleanQuery.Builder().add(mustTerm, reqOccur).add(shouldTerm, Occur.SHOULD).build();
TopScoreDocCollectorManager collectorManager =
new TopScoreDocCollectorManager(10, Integer.MAX_VALUE);
@ -293,7 +317,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
{
Query q =
new BooleanQuery.Builder()
.add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST)
.add(new RandomApproximationQuery(mustTerm, random()), reqOccur)
.add(shouldTerm, Occur.SHOULD)
.build();
@ -304,7 +328,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
q =
new BooleanQuery.Builder()
.add(mustTerm, Occur.MUST)
.add(mustTerm, reqOccur)
.add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD)
.build();
collectorManager = new TopScoreDocCollectorManager(10, 1);
@ -314,7 +338,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
q =
new BooleanQuery.Builder()
.add(new RandomApproximationQuery(mustTerm, random()), Occur.MUST)
.add(new RandomApproximationQuery(mustTerm, random()), reqOccur)
.add(new RandomApproximationQuery(shouldTerm, random()), Occur.SHOULD)
.build();
collectorManager = new TopScoreDocCollectorManager(10, 1);
@ -348,7 +372,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
{
query =
new BooleanQuery.Builder()
.add(query, Occur.MUST)
.add(query, reqOccur)
.add(new TermQuery(new Term("f", "C")), Occur.SHOULD)
.build();
@ -356,7 +380,7 @@ public class TestReqOptSumScorer extends LuceneTestCase {
query =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("f", "C")), Occur.MUST)
.add(new TermQuery(new Term("f", "C")), reqOccur)
.add(query, Occur.SHOULD)
.build();