Make MaxScoreBulkScorer repartition scorers when the min competitive increases. (#13800)

MaxScoreBulkScorer partitions scorers into a set of essential scorers and a set
of non-essential scorers, depending on the maximum scores produced by scorers
and on the current minimum competitive score. An increase in the minimum
competitive score can make a more favorable partitioning possible, but
repartitioning can also be expensive.
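
As an illustration only, here is a minimal, self-contained Java sketch of that
essential/non-essential split. It is not Lucene's actual code: the ScorerInfo
record, the method names, and the example scores are made up, and the float
upper-bound rounding that the real partitioning performs is omitted.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Standalone sketch of the essential/non-essential split; not Lucene's actual code.
class PartitioningSketch {

  // Minimal stand-in for a scorer: all we need here is its per-window maximum score.
  record ScorerInfo(String name, float maxWindowScore) {}

  // Scorers whose cumulative maximum-score sum stays below the minimum competitive
  // score are non-essential: a document matching only those scorers cannot be
  // competitive. The remaining scorers are essential.
  static List<ScorerInfo> essentialScorers(List<ScorerInfo> scorers, float minCompetitiveScore) {
    List<ScorerInfo> sorted = new ArrayList<>(scorers);
    sorted.sort(Comparator.comparingDouble(ScorerInfo::maxWindowScore));

    List<ScorerInfo> essential = new ArrayList<>();
    double maxScoreSum = 0;
    for (ScorerInfo scorer : sorted) {
      double newMaxScoreSum = maxScoreSum + scorer.maxWindowScore();
      if (newMaxScoreSum < minCompetitiveScore) {
        maxScoreSum = newMaxScoreSum; // still not competitive on its own: non-essential
      } else {
        essential.add(scorer);
      }
    }
    return essential;
  }

  public static void main(String[] args) {
    List<ScorerInfo> scorers =
        List.of(new ScorerInfo("a", 1.0f), new ScorerInfo("b", 2.0f), new ScorerInfo("c", 4.0f));
    // "a" and "b" together contribute at most 3.0 < 3.5, so only "c" is essential.
    System.out.println(essentialScorers(scorers, 3.5f));
  }
}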

To repartition when it is worthwhile while avoiding repartitioning too often,
this PR tracks the lowest minimum competitive score that would produce a more
favorable partitioning, and repartitions scorers whenever the minimum
competitive score reaches this threshold.
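
The sketch below isolates that bookkeeping, again as a hypothetical stand-in
rather than the real implementation: all names other than minCompetitiveScore
and nextMinCompetitiveScore are invented, and the real code's per-window
recomputation and float upper-bound rounding are ignored. The partitioning loop
remembers the smallest cumulative max-score sum of any scorer that stayed
essential, and scorers are only repartitioned once the minimum competitive
score reaches that value.

// Self-contained, hypothetical sketch of the new bookkeeping; not Lucene's actual code.
class RepartitionSketch {

  private final float[] maxWindowScores; // per-scorer window max scores, sorted ascending
  private float minCompetitiveScore;
  private float nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
  private int nonEssentialCount;

  RepartitionSketch(float[] maxWindowScoresSortedAsc) {
    this.maxWindowScores = maxWindowScoresSortedAsc;
    partition();
  }

  private void partition() {
    nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
    nonEssentialCount = 0;
    double maxScoreSum = 0;
    for (float maxWindowScore : maxWindowScores) {
      double newMaxScoreSum = maxScoreSum + maxWindowScore;
      if (newMaxScoreSum < minCompetitiveScore) {
        // Still not competitive on its own: this scorer joins the non-essential set.
        maxScoreSum = newMaxScoreSum;
        nonEssentialCount++;
      } else {
        // Essential for now, but it would become non-essential once the minimum
        // competitive score reaches this cumulative sum; remember the smallest such value.
        nextMinCompetitiveScore = Math.min((float) newMaxScoreSum, nextMinCompetitiveScore);
      }
    }
  }

  void setMinCompetitiveScore(float score) {
    minCompetitiveScore = score;
    if (minCompetitiveScore >= nextMinCompetitiveScore) {
      // Only now can the split improve, so this is the only point where repartitioning pays off.
      partition();
    }
  }

  public static void main(String[] args) {
    RepartitionSketch sketch = new RepartitionSketch(new float[] {1.0f, 2.0f, 4.0f});
    // Initially no scorer is non-essential and nextMinCompetitiveScore is 1.0.
    sketch.setMinCompetitiveScore(2.5f); // 2.5 >= 1.0, so scorers are repartitioned
    System.out.println(sketch.nonEssentialCount); // 1: only the scorer with max score 1.0
  }
}
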
commit 8ad66899bc (parent b8bfffa368)
Author: Adrien Grand
Date: 2024-10-15 15:20:34 +02:00
3 changed files with 14 additions and 28 deletions

lucene/CHANGES.txt

@@ -21,8 +21,12 @@ Improvements
Optimizations
---------------------
* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
  minimum competitive score allows for a more favorable partitioning. (Adrien Grand)
Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

MaxScoreBulkScorer.java

@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
// Index of the first scorer that is required, this scorer and all following scorers are required
// for a document to match.
int firstRequiredScorer;
// The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
float nextMinCompetitiveScore;
private final long cost;
float minCompetitiveScore;
private final Score scorable = new Score();
@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
while (top.doc < outerWindowMax) {
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
top = essentialQueue.top();
if (minCompetitiveScore >= nextMinCompetitiveScore) {
// The minimum competitive score increased substantially, so we can now partition scorers
// in a more favorable way.
break;
}
}
outerWindowMin = outerWindowMax;
outerWindowMin = Math.min(top.doc, outerWindowMax);
}
return nextCandidate(max);
@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
});
double maxScoreSum = 0;
firstEssentialScorer = 0;
nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
for (int i = 0; i < allScorers.length; ++i) {
final DisiWrapper w = scratch[i];
double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
firstEssentialScorer++;
} else {
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
}
}

TestMaxScoreBulkScorer.java

@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestMaxScoreBulkScorer extends LuceneTestCase {
private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
public CapMaxScoreWindowAt2048Scorer(Scorer in) {
super(in);
}
@Override
public int advanceShallow(int target) throws IOException {
return Math.min(target | 0x7FF, in.advanceShallow(target));
}
@Override
public float getMaxScore(int upTo) throws IOException {
return in.getMaxScore(upTo);
}
}
private void writeDocuments(Directory dir) throws IOException {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(