Make MaxScoreBulkScorer repartition scorers when the min competitive score increases. (#13800)

MaxScoreBulkScorer partitions scorers into a set of essential scorers and a set
of non-essential scorers, depending on the maximum scores produced by scorers
and on the current minimum competitive score. An increase of the minimum
competitive score has the potential to yield a more favorable partitioning, but
repartitioning can also be expensive.

In order to repartition when necessary without repartitioning too often,
this PR tracks the minimum value of the minimum competitive score that would
produce a more favorable partitioning, and repartitions scorers whenever the
minimum competitive score reaches or exceeds this threshold.
This commit is contained in:
Adrien Grand 2024-10-15 15:20:34 +02:00 committed by GitHub
parent 352d85cbe4
commit cf29597fec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 28 deletions

View File

@ -47,8 +47,12 @@ Improvements
Optimizations Optimizations
--------------------- ---------------------
* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao) * GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
minimum competitive score allows for a more favorable partitioning. (Adrien Grand)
Bug Fixes Bug Fixes
--------------------- ---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

View File

@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
// Index of the first scorer that is required, this scorer and all following scorers are required // Index of the first scorer that is required, this scorer and all following scorers are required
// for a document to match. // for a document to match.
int firstRequiredScorer; int firstRequiredScorer;
// The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
float nextMinCompetitiveScore;
private final long cost; private final long cost;
float minCompetitiveScore; float minCompetitiveScore;
private final Score scorable = new Score(); private final Score scorable = new Score();
@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
while (top.doc < outerWindowMax) { while (top.doc < outerWindowMax) {
scoreInnerWindow(collector, acceptDocs, outerWindowMax); scoreInnerWindow(collector, acceptDocs, outerWindowMax);
top = essentialQueue.top(); top = essentialQueue.top();
if (minCompetitiveScore >= nextMinCompetitiveScore) {
// The minimum competitive score increased substantially, so we can now partition scorers
// in a more favorable way.
break;
}
} }
outerWindowMin = outerWindowMax; outerWindowMin = Math.min(top.doc, outerWindowMax);
} }
return nextCandidate(max); return nextCandidate(max);
@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
}); });
double maxScoreSum = 0; double maxScoreSum = 0;
firstEssentialScorer = 0; firstEssentialScorer = 0;
nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
for (int i = 0; i < allScorers.length; ++i) { for (int i = 0; i < allScorers.length; ++i) {
final DisiWrapper w = scratch[i]; final DisiWrapper w = scratch[i];
double newMaxScoreSum = maxScoreSum + w.maxWindowScore; double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
firstEssentialScorer++; firstEssentialScorer++;
} else { } else {
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w; allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
} }
} }

View File

@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestMaxScoreBulkScorer extends LuceneTestCase { public class TestMaxScoreBulkScorer extends LuceneTestCase {
// Scorer wrapper nested in this test class: it delegates to the wrapped scorer but caps the value
// returned by advanceShallow at the end of the 2048-document block that contains the target.
private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
public CapMaxScoreWindowAt2048Scorer(Scorer in) {
super(in);
}
@Override
public int advanceShallow(int target) throws IOException {
// target | 0x7FF sets the low 11 bits of target; for a non-negative target this is the last
// doc ID of the 2048-aligned block containing it. Return the smaller of that bound and the
// wrapped scorer's own answer.
return Math.min(target | 0x7FF, in.advanceShallow(target));
}
@Override
public float getMaxScore(int upTo) throws IOException {
// Max scores are not altered; this passes straight through to the wrapped scorer.
return in.getMaxScore(upTo);
}
}
private void writeDocuments(Directory dir) throws IOException { private void writeDocuments(Directory dir) throws IOException {
try (IndexWriter w = try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 = Scorer scorer2 =
searcher searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer = BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 = Scorer scorer2 =
searcher searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer = BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 = Scorer scorer2 =
searcher searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 = Scorer scorer3 =
searcher searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer = BulkScorer scorer =
new MaxScoreBulkScorer( new MaxScoreBulkScorer(
@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 = Scorer scorer2 =
searcher searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 = Scorer scorer3 =
searcher searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context); .scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer = BulkScorer scorer =
new MaxScoreBulkScorer( new MaxScoreBulkScorer(