mirror of https://github.com/apache/lucene.git
Make MaxScoreBulkScorer repartition scorers when the min competitive score increases. (#13800)
MaxScoreBulkScorer partitions scorers into a set of essential scorers and a set of non-essential scorers, depending on the maximum scores produced by scorers and on the current minimum competitive score. An increase of the minimum competitive score has the potential to yield a more favorable partitioning, but repartitioning can also be expensive. In order to repartition when necessary while avoiding repartitioning too often, this PR tracks the smallest value of the minimum competitive score that would produce a more favorable partitioning, and repartitions scorers whenever the minimum competitive score reaches or exceeds this threshold.
This commit is contained in:
parent
352d85cbe4
commit
cf29597fec
|
@ -47,8 +47,12 @@ Improvements
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
|
* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
|
||||||
|
|
||||||
|
* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
|
||||||
|
minimum competitive score allows for a more favorable partitioning. (Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||||
|
|
|
@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
||||||
// Index of the first scorer that is required, this scorer and all following scorers are required
|
// Index of the first scorer that is required, this scorer and all following scorers are required
|
||||||
// for a document to match.
|
// for a document to match.
|
||||||
int firstRequiredScorer;
|
int firstRequiredScorer;
|
||||||
|
// The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
|
||||||
|
float nextMinCompetitiveScore;
|
||||||
private final long cost;
|
private final long cost;
|
||||||
float minCompetitiveScore;
|
float minCompetitiveScore;
|
||||||
private final Score scorable = new Score();
|
private final Score scorable = new Score();
|
||||||
|
@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
||||||
while (top.doc < outerWindowMax) {
|
while (top.doc < outerWindowMax) {
|
||||||
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
|
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
|
||||||
top = essentialQueue.top();
|
top = essentialQueue.top();
|
||||||
|
if (minCompetitiveScore >= nextMinCompetitiveScore) {
|
||||||
|
// The minimum competitive score increased substantially, so we can now partition scorers
|
||||||
|
// in a more favorable way.
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
outerWindowMin = outerWindowMax;
|
outerWindowMin = Math.min(top.doc, outerWindowMax);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nextCandidate(max);
|
return nextCandidate(max);
|
||||||
|
@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
||||||
});
|
});
|
||||||
double maxScoreSum = 0;
|
double maxScoreSum = 0;
|
||||||
firstEssentialScorer = 0;
|
firstEssentialScorer = 0;
|
||||||
|
nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
|
||||||
for (int i = 0; i < allScorers.length; ++i) {
|
for (int i = 0; i < allScorers.length; ++i) {
|
||||||
final DisiWrapper w = scratch[i];
|
final DisiWrapper w = scratch[i];
|
||||||
double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
|
double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
|
||||||
|
@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
||||||
firstEssentialScorer++;
|
firstEssentialScorer++;
|
||||||
} else {
|
} else {
|
||||||
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
|
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
|
||||||
|
nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
|
||||||
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
||||||
public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
|
|
||||||
private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
|
|
||||||
|
|
||||||
public CapMaxScoreWindowAt2048Scorer(Scorer in) {
|
|
||||||
super(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advanceShallow(int target) throws IOException {
|
|
||||||
return Math.min(target | 0x7FF, in.advanceShallow(target));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
|
||||||
return in.getMaxScore(upTo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void writeDocuments(Directory dir) throws IOException {
|
private void writeDocuments(Directory dir) throws IOException {
|
||||||
try (IndexWriter w =
|
try (IndexWriter w =
|
||||||
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
||||||
|
@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
|
||||||
Scorer scorer2 =
|
Scorer scorer2 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
|
||||||
|
|
||||||
BulkScorer scorer =
|
BulkScorer scorer =
|
||||||
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
||||||
|
@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
|
||||||
Scorer scorer2 =
|
Scorer scorer2 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
|
||||||
|
|
||||||
BulkScorer scorer =
|
BulkScorer scorer =
|
||||||
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
||||||
|
@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
|
||||||
Scorer scorer2 =
|
Scorer scorer2 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
|
||||||
Scorer scorer3 =
|
Scorer scorer3 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
|
|
||||||
|
|
||||||
BulkScorer scorer =
|
BulkScorer scorer =
|
||||||
new MaxScoreBulkScorer(
|
new MaxScoreBulkScorer(
|
||||||
|
@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
|
||||||
Scorer scorer2 =
|
Scorer scorer2 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
|
||||||
Scorer scorer3 =
|
Scorer scorer3 =
|
||||||
searcher
|
searcher
|
||||||
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
||||||
.scorer(context);
|
.scorer(context);
|
||||||
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
|
|
||||||
|
|
||||||
BulkScorer scorer =
|
BulkScorer scorer =
|
||||||
new MaxScoreBulkScorer(
|
new MaxScoreBulkScorer(
|
||||||
|
|
Loading…
Reference in New Issue