mirror of https://github.com/apache/lucene.git
Compute better windows in MaxScoreBulkScorer. (#12593)
MaxScoreBulkScorer computes windows based on the set of clauses that were essential in the *previous* window. This usually works well as the set of essential clauses tends to be stable over time, but there are cases when clauses get swapped between essential and non-essential clauses, and computing windows based on the previous window can lead to suboptimal choices. This PR creates a first proposal for the next score window using essential clauses from the previous window, and then creates a second proposal once scorers have been partitioned and their max scores have been updated. If this second proposal results in a smaller window, it gets used. On one particular query (`the incredibles`) and a reordered index with BP (which increases chances that scorers move from essential to non-essential or vice-versa), this change yielded a 2.3x speedup.
This commit is contained in:
parent
28f0885bdf
commit
91019d685a
|
@ -76,12 +76,32 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
int outerWindowMin = min;
|
||||
outer:
|
||||
while (outerWindowMin < max) {
|
||||
int outerWindowMax = updateMaxWindowScores(outerWindowMin);
|
||||
int outerWindowMax = computeOuterWindowMax(outerWindowMin);
|
||||
outerWindowMax = Math.min(outerWindowMax, max);
|
||||
if (partitionScorers() == false) {
|
||||
// No matches in this window
|
||||
outerWindowMin = outerWindowMax;
|
||||
continue;
|
||||
|
||||
while (true) {
|
||||
updateMaxWindowScores(outerWindowMin, outerWindowMax);
|
||||
if (partitionScorers() == false) {
|
||||
// No matches in this window
|
||||
outerWindowMin = outerWindowMax;
|
||||
continue outer;
|
||||
}
|
||||
|
||||
// There is a dependency between windows and maximum scores, as we compute windows based on
|
||||
// maximum scores and maximum scores based on windows.
|
||||
// So the approach consists of starting by computing a window based on the set of essential
|
||||
// scorers from the _previous_ window and then iteratively recompute maximum scores and
|
||||
// windows as long as the window size decreases.
|
||||
// In general the set of essential scorers is rather stable over time so this would exit
|
||||
// after a single iteration, but there is a change that some scorers got swapped between the
|
||||
// set of essential and non-essential scorers, in which case there may be multiple
|
||||
// iterations of this loop.
|
||||
|
||||
int newOuterWindowMax = computeOuterWindowMax(outerWindowMin);
|
||||
if (newOuterWindowMax >= outerWindowMax) {
|
||||
break;
|
||||
}
|
||||
outerWindowMax = newOuterWindowMax;
|
||||
}
|
||||
|
||||
DisiWrapper top = essentialQueue.top();
|
||||
|
@ -191,35 +211,39 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
}
|
||||
}
|
||||
|
||||
private int updateMaxWindowScores(int windowMin) throws IOException {
|
||||
private int computeOuterWindowMax(int windowMin) throws IOException {
|
||||
// Only use essential scorers to compute the window's max doc ID, in order to avoid constantly
|
||||
// recomputing max scores over small windows
|
||||
final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1);
|
||||
for (int i = 0; i < firstWindowLead; ++i) {
|
||||
final DisiWrapper scorer = allScorers[i];
|
||||
if (scorer.doc < windowMin) {
|
||||
scorer.scorer.advanceShallow(windowMin);
|
||||
}
|
||||
}
|
||||
int windowMax = DocIdSetIterator.NO_MORE_DOCS;
|
||||
for (int i = firstWindowLead; i < allScorers.length; ++i) {
|
||||
final DisiWrapper scorer = allScorers[i];
|
||||
final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin));
|
||||
windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive
|
||||
}
|
||||
|
||||
// Score at least an entire inner window of docs
|
||||
windowMax =
|
||||
Math.max(
|
||||
windowMax, (int) Math.min(Integer.MAX_VALUE, (long) windowMin + INNER_WINDOW_SIZE));
|
||||
|
||||
return windowMax;
|
||||
}
|
||||
|
||||
private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
|
||||
for (DisiWrapper scorer : allScorers) {
|
||||
if (scorer.doc < windowMax) {
|
||||
if (scorer.doc < windowMin) {
|
||||
// Make sure to advance shallow if necessary to get as good score upper bounds as
|
||||
// possible.
|
||||
scorer.scorer.advanceShallow(windowMin);
|
||||
}
|
||||
scorer.maxWindowScore = scorer.scorer.getMaxScore(windowMax - 1);
|
||||
} else {
|
||||
// This scorer has no documents in the considered window.
|
||||
scorer.maxWindowScore = 0;
|
||||
}
|
||||
}
|
||||
return windowMax;
|
||||
}
|
||||
|
||||
private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore)
|
||||
|
|
Loading…
Reference in New Issue