Compute better windows in MaxScoreBulkScorer. (#12593)

MaxScoreBulkScorer computes windows based on the set of clauses that were
essential in the *previous* window. This usually works well as the set of
essential clauses tends to be stable over time, but there are cases when
clauses get swapped between essential and non-essential clauses, and computing
windows based on the previous window can lead to suboptimal choices.

This PR creates a first proposal for the next score window using essential
clauses from the previous window, and then creates a second proposal once
scorers have been partitioned and their max scores have been updated. If this
second proposal results in a smaller window, it gets used.

On one particular query (`the incredibles`) and a reordered index with BP
(which increases chances that scorers move from essential to non-essential or
vice versa), this change yielded a 2.3x speedup.
This commit is contained in:
Adrien Grand 2023-10-05 12:46:57 +02:00 committed by GitHub
parent 28f0885bdf
commit 91019d685a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 37 additions and 13 deletions

View File

@ -76,12 +76,32 @@ final class MaxScoreBulkScorer extends BulkScorer {
int outerWindowMin = min;
outer:
while (outerWindowMin < max) {
int outerWindowMax = updateMaxWindowScores(outerWindowMin);
int outerWindowMax = computeOuterWindowMax(outerWindowMin);
outerWindowMax = Math.min(outerWindowMax, max);
if (partitionScorers() == false) {
// No matches in this window
outerWindowMin = outerWindowMax;
continue;
while (true) {
updateMaxWindowScores(outerWindowMin, outerWindowMax);
if (partitionScorers() == false) {
// No matches in this window
outerWindowMin = outerWindowMax;
continue outer;
}
// There is a dependency between windows and maximum scores, as we compute windows based on
// maximum scores and maximum scores based on windows.
// So the approach consists of starting by computing a window based on the set of essential
// scorers from the _previous_ window and then iteratively recomputing maximum scores and
// windows as long as the window size decreases.
// In general the set of essential scorers is rather stable over time so this would exit
// after a single iteration, but there is a chance that some scorers got swapped between the
// set of essential and non-essential scorers, in which case there may be multiple
// iterations of this loop.
int newOuterWindowMax = computeOuterWindowMax(outerWindowMin);
if (newOuterWindowMax >= outerWindowMax) {
break;
}
outerWindowMax = newOuterWindowMax;
}
DisiWrapper top = essentialQueue.top();
@ -191,35 +211,39 @@ final class MaxScoreBulkScorer extends BulkScorer {
}
}
private int updateMaxWindowScores(int windowMin) throws IOException {
private int computeOuterWindowMax(int windowMin) throws IOException {
// Only use essential scorers to compute the window's max doc ID, in order to avoid constantly
// recomputing max scores over small windows
final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1);
for (int i = 0; i < firstWindowLead; ++i) {
final DisiWrapper scorer = allScorers[i];
if (scorer.doc < windowMin) {
scorer.scorer.advanceShallow(windowMin);
}
}
int windowMax = DocIdSetIterator.NO_MORE_DOCS;
for (int i = firstWindowLead; i < allScorers.length; ++i) {
final DisiWrapper scorer = allScorers[i];
final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin));
windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive
}
// Score at least an entire inner window of docs
windowMax =
Math.max(
windowMax, (int) Math.min(Integer.MAX_VALUE, (long) windowMin + INNER_WINDOW_SIZE));
return windowMax;
}
private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
for (DisiWrapper scorer : allScorers) {
if (scorer.doc < windowMax) {
if (scorer.doc < windowMin) {
// Make sure to advance shallow if necessary to get as good score upper bounds as
// possible.
scorer.scorer.advanceShallow(windowMin);
}
scorer.maxWindowScore = scorer.scorer.getMaxScore(windowMax - 1);
} else {
// This scorer has no documents in the considered window.
scorer.maxWindowScore = 0;
}
}
return windowMax;
}
private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore)