From 91019d685adf49231dc13e37282d87e604275bf8 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 5 Oct 2023 12:46:57 +0200 Subject: [PATCH] Compute better windows in MaxScoreBulkScorer. (#12593) MaxScoreBulkScorer computes windows based on the set of clauses that were essential in the *previous* window. This usually works well as the set of essential clauses tends to be stable over time, but there are cases when clauses get swapped between essential and non-essential clauses, and computing windows based on the previous window can lead to suboptimal choices. This PR creates a first proposal for the next score window using essential clauses from the previous window, and then creates a second proposal once scorers have been partitioned and their max scores have been updated. If this second proposal results in a smaller window, it gets used. On one particular query (`the incredibles`) and a reordered index with BP (which increases chances that scorers move from essential to non-essential or vice-versa), this change yielded a 2.3x speedup. 
--- .../lucene/search/MaxScoreBulkScorer.java | 50 ++++++++++++++----- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 4cc14cdd0b3..3c3f3db7491 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -76,12 +76,32 @@ final class MaxScoreBulkScorer extends BulkScorer { int outerWindowMin = min; outer: while (outerWindowMin < max) { - int outerWindowMax = updateMaxWindowScores(outerWindowMin); + int outerWindowMax = computeOuterWindowMax(outerWindowMin); outerWindowMax = Math.min(outerWindowMax, max); - if (partitionScorers() == false) { - // No matches in this window - outerWindowMin = outerWindowMax; - continue; + + while (true) { + updateMaxWindowScores(outerWindowMin, outerWindowMax); + if (partitionScorers() == false) { + // No matches in this window + outerWindowMin = outerWindowMax; + continue outer; + } + + // There is a dependency between windows and maximum scores, as we compute windows based on + // maximum scores and maximum scores based on windows. + // So the approach consists of starting by computing a window based on the set of essential + // scorers from the _previous_ window and then iteratively recompute maximum scores and + // windows as long as the window size decreases. + // In general the set of essential scorers is rather stable over time so this would exit + // after a single iteration, but there is a chance that some scorers got swapped between the + // set of essential and non-essential scorers, in which case there may be multiple + // iterations of this loop. 
+ + int newOuterWindowMax = computeOuterWindowMax(outerWindowMin); + if (newOuterWindowMax >= outerWindowMax) { + break; + } + outerWindowMax = newOuterWindowMax; } DisiWrapper top = essentialQueue.top(); @@ -191,35 +211,39 @@ final class MaxScoreBulkScorer extends BulkScorer { } } - private int updateMaxWindowScores(int windowMin) throws IOException { + private int computeOuterWindowMax(int windowMin) throws IOException { // Only use essential scorers to compute the window's max doc ID, in order to avoid constantly // recomputing max scores over small windows final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1); - for (int i = 0; i < firstWindowLead; ++i) { - final DisiWrapper scorer = allScorers[i]; - if (scorer.doc < windowMin) { - scorer.scorer.advanceShallow(windowMin); - } - } int windowMax = DocIdSetIterator.NO_MORE_DOCS; for (int i = firstWindowLead; i < allScorers.length; ++i) { final DisiWrapper scorer = allScorers[i]; final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin)); windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive } + // Score at least an entire inner window of docs windowMax = Math.max( windowMax, (int) Math.min(Integer.MAX_VALUE, (long) windowMin + INNER_WINDOW_SIZE)); + return windowMax; + } + + private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException { for (DisiWrapper scorer : allScorers) { if (scorer.doc < windowMax) { + if (scorer.doc < windowMin) { + // Make sure to advance shallow if necessary to get as good score upper bounds as + // possible. + scorer.scorer.advanceShallow(windowMin); + } scorer.maxWindowScore = scorer.scorer.getMaxScore(windowMax - 1); } else { + // This scorer has no documents in the considered window. scorer.maxWindowScore = 0; } } - return windowMax; } private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore)