Revert "Only consider clauses whose cost is less than the lead cost to compute block boundaries in WANDScorer. (#14000)"

This reverts commit 5807ff1620.
This commit is contained in:
Adrien Grand 2024-11-20 10:15:44 +01:00
parent a67120e175
commit b70d214217
4 changed files with 23 additions and 37 deletions

View File

@ -70,9 +70,6 @@ Optimizations
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
(Adrien Grand)
* GITHUB#13996, GITHUB#14000: Speed up top-k retrieval of filtered disjunctions.
(Adrien Grand)
Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

View File

@ -478,7 +478,7 @@ final class BooleanScorerSupplier extends ScorerSupplier {
// However, as WANDScorer uses a more complex algorithm and data structure, we would like to
// still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
if ((scoreMode == ScoreMode.TOP_SCORES && topLevelScoringClause) || minShouldMatch > 1) {
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode, leadCost);
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode);
} else {
return new DisjunctionSumScorer(optionalScorers, scoreMode);
}

View File

@ -149,9 +149,8 @@ final class WANDScorer extends Scorer {
int freq;
final ScoreMode scoreMode;
final long leadCost;
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode, long leadCost)
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode)
throws IOException {
if (minShouldMatch >= scorers.size()) {
@ -203,7 +202,6 @@ final class WANDScorer extends Scorer {
scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
scorers.size(),
minShouldMatch);
this.leadCost = leadCost;
}
// returns a boolean so that it can be called from assert
@ -397,32 +395,26 @@ final class WANDScorer extends Scorer {
}
private void updateMaxScores(int target) throws IOException {
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
// If we have entries in 'head', we treat them all as leads and take the minimum of their next
// block boundaries as a next boundary.
// We don't take entries in 'tail' into account on purpose: 'tail' is supposed to contain the
// least score contributors, and taking them into account might not move the boundary fast
// enough, so we'll waste CPU re-computing the next boundary all the time.
// Likewise, we ignore clauses whose cost is greater than the lead cost to avoid recomputing
// per-window max scores over and over again. In the event that this makes us compute upTo as
// NO_MORE_DOCS, this scorer will effectively implement WAND rather than block-max WAND.
for (DisiWrapper w : head) {
if (w.doc <= newUpTo && w.cost <= leadCost) {
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
if (head.size() == 0) {
// If the head is empty we use the greatest score contributor as a lead
// like for conjunctions.
upTo = tail[0].scorer.advanceShallow(target);
} else {
// If we still have entries in 'head', we treat them all as leads and
// take the minimum of their next block boundaries as a next boundary.
// We don't take entries in 'tail' into account on purpose: 'tail' is
// supposed to contain the least score contributors, and taking them
// into account might not move the boundary fast enough, so we'll waste
// CPU re-computing the next boundary all the time.
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
for (DisiWrapper w : head) {
if (w.doc <= newUpTo) {
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
}
}
upTo = newUpTo;
}
// Only look at the tail if none of the `head` clauses had a block we could reuse and if its
// cost is less than or equal to the lead cost.
if (newUpTo == DocIdSetIterator.NO_MORE_DOCS && tailSize > 0 && tail[0].cost <= leadCost) {
newUpTo = tail[0].scorer.advanceShallow(target);
// upTo must be on or after the least `head` doc
DisiWrapper headTop = head.top();
if (headTop != null) {
newUpTo = Math.max(newUpTo, headTop.doc);
}
}
upTo = newUpTo;
tailMaxScore = 0;
for (int i = 0; i < tailSize; ++i) {
@ -468,7 +460,8 @@ final class WANDScorer extends Scorer {
}
}
assert head.size() == 0 || head.top().doc <= upTo;
assert (head.size() == 0 && upTo == DocIdSetIterator.NO_MORE_DOCS)
|| (head.size() > 0 && head.top().doc <= upTo);
assert upTo >= target;
}

View File

@ -1024,11 +1024,7 @@ public class TestWANDScorer extends LuceneTestCase {
final Scorer scorer;
if (optionalScorers.size() > 0) {
scorer =
new WANDScorer(
optionalScorers,
query.getMinimumNumberShouldMatch(),
scoreMode,
Long.MAX_VALUE);
new WANDScorer(optionalScorers, query.getMinimumNumberShouldMatch(), scoreMode);
} else {
scorer = weight.scorer(context);
if (scorer == null) return null;