mirror of https://github.com/apache/lucene.git
Revert "Only consider clauses whose cost is less than the lead cost to compute block boundaries in WANDScorer. (#14000)"
This reverts commit 5807ff1620
.
This commit is contained in:
parent
a67120e175
commit
b70d214217
|
@ -70,9 +70,6 @@ Optimizations
|
|||
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
* GITHUB#13996, GITHUB#14000: Speed up top-k retrieval of filtered disjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||
|
|
|
@ -478,7 +478,7 @@ final class BooleanScorerSupplier extends ScorerSupplier {
|
|||
// However, as WANDScorer uses more complex algorithm and data structure, we would like to
|
||||
// still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
|
||||
if ((scoreMode == ScoreMode.TOP_SCORES && topLevelScoringClause) || minShouldMatch > 1) {
|
||||
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode, leadCost);
|
||||
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode);
|
||||
} else {
|
||||
return new DisjunctionSumScorer(optionalScorers, scoreMode);
|
||||
}
|
||||
|
|
|
@ -149,9 +149,8 @@ final class WANDScorer extends Scorer {
|
|||
int freq;
|
||||
|
||||
final ScoreMode scoreMode;
|
||||
final long leadCost;
|
||||
|
||||
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode, long leadCost)
|
||||
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode)
|
||||
throws IOException {
|
||||
|
||||
if (minShouldMatch >= scorers.size()) {
|
||||
|
@ -203,7 +202,6 @@ final class WANDScorer extends Scorer {
|
|||
scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
|
||||
scorers.size(),
|
||||
minShouldMatch);
|
||||
this.leadCost = leadCost;
|
||||
}
|
||||
|
||||
// returns a boolean so that it can be called from assert
|
||||
|
@ -397,32 +395,26 @@ final class WANDScorer extends Scorer {
|
|||
}
|
||||
|
||||
private void updateMaxScores(int target) throws IOException {
|
||||
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
|
||||
// If we have entries in 'head', we treat them all as leads and take the minimum of their next
|
||||
// block boundaries as a next boundary.
|
||||
// We don't take entries in 'tail' into account on purpose: 'tail' is supposed to contain the
|
||||
// least score contributors, and taking them into account might not move the boundary fast
|
||||
// enough, so we'll waste CPU re-computing the next boundary all the time.
|
||||
// Likewise, we ignore clauses whose cost is greater than the lead cost to avoid recomputing
|
||||
// per-window max scores over and over again. In the event when this makes us compute upTo as
|
||||
// NO_MORE_DOCS, this scorer will effectively implement WAND rather than block-max WAND.
|
||||
for (DisiWrapper w : head) {
|
||||
if (w.doc <= newUpTo && w.cost <= leadCost) {
|
||||
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
||||
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||
if (head.size() == 0) {
|
||||
// If the head is empty we use the greatest score contributor as a lead
|
||||
// like for conjunctions.
|
||||
upTo = tail[0].scorer.advanceShallow(target);
|
||||
} else {
|
||||
// If we still have entries in 'head', we treat them all as leads and
|
||||
// take the minimum of their next block boundaries as a next boundary.
|
||||
// We don't take entries in 'tail' into account on purpose: 'tail' is
|
||||
// supposed to contain the least score contributors, and taking them
|
||||
// into account might not move the boundary fast enough, so we'll waste
|
||||
// CPU re-computing the next boundary all the time.
|
||||
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
|
||||
for (DisiWrapper w : head) {
|
||||
if (w.doc <= newUpTo) {
|
||||
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
||||
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||
}
|
||||
}
|
||||
upTo = newUpTo;
|
||||
}
|
||||
// Only look at the tail if none of the `head` clauses had a block we could reuse and if its
|
||||
// cost is less than or equal to the lead cost.
|
||||
if (newUpTo == DocIdSetIterator.NO_MORE_DOCS && tailSize > 0 && tail[0].cost <= leadCost) {
|
||||
newUpTo = tail[0].scorer.advanceShallow(target);
|
||||
// upTo must be on or after the least `head` doc
|
||||
DisiWrapper headTop = head.top();
|
||||
if (headTop != null) {
|
||||
newUpTo = Math.max(newUpTo, headTop.doc);
|
||||
}
|
||||
}
|
||||
upTo = newUpTo;
|
||||
|
||||
tailMaxScore = 0;
|
||||
for (int i = 0; i < tailSize; ++i) {
|
||||
|
@ -468,7 +460,8 @@ final class WANDScorer extends Scorer {
|
|||
}
|
||||
}
|
||||
|
||||
assert head.size() == 0 || head.top().doc <= upTo;
|
||||
assert (head.size() == 0 && upTo == DocIdSetIterator.NO_MORE_DOCS)
|
||||
|| (head.size() > 0 && head.top().doc <= upTo);
|
||||
assert upTo >= target;
|
||||
}
|
||||
|
||||
|
|
|
@ -1024,11 +1024,7 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
final Scorer scorer;
|
||||
if (optionalScorers.size() > 0) {
|
||||
scorer =
|
||||
new WANDScorer(
|
||||
optionalScorers,
|
||||
query.getMinimumNumberShouldMatch(),
|
||||
scoreMode,
|
||||
Long.MAX_VALUE);
|
||||
new WANDScorer(optionalScorers, query.getMinimumNumberShouldMatch(), scoreMode);
|
||||
} else {
|
||||
scorer = weight.scorer(context);
|
||||
if (scorer == null) return null;
|
||||
|
|
Loading…
Reference in New Issue