mirror of https://github.com/apache/lucene.git
Revert "Only consider clauses whose cost is less than the lead cost to compute block boundaries in WANDScorer. (#14000)"
This reverts commit 5807ff1620
.
This commit is contained in:
parent
a67120e175
commit
b70d214217
|
@ -70,9 +70,6 @@ Optimizations
|
||||||
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
||||||
(Adrien Grand)
|
(Adrien Grand)
|
||||||
|
|
||||||
* GITHUB#13996, GITHUB#14000: Speed up top-k retrieval of filtered disjunctions.
|
|
||||||
(Adrien Grand)
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
|
||||||
|
|
|
@ -478,7 +478,7 @@ final class BooleanScorerSupplier extends ScorerSupplier {
|
||||||
// However, as WANDScorer uses more complex algorithm and data structure, we would like to
|
// However, as WANDScorer uses more complex algorithm and data structure, we would like to
|
||||||
// still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
|
// still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
|
||||||
if ((scoreMode == ScoreMode.TOP_SCORES && topLevelScoringClause) || minShouldMatch > 1) {
|
if ((scoreMode == ScoreMode.TOP_SCORES && topLevelScoringClause) || minShouldMatch > 1) {
|
||||||
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode, leadCost);
|
return new WANDScorer(optionalScorers, minShouldMatch, scoreMode);
|
||||||
} else {
|
} else {
|
||||||
return new DisjunctionSumScorer(optionalScorers, scoreMode);
|
return new DisjunctionSumScorer(optionalScorers, scoreMode);
|
||||||
}
|
}
|
||||||
|
|
|
@ -149,9 +149,8 @@ final class WANDScorer extends Scorer {
|
||||||
int freq;
|
int freq;
|
||||||
|
|
||||||
final ScoreMode scoreMode;
|
final ScoreMode scoreMode;
|
||||||
final long leadCost;
|
|
||||||
|
|
||||||
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode, long leadCost)
|
WANDScorer(Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
if (minShouldMatch >= scorers.size()) {
|
if (minShouldMatch >= scorers.size()) {
|
||||||
|
@ -203,7 +202,6 @@ final class WANDScorer extends Scorer {
|
||||||
scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
|
scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
|
||||||
scorers.size(),
|
scorers.size(),
|
||||||
minShouldMatch);
|
minShouldMatch);
|
||||||
this.leadCost = leadCost;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns a boolean so that it can be called from assert
|
// returns a boolean so that it can be called from assert
|
||||||
|
@ -397,32 +395,26 @@ final class WANDScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateMaxScores(int target) throws IOException {
|
private void updateMaxScores(int target) throws IOException {
|
||||||
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
|
if (head.size() == 0) {
|
||||||
// If we have entries in 'head', we treat them all as leads and take the minimum of their next
|
// If the head is empty we use the greatest score contributor as a lead
|
||||||
// block boundaries as a next boundary.
|
// like for conjunctions.
|
||||||
// We don't take entries in 'tail' into account on purpose: 'tail' is supposed to contain the
|
upTo = tail[0].scorer.advanceShallow(target);
|
||||||
// least score contributors, and taking them into account might not move the boundary fast
|
} else {
|
||||||
// enough, so we'll waste CPU re-computing the next boundary all the time.
|
// If we still have entries in 'head', we treat them all as leads and
|
||||||
// Likewise, we ignore clauses whose cost is greater than the lead cost to avoid recomputing
|
// take the minimum of their next block boundaries as a next boundary.
|
||||||
// per-window max scores over and over again. In the event when this makes us compute upTo as
|
// We don't take entries in 'tail' into account on purpose: 'tail' is
|
||||||
// NO_MORE_DOCS, this scorer will effectively implement WAND rather than block-max WAND.
|
// supposed to contain the least score contributors, and taking them
|
||||||
for (DisiWrapper w : head) {
|
// into account might not move the boundary fast enough, so we'll waste
|
||||||
if (w.doc <= newUpTo && w.cost <= leadCost) {
|
// CPU re-computing the next boundary all the time.
|
||||||
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
int newUpTo = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
for (DisiWrapper w : head) {
|
||||||
|
if (w.doc <= newUpTo) {
|
||||||
|
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
||||||
|
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
upTo = newUpTo;
|
||||||
}
|
}
|
||||||
// Only look at the tail if none of the `head` clauses had a block we could reuse and if its
|
|
||||||
// cost is less than or equal to the lead cost.
|
|
||||||
if (newUpTo == DocIdSetIterator.NO_MORE_DOCS && tailSize > 0 && tail[0].cost <= leadCost) {
|
|
||||||
newUpTo = tail[0].scorer.advanceShallow(target);
|
|
||||||
// upTo must be on or after the least `head` doc
|
|
||||||
DisiWrapper headTop = head.top();
|
|
||||||
if (headTop != null) {
|
|
||||||
newUpTo = Math.max(newUpTo, headTop.doc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
upTo = newUpTo;
|
|
||||||
|
|
||||||
tailMaxScore = 0;
|
tailMaxScore = 0;
|
||||||
for (int i = 0; i < tailSize; ++i) {
|
for (int i = 0; i < tailSize; ++i) {
|
||||||
|
@ -468,7 +460,8 @@ final class WANDScorer extends Scorer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert head.size() == 0 || head.top().doc <= upTo;
|
assert (head.size() == 0 && upTo == DocIdSetIterator.NO_MORE_DOCS)
|
||||||
|
|| (head.size() > 0 && head.top().doc <= upTo);
|
||||||
assert upTo >= target;
|
assert upTo >= target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1024,11 +1024,7 @@ public class TestWANDScorer extends LuceneTestCase {
|
||||||
final Scorer scorer;
|
final Scorer scorer;
|
||||||
if (optionalScorers.size() > 0) {
|
if (optionalScorers.size() > 0) {
|
||||||
scorer =
|
scorer =
|
||||||
new WANDScorer(
|
new WANDScorer(optionalScorers, query.getMinimumNumberShouldMatch(), scoreMode);
|
||||||
optionalScorers,
|
|
||||||
query.getMinimumNumberShouldMatch(),
|
|
||||||
scoreMode,
|
|
||||||
Long.MAX_VALUE);
|
|
||||||
} else {
|
} else {
|
||||||
scorer = weight.scorer(context);
|
scorer = weight.scorer(context);
|
||||||
if (scorer == null) return null;
|
if (scorer == null) return null;
|
||||||
|
|
Loading…
Reference in New Issue