mirror of https://github.com/apache/lucene.git
Speed up top-k retrieval of filtered disjunctions a bit. (#13996)
This moves work from `advance(int target)` to `TwoPhaseIterator#matches()` so that we do less work on hits that do not match the filter.
This commit is contained in:
parent
5807ff1620
commit
4aeecdfebf
|
@ -96,7 +96,7 @@ Optimizations
|
|||
* GITHUB#13994: Speed up top-k retrieval of filtered conjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
* GITHUB#14000: Speed up top-k retrieval of filtered disjunctions.
|
||||
* GITHUB#13996, GITHUB#14000: Speed up top-k retrieval of filtered disjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
|
|
@ -231,8 +231,12 @@ final class WANDScorer extends Scorer {
|
|||
}
|
||||
|
||||
for (DisiWrapper w : head) {
|
||||
if (lead == null) { // After calling advance() but before matches()
|
||||
assert w.doc >= doc;
|
||||
} else {
|
||||
assert w.doc > doc;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -286,20 +290,21 @@ final class WANDScorer extends Scorer {
|
|||
// Move 'lead' iterators back to the tail
|
||||
pushBackLeads(target);
|
||||
|
||||
// Advance 'head' as well
|
||||
advanceHead(target);
|
||||
// Make sure `head` is also on or beyond `target`
|
||||
DisiWrapper headTop = advanceHead(target);
|
||||
|
||||
// Pop the new 'lead' from 'head'
|
||||
moveToNextCandidate(target);
|
||||
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
if (scoreMode == ScoreMode.TOP_SCORES && (headTop == null || headTop.doc > upTo)) {
|
||||
// Update score bounds if necessary
|
||||
moveToNextBlock(target);
|
||||
assert upTo >= target;
|
||||
headTop = head.top();
|
||||
}
|
||||
|
||||
assert ensureConsistent();
|
||||
|
||||
// Advance to the next possible match
|
||||
return doNextCompetitiveCandidate();
|
||||
if (headTop == null) {
|
||||
return doc = DocIdSetIterator.NO_MORE_DOCS;
|
||||
} else {
|
||||
return doc = headTop.doc;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -311,6 +316,9 @@ final class WANDScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
assert lead == null;
|
||||
moveToNextCandidate();
|
||||
|
||||
while (leadMaxScore < minCompetitiveScore || freq < minShouldMatch) {
|
||||
if (leadMaxScore + tailMaxScore < minCompetitiveScore
|
||||
|| freq + tailSize < minShouldMatch) {
|
||||
|
@ -355,7 +363,7 @@ final class WANDScorer extends Scorer {
|
|||
}
|
||||
|
||||
/** Make sure all disis in 'head' are on or after 'target'. */
|
||||
private void advanceHead(int target) throws IOException {
|
||||
private DisiWrapper advanceHead(int target) throws IOException {
|
||||
DisiWrapper headTop = head.top();
|
||||
while (headTop != null && headTop.doc < target) {
|
||||
final DisiWrapper evicted = insertTailWithOverFlow(headTop);
|
||||
|
@ -367,6 +375,7 @@ final class WANDScorer extends Scorer {
|
|||
headTop = head.top();
|
||||
}
|
||||
}
|
||||
return headTop;
|
||||
}
|
||||
|
||||
private void advanceTail(DisiWrapper disi) throws IOException {
|
||||
|
@ -437,7 +446,7 @@ final class WANDScorer extends Scorer {
|
|||
* Update {@code upTo} and maximum scores of sub scorers so that {@code upTo} is greater than or
|
||||
* equal to the next candidate after {@code target}, i.e. the top of `head`.
|
||||
*/
|
||||
private void updateMaxScoresIfNecessary(int target) throws IOException {
|
||||
private void moveToNextBlock(int target) throws IOException {
|
||||
assert lead == null;
|
||||
|
||||
while (upTo < DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
@ -467,48 +476,19 @@ final class WANDScorer extends Scorer {
|
|||
* Set 'doc' to the next potential match, and move all disis of 'head' that are on this doc into
|
||||
* 'lead'.
|
||||
*/
|
||||
private void moveToNextCandidate(int target) throws IOException {
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
// Update score bounds if necessary
|
||||
updateMaxScoresIfNecessary(target);
|
||||
assert upTo >= target;
|
||||
|
||||
// updateMaxScores tries to move forward until a block with matches is found
|
||||
// so if the head is empty it means there are no matches at all anymore
|
||||
if (head.size() == 0) {
|
||||
assert upTo == DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc = DocIdSetIterator.NO_MORE_DOCS;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private void moveToNextCandidate() throws IOException {
|
||||
// The top of `head` defines the next potential match
|
||||
// pop all entries of `head` that are positioned on this doc
|
||||
lead = head.pop();
|
||||
assert doc == lead.doc;
|
||||
lead.next = null;
|
||||
leadMaxScore = lead.scaledMaxScore;
|
||||
freq = 1;
|
||||
doc = lead.doc;
|
||||
while (head.size() > 0 && head.top().doc == doc) {
|
||||
addLead(head.pop());
|
||||
}
|
||||
}
|
||||
|
||||
/** Move iterators to the tail until there is a potential match. */
|
||||
private int doNextCompetitiveCandidate() throws IOException {
|
||||
while (leadMaxScore + tailMaxScore < minCompetitiveScore || freq + tailSize < minShouldMatch) {
|
||||
// no match on doc is possible, move to the next potential match
|
||||
pushBackLeads(doc + 1);
|
||||
moveToNextCandidate(doc + 1);
|
||||
assert ensureConsistent();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
/** Advance all entries from the tail to know about all matches on the current doc. */
|
||||
private void advanceAllTail() throws IOException {
|
||||
// we return the next doc when the sum of the scores of the potential
|
||||
|
|
Loading…
Reference in New Issue