mirror of https://github.com/apache/lucene.git

LUCENE-9668: Deprecate MinShouldMatchSumScorer with WANDScorer (#2205)

This commit is contained in: parent 93107d6379, commit 7f4d4dfdbf

Boolean2ScorerSupplier.java
@@ -231,10 +231,15 @@ final class Boolean2ScorerSupplier extends ScorerSupplier {
         optionalScorers.add(scorer.get(leadCost));
       }
 
-      if (scoreMode == ScoreMode.TOP_SCORES) {
-        return new WANDScorer(weight, optionalScorers, minShouldMatch);
-      } else if (minShouldMatch > 1) {
-        return new MinShouldMatchSumScorer(weight, optionalScorers, minShouldMatch);
+      // Technically speaking, WANDScorer should be able to handle the following 3 conditions now
+      // 1. Any ScoreMode (with scoring or not)
+      // 2. Any minCompetitiveScore ( >= 0 )
+      // 3. Any minShouldMatch ( >= 0 )
+      //
+      // However, as WANDScorer uses more complex algorithm and data structure, we would like to
+      // still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
+      if (scoreMode == ScoreMode.TOP_SCORES || minShouldMatch > 1) {
+        return new WANDScorer(weight, optionalScorers, minShouldMatch, scoreMode);
       } else {
         return new DisjunctionSumScorer(weight, optionalScorers, scoreMode);
       }
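For illustration only (a minimal sketch, not part of this commit; it assumes an existing
IndexSearcher named "searcher" over an index with a "foo" field): after this change, a pure
disjunction with minimumNumberShouldMatch > 1 is evaluated by WANDScorer even in a non-scoring
context such as counting, while an exhaustive pure disjunction without minShouldMatch still
goes through DisjunctionSumScorer.

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

public class MinShouldMatchCountExample {
  // Counts documents matching at least 2 of the 3 SHOULD clauses. count() runs without scores,
  // yet the minShouldMatch > 1 disjunction is handled by WANDScorer after this change.
  public static int countAtLeastTwo(IndexSearcher searcher) throws IOException {
    BooleanQuery query =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("foo", "A")), Occur.SHOULD)
            .add(new TermQuery(new Term("foo", "B")), Occur.SHOULD)
            .add(new TermQuery(new Term("foo", "C")), Occur.SHOULD)
            .setMinimumNumberShouldMatch(2)
            .build();
    return searcher.count(query);
  }
}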
BooleanScorer.java
@@ -61,7 +61,7 @@ final class BooleanScorer extends BulkScorer {
     }
   }
 
-  // See MinShouldMatchSumScorer for an explanation
+  // See WANDScorer for an explanation
   private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) {
     final PriorityQueue<BulkScorer> pq =
         new PriorityQueue<BulkScorer>(scorers.size() - minShouldMatch + 1) {
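The cost() helper above keeps the (numScorers - minShouldMatch + 1) least costly sub scorers in
its priority queue. The sketch below (an illustration of the same upper bound, not the Lucene
implementation) spells out the reasoning with a plain sort: any document that matches at least
minShouldMatch clauses must match at least one of the cheapest (n - minShouldMatch + 1) clauses,
so the sum of their costs bounds the disjunction's cost.

import java.util.Arrays;

public class MinShouldMatchCostBound {
  // Upper bound on the number of candidate documents a minShouldMatch disjunction can produce.
  public static long costUpperBound(long[] clauseCosts, int minShouldMatch) {
    long[] sorted = clauseCosts.clone();
    Arrays.sort(sorted);
    long cost = 0;
    for (int i = 0; i < sorted.length - minShouldMatch + 1; i++) {
      cost += sorted[i]; // sum the cheapest (n - minShouldMatch + 1) clause costs
    }
    return cost;
  }
}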
MinShouldMatchSumScorer.java (entire file deleted)
@@ -1,382 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.search;
-
-import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
-import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
-import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-/**
- * A {@link Scorer} for {@link BooleanQuery} when {@link
- * BooleanQuery.Builder#setMinimumNumberShouldMatch(int) minShouldMatch} is between 2 and the total
- * number of clauses.
- *
- * <p>This implementation keeps sub scorers in 3 different places: - lead: a linked list of scorer
- * that are positioned on the desired doc ID - tail: a heap that contains at most minShouldMatch - 1
- * scorers that are behind the desired doc ID. These scorers are ordered by cost so that we can
- * advance the least costly ones first. - head: a heap that contains scorers which are beyond the
- * desired doc ID, ordered by doc ID in order to move quickly to the next candidate.
- *
- * <p>Finding the next match consists of first setting the desired doc ID to the least entry in
- * 'head' and then advance 'tail' until there is a match.
- */
-final class MinShouldMatchSumScorer extends Scorer {
-
-  final int minShouldMatch;
-
-  // list of scorers which 'lead' the iteration and are currently
-  // positioned on 'doc'
-  DisiWrapper lead;
-  int doc; // current doc ID of the leads
-  int freq; // number of scorers on the desired doc ID
-
-  // priority queue of scorers that are too advanced compared to the current
-  // doc. Ordered by doc ID.
-  final DisiPriorityQueue head;
-
-  // priority queue of scorers which are behind the current doc.
-  // Ordered by cost.
-  final DisiWrapper[] tail;
-  int tailSize;
-
-  final long cost;
-
-  MinShouldMatchSumScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch) {
-    super(weight);
-
-    if (minShouldMatch > scorers.size()) {
-      throw new IllegalArgumentException("minShouldMatch should be <= the number of scorers");
-    }
-    if (minShouldMatch < 1) {
-      throw new IllegalArgumentException("minShouldMatch should be >= 1");
-    }
-
-    this.minShouldMatch = minShouldMatch;
-    this.doc = -1;
-
-    head = new DisiPriorityQueue(scorers.size() - minShouldMatch + 1);
-    // there can be at most minShouldMatch - 1 scorers beyond the current position
-    // otherwise we might be skipping over matching documents
-    tail = new DisiWrapper[minShouldMatch - 1];
-
-    for (Scorer scorer : scorers) {
-      addLead(new DisiWrapper(scorer));
-    }
-
-    this.cost =
-        ScorerUtil.costWithMinShouldMatch(
-            scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
-            scorers.size(),
-            minShouldMatch);
-  }
-
-  @Override
-  public final Collection<ChildScorable> getChildren() throws IOException {
-    List<ChildScorable> matchingChildren = new ArrayList<>();
-    updateFreq();
-    for (DisiWrapper s = lead; s != null; s = s.next) {
-      matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
-    }
-    return matchingChildren;
-  }
-
-  @Override
-  public DocIdSetIterator iterator() {
-    return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
-  }
-
-  @Override
-  public TwoPhaseIterator twoPhaseIterator() {
-    DocIdSetIterator approximation =
-        new DocIdSetIterator() {
-
-          @Override
-          public int docID() {
-            assert doc == lead.doc;
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            // We are moving to the next doc ID, so scorers in 'lead' need to go in
-            // 'tail'. If there is not enough space in 'tail', then we take the least
-            // costly scorers and advance them.
-            for (DisiWrapper s = lead; s != null; s = s.next) {
-              final DisiWrapper evicted = insertTailWithOverFlow(s);
-              if (evicted != null) {
-                if (evicted.doc == doc) {
-                  evicted.doc = evicted.iterator.nextDoc();
-                } else {
-                  evicted.doc = evicted.iterator.advance(doc + 1);
-                }
-                head.add(evicted);
-              }
-            }
-
-            setDocAndFreq();
-            // It would be correct to return doNextCandidate() at this point but if you
-            // call nextDoc as opposed to advance, it probably means that you really
-            // need the next match. Returning 'doc' here would lead to a similar
-            // iteration over sub postings overall except that the decision making would
-            // happen at a higher level where more abstractions are involved and
-            // benchmarks suggested it causes a significant performance hit.
-            return doNext();
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            // Same logic as in nextDoc
-            for (DisiWrapper s = lead; s != null; s = s.next) {
-              final DisiWrapper evicted = insertTailWithOverFlow(s);
-              if (evicted != null) {
-                evicted.doc = evicted.iterator.advance(target);
-                head.add(evicted);
-              }
-            }
-
-            // But this time there might also be scorers in 'head' behind the desired
-            // target so we need to do the same thing that we did on 'lead' on 'head'
-            DisiWrapper headTop = head.top();
-            while (headTop.doc < target) {
-              final DisiWrapper evicted = insertTailWithOverFlow(headTop);
-              // We know that the tail is full since it contains at most
-              // minShouldMatch - 1 entries and we just moved at least minShouldMatch
-              // entries to it, so evicted is not null
-              evicted.doc = evicted.iterator.advance(target);
-              headTop = head.updateTop(evicted);
-            }
-
-            setDocAndFreq();
-            return doNextCandidate();
-          }
-
-          @Override
-          public long cost() {
-            return cost;
-          }
-        };
-    return new TwoPhaseIterator(approximation) {
-
-      @Override
-      public boolean matches() throws IOException {
-        while (freq < minShouldMatch) {
-          assert freq > 0;
-          if (freq + tailSize >= minShouldMatch) {
-            // a match on doc is still possible, try to
-            // advance scorers from the tail
-            advanceTail();
-          } else {
-            return false;
-          }
-        }
-        return true;
-      }
-
-      @Override
-      public float matchCost() {
-        // maximum number of scorer that matches() might advance
-        return tail.length;
-      }
-    };
-  }
-
-  private void addLead(DisiWrapper lead) {
-    lead.next = this.lead;
-    this.lead = lead;
-    freq += 1;
-  }
-
-  private void pushBackLeads() throws IOException {
-    for (DisiWrapper s = lead; s != null; s = s.next) {
-      addTail(s);
-    }
-  }
-
-  private void advanceTail(DisiWrapper top) throws IOException {
-    top.doc = top.iterator.advance(doc);
-    if (top.doc == doc) {
-      addLead(top);
-    } else {
-      head.add(top);
-    }
-  }
-
-  private void advanceTail() throws IOException {
-    final DisiWrapper top = popTail();
-    advanceTail(top);
-  }
-
-  /** Reinitializes head, freq and doc from 'head' */
-  private void setDocAndFreq() {
-    assert head.size() > 0;
-
-    // The top of `head` defines the next potential match
-    // pop all documents which are on this doc
-    lead = head.pop();
-    lead.next = null;
-    freq = 1;
-    doc = lead.doc;
-    while (head.size() > 0 && head.top().doc == doc) {
-      addLead(head.pop());
-    }
-  }
-
-  /** Advance tail to the lead until there is a match. */
-  private int doNext() throws IOException {
-    while (freq < minShouldMatch) {
-      assert freq > 0;
-      if (freq + tailSize >= minShouldMatch) {
-        // a match on doc is still possible, try to
-        // advance scorers from the tail
-        advanceTail();
-      } else {
-        // no match on doc is possible anymore, move to the next potential match
-        pushBackLeads();
-        setDocAndFreq();
-      }
-    }
-
-    return doc;
-  }
-
-  /**
-   * Move iterators to the tail until the cumulated size of lead+tail is greater than or equal to
-   * minShouldMath
-   */
-  private int doNextCandidate() throws IOException {
-    while (freq + tailSize < minShouldMatch) {
-      // no match on doc is possible, move to the next potential match
-      pushBackLeads();
-      setDocAndFreq();
-    }
-
-    return doc;
-  }
-
-  /** Advance all entries from the tail to know about all matches on the current doc. */
-  private void updateFreq() throws IOException {
-    assert freq >= minShouldMatch;
-    // we return the next doc when there are minShouldMatch matching clauses
-    // but some of the clauses in 'tail' might match as well
-    // in general we want to advance least-costly clauses first in order to
-    // skip over non-matching documents as fast as possible. However here,
-    // we are advancing everything anyway so iterating over clauses in
-    // (roughly) cost-descending order might help avoid some permutations in
-    // the head heap
-    for (int i = tailSize - 1; i >= 0; --i) {
-      advanceTail(tail[i]);
-    }
-    tailSize = 0;
-  }
-
-  @Override
-  public float score() throws IOException {
-    // we need to know about all matches
-    updateFreq();
-    double score = 0;
-    for (DisiWrapper s = lead; s != null; s = s.next) {
-      score += s.scorer.score();
-    }
-    return (float) score;
-  }
-
-  @Override
-  public float getMaxScore(int upTo) throws IOException {
-    // TODO: implement but be careful about floating-point errors.
-    return Float.POSITIVE_INFINITY;
-  }
-
-  @Override
-  public int docID() {
-    assert doc == lead.doc;
-    return doc;
-  }
-
-  /** Insert an entry in 'tail' and evict the least-costly scorer if full. */
-  private DisiWrapper insertTailWithOverFlow(DisiWrapper s) {
-    if (tailSize < tail.length) {
-      addTail(s);
-      return null;
-    } else if (tail.length >= 1) {
-      final DisiWrapper top = tail[0];
-      if (top.cost < s.cost) {
-        tail[0] = s;
-        downHeapCost(tail, tailSize);
-        return top;
-      }
-    }
-    return s;
-  }
-
-  /** Add an entry to 'tail'. Fails if over capacity. */
-  private void addTail(DisiWrapper s) {
-    tail[tailSize] = s;
-    upHeapCost(tail, tailSize);
-    tailSize += 1;
-  }
-
-  /** Pop the least-costly scorer from 'tail'. */
-  private DisiWrapper popTail() {
-    assert tailSize > 0;
-    final DisiWrapper result = tail[0];
-    tail[0] = tail[--tailSize];
-    downHeapCost(tail, tailSize);
-    return result;
-  }
-
-  /** Heap helpers */
-  private static void upHeapCost(DisiWrapper[] heap, int i) {
-    final DisiWrapper node = heap[i];
-    final long nodeCost = node.cost;
-    int j = parentNode(i);
-    while (j >= 0 && nodeCost < heap[j].cost) {
-      heap[i] = heap[j];
-      i = j;
-      j = parentNode(j);
-    }
-    heap[i] = node;
-  }
-
-  private static void downHeapCost(DisiWrapper[] heap, int size) {
-    int i = 0;
-    final DisiWrapper node = heap[0];
-    int j = leftNode(i);
-    if (j < size) {
-      int k = rightNode(j);
-      if (k < size && heap[k].cost < heap[j].cost) {
-        j = k;
-      }
-      if (heap[j].cost < node.cost) {
-        do {
-          heap[i] = heap[j];
-          i = j;
-          j = leftNode(i);
-          k = rightNode(j);
-          if (k < size && heap[k].cost < heap[j].cost) {
-            j = k;
-          }
-        } while (j < size && heap[j].cost < node.cost);
-        heap[i] = node;
-      }
-    }
-  }
-}
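The deleted class's javadoc describes the contract it implemented: match every document that
satisfies at least minShouldMatch of the SHOULD clauses, using the lead/tail/head bookkeeping to
avoid visiting every posting. As a reference point only (not Lucene code, and far less efficient),
the same contract can be written as a brute-force merge over per-clause doc-ID lists; WANDScorer
now produces the same matches for this case.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class AtLeastKOfNReference {
  // Returns every doc ID that appears in at least minShouldMatch of the given per-clause
  // doc-ID lists. Each list is assumed to contain no duplicate doc IDs, as postings do.
  public static List<Integer> atLeastKOfN(List<int[]> clauseDocs, int minShouldMatch) {
    Map<Integer, Integer> counts = new TreeMap<>();
    for (int[] docs : clauseDocs) {
      for (int doc : docs) {
        counts.merge(doc, 1, Integer::sum);
      }
    }
    List<Integer> result = new ArrayList<>();
    counts.forEach(
        (doc, count) -> {
          if (count >= minShouldMatch) {
            result.add(doc);
          }
        });
    return result;
  }
}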
@ -31,13 +31,25 @@ import java.util.OptionalInt;
|
||||||
* This implements the WAND (Weak AND) algorithm for dynamic pruning described in "Efficient Query
|
* This implements the WAND (Weak AND) algorithm for dynamic pruning described in "Efficient Query
|
||||||
* Evaluation using a Two-Level Retrieval Process" by Broder, Carmel, Herscovici, Soffer and Zien.
|
* Evaluation using a Two-Level Retrieval Process" by Broder, Carmel, Herscovici, Soffer and Zien.
|
||||||
* Enhanced with techniques described in "Faster Top-k Document Retrieval Using Block-Max Indexes"
|
* Enhanced with techniques described in "Faster Top-k Document Retrieval Using Block-Max Indexes"
|
||||||
* by Ding and Suel. This scorer maintains a feedback loop with the collector in order to know at
|
* by Ding and Suel. For scoreMode == {@link ScoreMode#TOP_SCORES}, this scorer maintains a feedback
|
||||||
* any time the minimum score that is required in order for a hit to be competitive. Then it
|
* loop with the collector in order to know at any time the minimum score that is required in order
|
||||||
* leverages the {@link Scorer#getMaxScore(int) max score} from each scorer in order to know when it
|
* for a hit to be competitive.
|
||||||
* may call {@link DocIdSetIterator#advance} rather than {@link DocIdSetIterator#nextDoc} to move to
|
*
|
||||||
* the next competitive hit. Implementation is similar to {@link MinShouldMatchSumScorer} except
|
* <p>The implementation supports both minCompetitiveScore by enforce that {@code ∑ max_score >=
|
||||||
* that instead of enforcing that {@code freq >= minShouldMatch}, we enforce that {@code ∑ max_score
|
* minCompetitiveScore}, and minShouldMatch by enforcing {@code freq >= minShouldMatch}. It keeps
|
||||||
* >= minCompetitiveScore}.
|
* sub scorers in 3 different places: - tail: a heap that contains scorers that are behind the
|
||||||
|
* desired doc ID. These scorers are ordered by cost so that we can advance the least costly ones
|
||||||
|
* first. - lead: a linked list of scorer that are positioned on the desired doc ID - head: a heap
|
||||||
|
* that contains scorers which are beyond the desired doc ID, ordered by doc ID in order to move
|
||||||
|
* quickly to the next candidate.
|
||||||
|
*
|
||||||
|
* <p>When scoreMode == {@link ScoreMode#TOP_SCORES}, it leverages the {@link
|
||||||
|
* Scorer#getMaxScore(int) max score} from each scorer in order to know when it may call {@link
|
||||||
|
* DocIdSetIterator#advance} rather than {@link DocIdSetIterator#nextDoc} to move to the next
|
||||||
|
* competitive hit. When scoreMode != {@link ScoreMode#TOP_SCORES}, block-max scoring related logic
|
||||||
|
* is skipped. Finding the next match consists of first setting the desired doc ID to the least
|
||||||
|
* entry in 'head', and then advance 'tail' until there is a match, by meeting the configured {@code
|
||||||
|
* freq >= minShouldMatch} and / or {@code ∑ max_score >= minCompetitiveScore} requirements.
|
||||||
*/
|
*/
|
||||||
final class WANDScorer extends Scorer {
|
final class WANDScorer extends Scorer {
|
||||||
|
|
||||||
|
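To make the two conditions named in the updated javadoc concrete, here is an illustrative check
(not the actual WANDScorer code, which works on scaled long max scores and updates these sums
incrementally; the score bound only applies when scoreMode is TOP_SCORES): a candidate document is
only worth evaluating further if both bounds can still be met.

public class WandCandidateCheck {
  // leadMaxScoreSum / freq describe clauses already positioned on the candidate doc;
  // tailMaxScoreSum / tailSize describe clauses still behind it that could be advanced.
  public static boolean mayCompete(
      double leadMaxScoreSum,
      double tailMaxScoreSum,
      double minCompetitiveScore,
      int freq,
      int tailSize,
      int minShouldMatch) {
    boolean scoreBound = leadMaxScoreSum + tailMaxScoreSum >= minCompetitiveScore;
    boolean freqBound = freq + tailSize >= minShouldMatch;
    return scoreBound && freqBound;
  }
}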
@@ -134,7 +146,10 @@ final class WANDScorer extends Scorer {
   final int minShouldMatch;
   int freq;
 
-  WANDScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch) throws IOException {
+  final ScoreMode scoreMode;
+
+  WANDScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode)
+      throws IOException {
     super(weight);
 
     if (minShouldMatch >= scorers.size()) {
@@ -149,23 +164,32 @@ final class WANDScorer extends Scorer {
     this.doc = -1;
     this.upTo = -1; // will be computed on the first call to nextDoc/advance
 
+    this.scoreMode = scoreMode;
+
     head = new DisiPriorityQueue(scorers.size());
     // there can be at most num_scorers - 1 scorers beyond the current position
     tail = new DisiWrapper[scorers.size()];
 
-    OptionalInt scalingFactor = OptionalInt.empty();
-    for (Scorer scorer : scorers) {
-      scorer.advanceShallow(0);
-      float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
-      if (maxScore != 0 && Float.isFinite(maxScore)) {
-        // 0 and +Infty should not impact the scale
-        scalingFactor =
-            OptionalInt.of(
-                Math.min(scalingFactor.orElse(Integer.MAX_VALUE), scalingFactor(maxScore)));
+    if (this.scoreMode == ScoreMode.TOP_SCORES) {
+      OptionalInt scalingFactor = OptionalInt.empty();
+      for (Scorer scorer : scorers) {
+        scorer.advanceShallow(0);
+        float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
+        if (maxScore != 0 && Float.isFinite(maxScore)) {
+          // 0 and +Infty should not impact the scale
+          scalingFactor =
+              OptionalInt.of(
+                  Math.min(scalingFactor.orElse(Integer.MAX_VALUE), scalingFactor(maxScore)));
+        }
       }
+
+      // Use a scaling factor of 0 if all max scores are either 0 or +Infty
+      this.scalingFactor = scalingFactor.orElse(0);
+      this.maxScorePropagator = new MaxScoreSumPropagator(scorers);
+    } else {
+      this.scalingFactor = 0;
+      this.maxScorePropagator = null;
     }
-    // Use a scaling factor of 0 if all max scores are either 0 or +Infty
-    this.scalingFactor = scalingFactor.orElse(0);
 
     for (Scorer scorer : scorers) {
       addLead(new DisiWrapper(scorer));
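The constructor change above only sets up score scaling and the MaxScoreSumPropagator when scores
are actually needed (TOP_SCORES). As a rough sketch of the scale selection it gates (illustrative
only; scalingFactorFor below is a hypothetical stand-in, not Lucene's scalingFactor(float)): the
shared scale is the minimum per-clause factor over clauses whose max score is finite and non-zero,
and defaults to 0 when no such clause exists.

import java.util.OptionalInt;

public class SharedScaleSketch {
  // Hypothetical stand-in for a per-clause scaling factor derived from the clause's max score.
  static int scalingFactorFor(float maxScore) {
    return -Math.getExponent(maxScore);
  }

  static int sharedScalingFactor(float[] maxScores) {
    OptionalInt scalingFactor = OptionalInt.empty();
    for (float maxScore : maxScores) {
      // 0 and +Infinity carry no usable magnitude information, so they do not affect the scale.
      if (maxScore != 0 && Float.isFinite(maxScore)) {
        scalingFactor =
            OptionalInt.of(
                Math.min(scalingFactor.orElse(Integer.MAX_VALUE), scalingFactorFor(maxScore)));
      }
    }
    return scalingFactor.orElse(0);
  }
}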
@@ -176,33 +200,34 @@ final class WANDScorer extends Scorer {
             scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
             scorers.size(),
             minShouldMatch);
-    this.maxScorePropagator = new MaxScoreSumPropagator(scorers);
   }
 
   // returns a boolean so that it can be called from assert
   // the return value is useless: it always returns true
   private boolean ensureConsistent() {
-    long maxScoreSum = 0;
-    for (int i = 0; i < tailSize; ++i) {
-      assert tail[i].doc < doc;
-      maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore);
-    }
-    assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
-
-    maxScoreSum = 0;
-    for (DisiWrapper w = lead; w != null; w = w.next) {
-      assert w.doc == doc;
-      maxScoreSum = Math.addExact(maxScoreSum, w.maxScore);
-    }
-    assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
+    if (scoreMode == ScoreMode.TOP_SCORES) {
+      long maxScoreSum = 0;
+      for (int i = 0; i < tailSize; ++i) {
+        assert tail[i].doc < doc;
+        maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore);
+      }
+      assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
+
+      maxScoreSum = 0;
+      for (DisiWrapper w = lead; w != null; w = w.next) {
+        assert w.doc == doc;
+        maxScoreSum = Math.addExact(maxScoreSum, w.maxScore);
+      }
+      assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
+
+      assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
+      assert doc <= upTo;
+    }
 
     for (DisiWrapper w : head) {
       assert w.doc > doc;
     }
 
-    assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
-    assert doc <= upTo;
-
     return true;
   }
 
@@ -210,6 +235,8 @@ final class WANDScorer extends Scorer {
   public void setMinCompetitiveScore(float minScore) throws IOException {
     // Let this disjunction know about the new min score so that it can skip
     // over clauses that produce low scores.
+    assert scoreMode == ScoreMode.TOP_SCORES
+        : "minCompetitiveScore can only be set for ScoreMode.TOP_SCORES, but got: " + scoreMode;
     assert minScore >= 0;
     long scaledMinScore = scaleMinScore(minScore, scalingFactor);
     assert scaledMinScore >= minCompetitiveScore;
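The new assertion documents that only a collector reporting ScoreMode.TOP_SCORES may push a minimum
competitive score down to this scorer. The sketch below is a hypothetical caller (not part of this
change) showing that calling side: a collector that asks scorers to skip hits below a fixed
threshold.

import java.io.IOException;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;

public class ThresholdCollector extends SimpleCollector {
  private final float threshold;
  private Scorable scorer;

  public ThresholdCollector(float threshold) {
    this.threshold = threshold;
  }

  @Override
  public void setScorer(Scorable scorer) throws IOException {
    this.scorer = scorer;
    // Allowed because scoreMode() below reports TOP_SCORES.
    scorer.setMinCompetitiveScore(threshold);
  }

  @Override
  public void collect(int doc) throws IOException {
    // Consume doc and scorer.score() as needed; hits below the threshold may be skipped
    // entirely by scorers such as WANDScorer.
  }

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.TOP_SCORES;
  }
}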
@@ -421,7 +448,9 @@ final class WANDScorer extends Scorer {
       }
     }
 
-    assert upTo == DocIdSetIterator.NO_MORE_DOCS || (head.size() > 0 && head.top().doc <= upTo);
+    assert (head.size() == 0 && upTo == DocIdSetIterator.NO_MORE_DOCS)
+        || (head.size() > 0 && head.top().doc <= upTo);
+    assert upTo >= target;
   }
 
   /**
@@ -429,16 +458,18 @@ final class WANDScorer extends Scorer {
    * 'lead'.
    */
   private void moveToNextCandidate(int target) throws IOException {
-    // Update score bounds if necessary so
-    updateMaxScoresIfNecessary(target);
-    assert upTo >= target;
+    if (scoreMode == ScoreMode.TOP_SCORES) {
+      // Update score bounds if necessary so
+      updateMaxScoresIfNecessary(target);
+      assert upTo >= target;
 
-    // updateMaxScores tries to move forward until a block with matches is found
-    // so if the head is empty it means there are no matches at all anymore
-    if (head.size() == 0) {
-      assert upTo == DocIdSetIterator.NO_MORE_DOCS;
-      doc = DocIdSetIterator.NO_MORE_DOCS;
-      return;
+      // updateMaxScores tries to move forward until a block with matches is found
+      // so if the head is empty it means there are no matches at all anymore
+      if (head.size() == 0) {
+        assert upTo == DocIdSetIterator.NO_MORE_DOCS;
+        doc = DocIdSetIterator.NO_MORE_DOCS;
+        return;
+      }
     }
 
     // The top of `head` defines the next potential match
TestBooleanQueryVisitSubscorers.java
@@ -240,7 +240,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
     assertEquals(
         "ConjunctionScorer\n"
             + " MUST ConstantScoreScorer\n"
-            + " MUST MinShouldMatchSumScorer\n"
+            + " MUST WANDScorer\n"
            + " SHOULD TermScorer body:crawler\n"
             + " SHOULD TermScorer body:web\n"
             + " SHOULD TermScorer body:nutch",
TestWANDScorer.java
@@ -312,6 +312,57 @@ public class TestWANDScorer extends LuceneTestCase {
     }
   }
 
+  public void testBasicsWithDisjunctionAndMinShouldMatchAndNonScoringMode() throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w =
+          new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
+        for (String[] values :
+            Arrays.asList(
+                new String[] {"A", "B"}, // 0
+                new String[] {"A"}, // 1
+                new String[] {}, // 2
+                new String[] {"A", "B", "C"}, // 3
+                new String[] {"B"}, // 4
+                new String[] {"B", "C"} // 5
+                )) {
+          Document doc = new Document();
+          for (String value : values) {
+            doc.add(new StringField("foo", value, Store.NO));
+          }
+          w.addDocument(doc);
+        }
+
+        w.forceMerge(1);
+      }
+
+      try (IndexReader reader = DirectoryReader.open(dir)) {
+        IndexSearcher searcher = newSearcher(reader);
+
+        Query query =
+            new BooleanQuery.Builder()
+                .add(
+                    new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
+                    Occur.SHOULD)
+                .add(new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), Occur.SHOULD)
+                .add(
+                    new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
+                    Occur.SHOULD)
+                .setMinimumNumberShouldMatch(2)
+                .build();
+
+        Scorer scorer =
+            searcher
+                .createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1)
+                .scorer(searcher.getIndexReader().leaves().get(0));
+
+        assertEquals(0, scorer.iterator().nextDoc());
+        assertEquals(3, scorer.iterator().nextDoc());
+        assertEquals(5, scorer.iterator().nextDoc());
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+      }
+    }
+  }
+
   public void testBasicsWithFilteredDisjunctionAndMinShouldMatch() throws Exception {
     try (Directory dir = newDirectory()) {
       try (IndexWriter w =
@@ -387,6 +438,66 @@ public class TestWANDScorer extends LuceneTestCase {
     }
   }
 
+  public void testBasicsWithFilteredDisjunctionAndMinShouldMatchAndNonScoringMode()
+      throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w =
+          new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
+        for (String[] values :
+            Arrays.asList(
+                new String[] {"A", "B"}, // 0
+                new String[] {"A", "C", "D"}, // 1
+                new String[] {}, // 2
+                new String[] {"A", "B", "C", "D"}, // 3
+                new String[] {"B"}, // 4
+                new String[] {"C", "D"} // 5
+                )) {
+          Document doc = new Document();
+          for (String value : values) {
+            doc.add(new StringField("foo", value, Store.NO));
+          }
+          w.addDocument(doc);
+        }
+
+        w.forceMerge(1);
+      }
+
+      try (IndexReader reader = DirectoryReader.open(dir)) {
+        IndexSearcher searcher = newSearcher(reader);
+
+        Query query =
+            new BooleanQuery.Builder()
+                .add(
+                    new BooleanQuery.Builder()
+                        .add(
+                            new BoostQuery(
+                                new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
+                            Occur.SHOULD)
+                        .add(
+                            new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
+                            Occur.SHOULD)
+                        .add(
+                            new BoostQuery(
+                                new ConstantScoreQuery(new TermQuery(new Term("foo", "D"))), 4),
+                            Occur.SHOULD)
+                        .setMinimumNumberShouldMatch(2)
+                        .build(),
+                    Occur.MUST)
+                .add(new TermQuery(new Term("foo", "C")), Occur.FILTER)
+                .build();
+
+        Scorer scorer =
+            searcher
+                .createWeight(searcher.rewrite(query), ScoreMode.TOP_DOCS, 1)
+                .scorer(searcher.getIndexReader().leaves().get(0));
+
+        assertEquals(1, scorer.iterator().nextDoc());
+        assertEquals(3, scorer.iterator().nextDoc());
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+      }
+    }
+  }
+
   public void testBasicsWithFilteredDisjunctionAndMustNotAndMinShouldMatch() throws Exception {
     try (Directory dir = newDirectory()) {
       try (IndexWriter w =
@@ -454,6 +565,58 @@ public class TestWANDScorer extends LuceneTestCase {
     }
   }
 
+  public void testBasicsWithFilteredDisjunctionAndMustNotAndMinShouldMatchAndNonScoringMode()
+      throws Exception {
+    try (Directory dir = newDirectory()) {
+      try (IndexWriter w =
+          new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
+        for (String[] values :
+            Arrays.asList(
+                new String[] {"A", "B"}, // 0
+                new String[] {"A", "C", "D"}, // 1
+                new String[] {}, // 2
+                new String[] {"A", "B", "C", "D"}, // 3
+                new String[] {"B", "D"}, // 4
+                new String[] {"C", "D"} // 5
+                )) {
+          Document doc = new Document();
+          for (String value : values) {
+            doc.add(new StringField("foo", value, Store.NO));
+          }
+          w.addDocument(doc);
+        }
+
+        w.forceMerge(1);
+      }
+
+      try (IndexReader reader = DirectoryReader.open(dir)) {
+        IndexSearcher searcher = newSearcher(reader);
+
+        Query query =
+            new BooleanQuery.Builder()
+                .add(
+                    new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
+                    Occur.SHOULD)
+                .add(new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), Occur.SHOULD)
+                .add(new TermQuery(new Term("foo", "C")), Occur.MUST_NOT)
+                .add(
+                    new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "D"))), 4),
+                    Occur.SHOULD)
+                .setMinimumNumberShouldMatch(2)
+                .build();
+
+        Scorer scorer =
+            searcher
+                .createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1)
+                .scorer(searcher.getIndexReader().leaves().get(0));
+
+        assertEquals(0, scorer.iterator().nextDoc());
+        assertEquals(4, scorer.iterator().nextDoc());
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+      }
+    }
+  }
+
   public void testRandom() throws IOException {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());