LUCENE-9668: Deprecate MinShouldMatchSumScorer with WANDScorer (#2205)

This commit is contained in:
zacharymorn 2021-01-24 22:28:19 -08:00 committed by GitHub
parent 93107d6379
commit 7f4d4dfdbf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 249 additions and 432 deletions

View File

@ -231,10 +231,15 @@ final class Boolean2ScorerSupplier extends ScorerSupplier {
optionalScorers.add(scorer.get(leadCost)); optionalScorers.add(scorer.get(leadCost));
} }
if (scoreMode == ScoreMode.TOP_SCORES) { // Technically speaking, WANDScorer should be able to handle the following 3 conditions now
return new WANDScorer(weight, optionalScorers, minShouldMatch); // 1. Any ScoreMode (with scoring or not)
} else if (minShouldMatch > 1) { // 2. Any minCompetitiveScore ( >= 0 )
return new MinShouldMatchSumScorer(weight, optionalScorers, minShouldMatch); // 3. Any minShouldMatch ( >= 0 )
//
// However, as WANDScorer uses more complex algorithm and data structure, we would like to
// still use DisjunctionSumScorer to handle exhaustive pure disjunctions, which may be faster
if (scoreMode == ScoreMode.TOP_SCORES || minShouldMatch > 1) {
return new WANDScorer(weight, optionalScorers, minShouldMatch, scoreMode);
} else { } else {
return new DisjunctionSumScorer(weight, optionalScorers, scoreMode); return new DisjunctionSumScorer(weight, optionalScorers, scoreMode);
} }

View File

@ -61,7 +61,7 @@ final class BooleanScorer extends BulkScorer {
} }
} }
// See MinShouldMatchSumScorer for an explanation // See WANDScorer for an explanation
private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) { private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) {
final PriorityQueue<BulkScorer> pq = final PriorityQueue<BulkScorer> pq =
new PriorityQueue<BulkScorer>(scorers.size() - minShouldMatch + 1) { new PriorityQueue<BulkScorer>(scorers.size() - minShouldMatch + 1) {

View File

@ -1,382 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
 * A {@link Scorer} for {@link BooleanQuery} when {@link
 * BooleanQuery.Builder#setMinimumNumberShouldMatch(int) minShouldMatch} is between 2 and the total
 * number of clauses.
 *
 * <p>This implementation keeps sub scorers in 3 different places: - lead: a linked list of scorers
 * that are positioned on the desired doc ID - tail: a heap that contains at most minShouldMatch - 1
 * scorers that are behind the desired doc ID. These scorers are ordered by cost so that we can
 * advance the least costly ones first. - head: a heap that contains scorers which are beyond the
 * desired doc ID, ordered by doc ID in order to move quickly to the next candidate.
 *
 * <p>Finding the next match consists of first setting the desired doc ID to the least entry in
 * 'head' and then advance 'tail' until there is a match.
 */
final class MinShouldMatchSumScorer extends Scorer {

  // Minimum number of optional (SHOULD) clauses that must match on a doc.
  final int minShouldMatch;

  // list of scorers which 'lead' the iteration and are currently
  // positioned on 'doc'
  DisiWrapper lead;

  int doc; // current doc ID of the leads
  int freq; // number of scorers on the desired doc ID

  // priority queue of scorers that are too advanced compared to the current
  // doc. Ordered by doc ID.
  final DisiPriorityQueue head;

  // priority queue of scorers which are behind the current doc.
  // Ordered by cost.
  final DisiWrapper[] tail;
  int tailSize;

  // Estimated iteration cost, computed once at construction.
  final long cost;

  /**
   * Sole constructor.
   *
   * @param weight the parent weight
   * @param scorers the optional (SHOULD) sub scorers
   * @param minShouldMatch how many of {@code scorers} must match; must be in
   *     {@code [1, scorers.size()]}
   */
  MinShouldMatchSumScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch) {
    super(weight);
    if (minShouldMatch > scorers.size()) {
      throw new IllegalArgumentException("minShouldMatch should be <= the number of scorers");
    }
    if (minShouldMatch < 1) {
      throw new IllegalArgumentException("minShouldMatch should be >= 1");
    }
    this.minShouldMatch = minShouldMatch;
    this.doc = -1;
    head = new DisiPriorityQueue(scorers.size() - minShouldMatch + 1);
    // there can be at most minShouldMatch - 1 scorers beyond the current position
    // otherwise we might be skipping over matching documents
    tail = new DisiWrapper[minShouldMatch - 1];
    // all scorers start in 'lead' on the sentinel doc -1; the first call to
    // nextDoc/advance redistributes them between 'head' and 'tail'
    for (Scorer scorer : scorers) {
      addLead(new DisiWrapper(scorer));
    }
    this.cost =
        ScorerUtil.costWithMinShouldMatch(
            scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
            scorers.size(),
            minShouldMatch);
  }

  @Override
  public final Collection<ChildScorable> getChildren() throws IOException {
    List<ChildScorable> matchingChildren = new ArrayList<>();
    // make sure 'lead' contains every scorer positioned on the current doc,
    // not only the minShouldMatch that were needed to accept it
    updateFreq();
    for (DisiWrapper s = lead; s != null; s = s.next) {
      matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
    }
    return matchingChildren;
  }

  @Override
  public DocIdSetIterator iterator() {
    return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
  }

  @Override
  public TwoPhaseIterator twoPhaseIterator() {
    // The approximation moves to candidate docs (enough scorers in lead+tail to
    // possibly reach minShouldMatch); matches() confirms by advancing the tail.
    DocIdSetIterator approximation =
        new DocIdSetIterator() {
          @Override
          public int docID() {
            assert doc == lead.doc;
            return doc;
          }

          @Override
          public int nextDoc() throws IOException {
            // We are moving to the next doc ID, so scorers in 'lead' need to go in
            // 'tail'. If there is not enough space in 'tail', then we take the least
            // costly scorers and advance them.
            for (DisiWrapper s = lead; s != null; s = s.next) {
              final DisiWrapper evicted = insertTailWithOverFlow(s);
              if (evicted != null) {
                if (evicted.doc == doc) {
                  evicted.doc = evicted.iterator.nextDoc();
                } else {
                  evicted.doc = evicted.iterator.advance(doc + 1);
                }
                head.add(evicted);
              }
            }

            setDocAndFreq();
            // It would be correct to return doNextCandidate() at this point but if you
            // call nextDoc as opposed to advance, it probably means that you really
            // need the next match. Returning 'doc' here would lead to a similar
            // iteration over sub postings overall except that the decision making would
            // happen at a higher level where more abstractions are involved and
            // benchmarks suggested it causes a significant performance hit.
            return doNext();
          }

          @Override
          public int advance(int target) throws IOException {
            // Same logic as in nextDoc
            for (DisiWrapper s = lead; s != null; s = s.next) {
              final DisiWrapper evicted = insertTailWithOverFlow(s);
              if (evicted != null) {
                evicted.doc = evicted.iterator.advance(target);
                head.add(evicted);
              }
            }

            // But this time there might also be scorers in 'head' behind the desired
            // target so we need to do the same thing that we did on 'lead' on 'head'
            DisiWrapper headTop = head.top();
            while (headTop.doc < target) {
              final DisiWrapper evicted = insertTailWithOverFlow(headTop);
              // We know that the tail is full since it contains at most
              // minShouldMatch - 1 entries and we just moved at least minShouldMatch
              // entries to it, so evicted is not null
              evicted.doc = evicted.iterator.advance(target);
              headTop = head.updateTop(evicted);
            }

            setDocAndFreq();
            return doNextCandidate();
          }

          @Override
          public long cost() {
            return cost;
          }
        };
    return new TwoPhaseIterator(approximation) {
      @Override
      public boolean matches() throws IOException {
        // keep pulling cheap scorers out of the tail until either enough of them
        // land on 'doc' (match) or not enough remain to ever reach minShouldMatch
        while (freq < minShouldMatch) {
          assert freq > 0;
          if (freq + tailSize >= minShouldMatch) {
            // a match on doc is still possible, try to
            // advance scorers from the tail
            advanceTail();
          } else {
            return false;
          }
        }
        return true;
      }

      @Override
      public float matchCost() {
        // maximum number of scorers that matches() might advance
        return tail.length;
      }
    };
  }

  /** Prepend {@code lead} to the linked list of leads and bump {@code freq}. */
  private void addLead(DisiWrapper lead) {
    lead.next = this.lead;
    this.lead = lead;
    freq += 1;
  }

  /** Move every scorer from the lead linked list back into the tail heap. */
  private void pushBackLeads() throws IOException {
    for (DisiWrapper s = lead; s != null; s = s.next) {
      addTail(s);
    }
  }

  /** Advance {@code top} to {@code doc}; it joins 'lead' on a hit, 'head' otherwise. */
  private void advanceTail(DisiWrapper top) throws IOException {
    top.doc = top.iterator.advance(doc);
    if (top.doc == doc) {
      addLead(top);
    } else {
      head.add(top);
    }
  }

  /** Pop the least costly scorer from the tail and advance it to {@code doc}. */
  private void advanceTail() throws IOException {
    final DisiWrapper top = popTail();
    advanceTail(top);
  }

  /** Reinitializes lead, freq and doc from 'head' */
  private void setDocAndFreq() {
    assert head.size() > 0;

    // The top of `head` defines the next potential match
    // pop all documents which are on this doc
    lead = head.pop();
    lead.next = null;
    freq = 1;
    doc = lead.doc;
    while (head.size() > 0 && head.top().doc == doc) {
      addLead(head.pop());
    }
  }

  /** Advance tail to the lead until there is a match. */
  private int doNext() throws IOException {
    while (freq < minShouldMatch) {
      assert freq > 0;
      if (freq + tailSize >= minShouldMatch) {
        // a match on doc is still possible, try to
        // advance scorers from the tail
        advanceTail();
      } else {
        // no match on doc is possible anymore, move to the next potential match
        pushBackLeads();
        setDocAndFreq();
      }
    }
    return doc;
  }

  /**
   * Move iterators to the tail until the cumulated size of lead+tail is greater than or equal to
   * minShouldMatch
   */
  private int doNextCandidate() throws IOException {
    while (freq + tailSize < minShouldMatch) {
      // no match on doc is possible, move to the next potential match
      pushBackLeads();
      setDocAndFreq();
    }
    return doc;
  }

  /** Advance all entries from the tail to know about all matches on the current doc. */
  private void updateFreq() throws IOException {
    assert freq >= minShouldMatch;
    // we return the next doc when there are minShouldMatch matching clauses
    // but some of the clauses in 'tail' might match as well
    // in general we want to advance least-costly clauses first in order to
    // skip over non-matching documents as fast as possible. However here,
    // we are advancing everything anyway so iterating over clauses in
    // (roughly) cost-descending order might help avoid some permutations in
    // the head heap
    for (int i = tailSize - 1; i >= 0; --i) {
      advanceTail(tail[i]);
    }
    tailSize = 0;
  }

  @Override
  public float score() throws IOException {
    // we need to know about all matches
    updateFreq();
    // sum in double precision, cast once at the end to limit float rounding
    double score = 0;
    for (DisiWrapper s = lead; s != null; s = s.next) {
      score += s.scorer.score();
    }
    return (float) score;
  }

  @Override
  public float getMaxScore(int upTo) throws IOException {
    // TODO: implement but be careful about floating-point errors.
    return Float.POSITIVE_INFINITY;
  }

  @Override
  public int docID() {
    assert doc == lead.doc;
    return doc;
  }

  /**
   * Insert an entry in 'tail' and evict the least-costly scorer if full.
   *
   * @return the evicted entry, {@code s} itself if the tail is full and {@code s} is the cheapest,
   *     or {@code null} if {@code s} was inserted without eviction
   */
  private DisiWrapper insertTailWithOverFlow(DisiWrapper s) {
    if (tailSize < tail.length) {
      addTail(s);
      return null;
    } else if (tail.length >= 1) {
      // tail[0] is the cheapest entry of this min-heap-by-cost; keep the most
      // costly scorers in the tail so the cheap ones do the advancing
      final DisiWrapper top = tail[0];
      if (top.cost < s.cost) {
        tail[0] = s;
        downHeapCost(tail, tailSize);
        return top;
      }
    }
    return s;
  }

  /** Add an entry to 'tail'. Fails if over capacity. */
  private void addTail(DisiWrapper s) {
    tail[tailSize] = s;
    upHeapCost(tail, tailSize);
    tailSize += 1;
  }

  /** Pop the least-costly scorer from 'tail'. */
  private DisiWrapper popTail() {
    assert tailSize > 0;
    final DisiWrapper result = tail[0];
    tail[0] = tail[--tailSize];
    downHeapCost(tail, tailSize);
    return result;
  }

  /** Heap helpers */
  private static void upHeapCost(DisiWrapper[] heap, int i) {
    // sift heap[i] up while it is cheaper than its parent
    final DisiWrapper node = heap[i];
    final long nodeCost = node.cost;
    int j = parentNode(i);
    while (j >= 0 && nodeCost < heap[j].cost) {
      heap[i] = heap[j];
      i = j;
      j = parentNode(j);
    }
    heap[i] = node;
  }

  private static void downHeapCost(DisiWrapper[] heap, int size) {
    // sift heap[0] down, swapping with the cheaper of its two children
    int i = 0;
    final DisiWrapper node = heap[0];
    int j = leftNode(i);
    if (j < size) {
      int k = rightNode(j);
      if (k < size && heap[k].cost < heap[j].cost) {
        j = k;
      }
      if (heap[j].cost < node.cost) {
        do {
          heap[i] = heap[j];
          i = j;
          j = leftNode(i);
          k = rightNode(j);
          if (k < size && heap[k].cost < heap[j].cost) {
            j = k;
          }
        } while (j < size && heap[j].cost < node.cost);
        heap[i] = node;
      }
    }
  }
}

View File

@ -31,13 +31,25 @@ import java.util.OptionalInt;
* This implements the WAND (Weak AND) algorithm for dynamic pruning described in "Efficient Query * This implements the WAND (Weak AND) algorithm for dynamic pruning described in "Efficient Query
* Evaluation using a Two-Level Retrieval Process" by Broder, Carmel, Herscovici, Soffer and Zien. * Evaluation using a Two-Level Retrieval Process" by Broder, Carmel, Herscovici, Soffer and Zien.
* Enhanced with techniques described in "Faster Top-k Document Retrieval Using Block-Max Indexes" * Enhanced with techniques described in "Faster Top-k Document Retrieval Using Block-Max Indexes"
* by Ding and Suel. This scorer maintains a feedback loop with the collector in order to know at * by Ding and Suel. For scoreMode == {@link ScoreMode#TOP_SCORES}, this scorer maintains a feedback
* any time the minimum score that is required in order for a hit to be competitive. Then it * loop with the collector in order to know at any time the minimum score that is required in order
* leverages the {@link Scorer#getMaxScore(int) max score} from each scorer in order to know when it * for a hit to be competitive.
* may call {@link DocIdSetIterator#advance} rather than {@link DocIdSetIterator#nextDoc} to move to *
* the next competitive hit. Implementation is similar to {@link MinShouldMatchSumScorer} except * <p>The implementation supports both minCompetitiveScore by enforcing that {@code max_score >=
* that instead of enforcing that {@code freq >= minShouldMatch}, we enforce that {@code max_score * minCompetitiveScore}, and minShouldMatch by enforcing {@code freq >= minShouldMatch}. It keeps
* >= minCompetitiveScore}. * sub scorers in 3 different places: - tail: a heap that contains scorers that are behind the
* desired doc ID. These scorers are ordered by cost so that we can advance the least costly ones
* first. - lead: a linked list of scorers that are positioned on the desired doc ID - head: a heap
* that contains scorers which are beyond the desired doc ID, ordered by doc ID in order to move
* quickly to the next candidate.
*
* <p>When scoreMode == {@link ScoreMode#TOP_SCORES}, it leverages the {@link
* Scorer#getMaxScore(int) max score} from each scorer in order to know when it may call {@link
* DocIdSetIterator#advance} rather than {@link DocIdSetIterator#nextDoc} to move to the next
* competitive hit. When scoreMode != {@link ScoreMode#TOP_SCORES}, block-max scoring related logic
* is skipped. Finding the next match consists of first setting the desired doc ID to the least
* entry in 'head', and then advance 'tail' until there is a match, by meeting the configured {@code
* freq >= minShouldMatch} and / or {@code max_score >= minCompetitiveScore} requirements.
*/ */
final class WANDScorer extends Scorer { final class WANDScorer extends Scorer {
@ -134,7 +146,10 @@ final class WANDScorer extends Scorer {
final int minShouldMatch; final int minShouldMatch;
int freq; int freq;
WANDScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch) throws IOException { final ScoreMode scoreMode;
WANDScorer(Weight weight, Collection<Scorer> scorers, int minShouldMatch, ScoreMode scoreMode)
throws IOException {
super(weight); super(weight);
if (minShouldMatch >= scorers.size()) { if (minShouldMatch >= scorers.size()) {
@ -149,23 +164,32 @@ final class WANDScorer extends Scorer {
this.doc = -1; this.doc = -1;
this.upTo = -1; // will be computed on the first call to nextDoc/advance this.upTo = -1; // will be computed on the first call to nextDoc/advance
this.scoreMode = scoreMode;
head = new DisiPriorityQueue(scorers.size()); head = new DisiPriorityQueue(scorers.size());
// there can be at most num_scorers - 1 scorers beyond the current position // there can be at most num_scorers - 1 scorers beyond the current position
tail = new DisiWrapper[scorers.size()]; tail = new DisiWrapper[scorers.size()];
OptionalInt scalingFactor = OptionalInt.empty(); if (this.scoreMode == ScoreMode.TOP_SCORES) {
for (Scorer scorer : scorers) { OptionalInt scalingFactor = OptionalInt.empty();
scorer.advanceShallow(0); for (Scorer scorer : scorers) {
float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS); scorer.advanceShallow(0);
if (maxScore != 0 && Float.isFinite(maxScore)) { float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
// 0 and +Infty should not impact the scale if (maxScore != 0 && Float.isFinite(maxScore)) {
scalingFactor = // 0 and +Infty should not impact the scale
OptionalInt.of( scalingFactor =
Math.min(scalingFactor.orElse(Integer.MAX_VALUE), scalingFactor(maxScore))); OptionalInt.of(
Math.min(scalingFactor.orElse(Integer.MAX_VALUE), scalingFactor(maxScore)));
}
} }
// Use a scaling factor of 0 if all max scores are either 0 or +Infty
this.scalingFactor = scalingFactor.orElse(0);
this.maxScorePropagator = new MaxScoreSumPropagator(scorers);
} else {
this.scalingFactor = 0;
this.maxScorePropagator = null;
} }
// Use a scaling factor of 0 if all max scores are either 0 or +Infty
this.scalingFactor = scalingFactor.orElse(0);
for (Scorer scorer : scorers) { for (Scorer scorer : scorers) {
addLead(new DisiWrapper(scorer)); addLead(new DisiWrapper(scorer));
@ -176,33 +200,34 @@ final class WANDScorer extends Scorer {
scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost), scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost),
scorers.size(), scorers.size(),
minShouldMatch); minShouldMatch);
this.maxScorePropagator = new MaxScoreSumPropagator(scorers);
} }
// returns a boolean so that it can be called from assert // returns a boolean so that it can be called from assert
// the return value is useless: it always returns true // the return value is useless: it always returns true
private boolean ensureConsistent() { private boolean ensureConsistent() {
long maxScoreSum = 0; if (scoreMode == ScoreMode.TOP_SCORES) {
for (int i = 0; i < tailSize; ++i) { long maxScoreSum = 0;
assert tail[i].doc < doc; for (int i = 0; i < tailSize; ++i) {
maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore); assert tail[i].doc < doc;
} maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore);
assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore; }
assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
maxScoreSum = 0; maxScoreSum = 0;
for (DisiWrapper w = lead; w != null; w = w.next) { for (DisiWrapper w = lead; w != null; w = w.next) {
assert w.doc == doc; assert w.doc == doc;
maxScoreSum = Math.addExact(maxScoreSum, w.maxScore); maxScoreSum = Math.addExact(maxScoreSum, w.maxScore);
}
assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
assert doc <= upTo;
} }
assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
for (DisiWrapper w : head) { for (DisiWrapper w : head) {
assert w.doc > doc; assert w.doc > doc;
} }
assert minCompetitiveScore == 0 || tailMaxScore < minCompetitiveScore;
assert doc <= upTo;
return true; return true;
} }
@ -210,6 +235,8 @@ final class WANDScorer extends Scorer {
public void setMinCompetitiveScore(float minScore) throws IOException { public void setMinCompetitiveScore(float minScore) throws IOException {
// Let this disjunction know about the new min score so that it can skip // Let this disjunction know about the new min score so that it can skip
// over clauses that produce low scores. // over clauses that produce low scores.
assert scoreMode == ScoreMode.TOP_SCORES
: "minCompetitiveScore can only be set for ScoreMode.TOP_SCORES, but got: " + scoreMode;
assert minScore >= 0; assert minScore >= 0;
long scaledMinScore = scaleMinScore(minScore, scalingFactor); long scaledMinScore = scaleMinScore(minScore, scalingFactor);
assert scaledMinScore >= minCompetitiveScore; assert scaledMinScore >= minCompetitiveScore;
@ -421,7 +448,9 @@ final class WANDScorer extends Scorer {
} }
} }
assert upTo == DocIdSetIterator.NO_MORE_DOCS || (head.size() > 0 && head.top().doc <= upTo); assert (head.size() == 0 && upTo == DocIdSetIterator.NO_MORE_DOCS)
|| (head.size() > 0 && head.top().doc <= upTo);
assert upTo >= target;
} }
/** /**
@ -429,16 +458,18 @@ final class WANDScorer extends Scorer {
* 'lead'. * 'lead'.
*/ */
private void moveToNextCandidate(int target) throws IOException { private void moveToNextCandidate(int target) throws IOException {
// Update score bounds if necessary so if (scoreMode == ScoreMode.TOP_SCORES) {
updateMaxScoresIfNecessary(target); // Update score bounds if necessary so
assert upTo >= target; updateMaxScoresIfNecessary(target);
assert upTo >= target;
// updateMaxScores tries to move forward until a block with matches is found // updateMaxScores tries to move forward until a block with matches is found
// so if the head is empty it means there are no matches at all anymore // so if the head is empty it means there are no matches at all anymore
if (head.size() == 0) { if (head.size() == 0) {
assert upTo == DocIdSetIterator.NO_MORE_DOCS; assert upTo == DocIdSetIterator.NO_MORE_DOCS;
doc = DocIdSetIterator.NO_MORE_DOCS; doc = DocIdSetIterator.NO_MORE_DOCS;
return; return;
}
} }
// The top of `head` defines the next potential match // The top of `head` defines the next potential match

View File

@ -240,7 +240,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
assertEquals( assertEquals(
"ConjunctionScorer\n" "ConjunctionScorer\n"
+ " MUST ConstantScoreScorer\n" + " MUST ConstantScoreScorer\n"
+ " MUST MinShouldMatchSumScorer\n" + " MUST WANDScorer\n"
+ " SHOULD TermScorer body:crawler\n" + " SHOULD TermScorer body:crawler\n"
+ " SHOULD TermScorer body:web\n" + " SHOULD TermScorer body:web\n"
+ " SHOULD TermScorer body:nutch", + " SHOULD TermScorer body:nutch",

View File

@ -312,6 +312,57 @@ public class TestWANDScorer extends LuceneTestCase {
} }
} }
public void testBasicsWithDisjunctionAndMinShouldMatchAndNonScoringMode() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
for (String[] values :
Arrays.asList(
new String[] {"A", "B"}, // 0
new String[] {"A"}, // 1
new String[] {}, // 2
new String[] {"A", "B", "C"}, // 3
new String[] {"B"}, // 4
new String[] {"B", "C"} // 5
)) {
Document doc = new Document();
for (String value : values) {
doc.add(new StringField("foo", value, Store.NO));
}
w.addDocument(doc);
}
w.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
Occur.SHOULD)
.add(new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), Occur.SHOULD)
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
Scorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1)
.scorer(searcher.getIndexReader().leaves().get(0));
assertEquals(0, scorer.iterator().nextDoc());
assertEquals(3, scorer.iterator().nextDoc());
assertEquals(5, scorer.iterator().nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
}
}
}
public void testBasicsWithFilteredDisjunctionAndMinShouldMatch() throws Exception { public void testBasicsWithFilteredDisjunctionAndMinShouldMatch() throws Exception {
try (Directory dir = newDirectory()) { try (Directory dir = newDirectory()) {
try (IndexWriter w = try (IndexWriter w =
@ -387,6 +438,66 @@ public class TestWANDScorer extends LuceneTestCase {
} }
} }
public void testBasicsWithFilteredDisjunctionAndMinShouldMatchAndNonScoringMode()
throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
for (String[] values :
Arrays.asList(
new String[] {"A", "B"}, // 0
new String[] {"A", "C", "D"}, // 1
new String[] {}, // 2
new String[] {"A", "B", "C", "D"}, // 3
new String[] {"B"}, // 4
new String[] {"C", "D"} // 5
)) {
Document doc = new Document();
for (String value : values) {
doc.add(new StringField("foo", value, Store.NO));
}
w.addDocument(doc);
}
w.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BooleanQuery.Builder()
.add(
new BoostQuery(
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
Occur.SHOULD)
.add(
new BoostQuery(
new ConstantScoreQuery(new TermQuery(new Term("foo", "D"))), 4),
Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build(),
Occur.MUST)
.add(new TermQuery(new Term("foo", "C")), Occur.FILTER)
.build();
Scorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.TOP_DOCS, 1)
.scorer(searcher.getIndexReader().leaves().get(0));
assertEquals(1, scorer.iterator().nextDoc());
assertEquals(3, scorer.iterator().nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
}
}
}
public void testBasicsWithFilteredDisjunctionAndMustNotAndMinShouldMatch() throws Exception { public void testBasicsWithFilteredDisjunctionAndMustNotAndMinShouldMatch() throws Exception {
try (Directory dir = newDirectory()) { try (Directory dir = newDirectory()) {
try (IndexWriter w = try (IndexWriter w =
@ -454,6 +565,58 @@ public class TestWANDScorer extends LuceneTestCase {
} }
} }
public void testBasicsWithFilteredDisjunctionAndMustNotAndMinShouldMatchAndNonScoringMode()
throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
for (String[] values :
Arrays.asList(
new String[] {"A", "B"}, // 0
new String[] {"A", "C", "D"}, // 1
new String[] {}, // 2
new String[] {"A", "B", "C", "D"}, // 3
new String[] {"B", "D"}, // 4
new String[] {"C", "D"} // 5
)) {
Document doc = new Document();
for (String value : values) {
doc.add(new StringField("foo", value, Store.NO));
}
w.addDocument(doc);
}
w.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
Occur.SHOULD)
.add(new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "C")), Occur.MUST_NOT)
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "D"))), 4),
Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
Scorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1)
.scorer(searcher.getIndexReader().leaves().get(0));
assertEquals(0, scorer.iterator().nextDoc());
assertEquals(4, scorer.iterator().nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
}
}
}
public void testRandom() throws IOException { public void testRandom() throws IOException {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());