mirror of https://github.com/apache/lucene.git
LUCENE-10480: Use BMM scorer for 2 clauses disjunction (#972)
This commit is contained in:
parent
187f843e2a
commit
503ec55973
|
@ -112,6 +112,8 @@ Optimizations
|
|||
|
||||
* LUCENE-10618: Implement BooleanQuery rewrite rules based on minimumShouldMatch. (Fang Hou)
|
||||
|
||||
* LUCENE-10480: Implement Block-Max-Maxscore scorer for two-clause disjunctions. (Zach Chen, Adrien Grand)
|
||||
|
||||
* LUCENE-10606: For KnnVectorQuery, optimize case where filter is backed by BitSetIterator (Kaival Parikh)
|
||||
|
||||
* LUCENE-10593: Vector similarity function and NeighborQueue reverse removal. (Alessandro Benedetti)
|
||||
|
|
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/** Scorer implementing Block-Max Maxscore algorithm */
|
||||
class BlockMaxMaxscoreScorer extends Scorer {
|
||||
// current doc ID of the leads
|
||||
private int doc;
|
||||
|
||||
// doc id boundary that all scorers maxScore are valid
|
||||
private int upTo;
|
||||
|
||||
// heap of scorers ordered by doc ID
|
||||
private final DisiPriorityQueue essentialsScorers;
|
||||
|
||||
// list of scorers ordered by maxScore
|
||||
private final LinkedList<DisiWrapper> maxScoreSortedEssentialScorers;
|
||||
|
||||
private final DisiWrapper[] allScorers;
|
||||
|
||||
// sum of max scores of scorers in nonEssentialScorers list
|
||||
private double nonEssentialMaxScoreSum;
|
||||
|
||||
private final long cost;
|
||||
|
||||
private final MaxScoreSumPropagator maxScoreSumPropagator;
|
||||
|
||||
private float minCompetitiveScore;
|
||||
|
||||
private int cachedScoredDoc;
|
||||
|
||||
private float cachedScore;
|
||||
|
||||
/**
|
||||
* Constructs a Scorer that scores doc based on Block-Max-Maxscore (BMM) algorithm
|
||||
* http://engineering.nyu.edu/~suel/papers/bmm.pdf . This algorithm has lower overhead compared to
|
||||
* WANDScorer, and could be used for simple disjunction queries.
|
||||
*
|
||||
* @param weight The weight to be used.
|
||||
* @param scorers The sub scorers this Scorer should iterate on for optional clauses.
|
||||
*/
|
||||
public BlockMaxMaxscoreScorer(Weight weight, List<Scorer> scorers) throws IOException {
|
||||
super(weight);
|
||||
|
||||
this.upTo = -1;
|
||||
this.doc = -1;
|
||||
this.minCompetitiveScore = 0;
|
||||
this.cachedScoredDoc = -1;
|
||||
this.cachedScore = 0;
|
||||
this.allScorers = new DisiWrapper[scorers.size()];
|
||||
this.essentialsScorers = new DisiPriorityQueue(scorers.size());
|
||||
this.maxScoreSortedEssentialScorers = new LinkedList<>();
|
||||
|
||||
long cost = 0;
|
||||
for (int i = 0; i < scorers.size(); i++) {
|
||||
DisiWrapper w = new DisiWrapper(scorers.get(i));
|
||||
cost += w.cost;
|
||||
allScorers[i] = w;
|
||||
}
|
||||
|
||||
this.cost = cost;
|
||||
maxScoreSumPropagator = new MaxScoreSumPropagator(scorers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
// twoPhaseIterator needed to honor scorer.setMinCompetitiveScore guarantee
|
||||
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator twoPhaseIterator() {
|
||||
DocIdSetIterator approximation =
|
||||
new DocIdSetIterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
while (true) {
|
||||
|
||||
if (target > upTo) {
|
||||
updateMaxScoresAndLists(target);
|
||||
} else {
|
||||
// minCompetitiveScore might have increased,
|
||||
// move potentially no-longer-competitive scorers from essential to non-essential
|
||||
// list
|
||||
movePotentiallyNonCompetitiveScorers();
|
||||
}
|
||||
|
||||
assert target <= upTo;
|
||||
|
||||
DisiWrapper top = essentialsScorers.top();
|
||||
|
||||
if (top == null) {
|
||||
// all scorers in non-essential list, skip to next boundary or return no_more_docs
|
||||
if (upTo == NO_MORE_DOCS) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
} else {
|
||||
target = upTo + 1;
|
||||
}
|
||||
} else {
|
||||
// position all scorers in essential list to on or after target
|
||||
while (top.doc < target) {
|
||||
top.doc = top.iterator.advance(target);
|
||||
top = essentialsScorers.updateTop();
|
||||
}
|
||||
|
||||
if (top.doc == NO_MORE_DOCS) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
} else if (top.doc > upTo) {
|
||||
target = upTo + 1;
|
||||
} else {
|
||||
double docScoreUpperBound = nonEssentialMaxScoreSum;
|
||||
|
||||
for (DisiWrapper w = essentialsScorers.topList(); w != null; w = w.next) {
|
||||
docScoreUpperBound += w.scorer.score();
|
||||
}
|
||||
|
||||
if (maxScoreSumPropagator.scoreSumUpperBound(docScoreUpperBound)
|
||||
< minCompetitiveScore) {
|
||||
// skip straight to next candidate doc from essential scorer
|
||||
int docId = top.doc;
|
||||
do {
|
||||
top.doc = top.iterator.nextDoc();
|
||||
top = essentialsScorers.updateTop();
|
||||
} while (top.doc == docId);
|
||||
|
||||
target = top.doc;
|
||||
} else {
|
||||
return doc = top.doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void movePotentiallyNonCompetitiveScorers() {
|
||||
while (maxScoreSortedEssentialScorers.size() > 0
|
||||
&& maxScoreSumPropagator.scoreSumUpperBound(
|
||||
nonEssentialMaxScoreSum + maxScoreSortedEssentialScorers.get(0).maxScore)
|
||||
< minCompetitiveScore) {
|
||||
DisiWrapper nextLeastContributingScorer =
|
||||
maxScoreSortedEssentialScorers.removeFirst();
|
||||
nonEssentialMaxScoreSum += nextLeastContributingScorer.maxScore;
|
||||
}
|
||||
|
||||
// list adjusted
|
||||
if (essentialsScorers.size() != maxScoreSortedEssentialScorers.size()) {
|
||||
essentialsScorers.clear();
|
||||
for (DisiWrapper w : maxScoreSortedEssentialScorers) {
|
||||
essentialsScorers.add(w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void updateMaxScoresAndLists(int target) throws IOException {
|
||||
assert target > upTo;
|
||||
// Next candidate doc id is above interval boundary, or minCompetitive has increased.
|
||||
// Find next interval boundary.
|
||||
// Block boundary alignment strategy is adapted from "Optimizing Top-k Document
|
||||
// Retrieval Strategies for Block-Max Indexes" by Dimopoulos, Nepomnyachiy and Suel.
|
||||
// Find the block interval boundary by computing statistics (max, avg etc.) from all
|
||||
// participating scorer's block boundary. Then run BMM within the boundary.
|
||||
updateUpToAndMaxScore(target);
|
||||
repartitionLists();
|
||||
}
|
||||
|
||||
private void updateUpToAndMaxScore(int target) throws IOException {
|
||||
// reset upTo
|
||||
upTo = -1;
|
||||
for (DisiWrapper w : allScorers) {
|
||||
// using Math.max here is a good approach when there are only two clauses,
|
||||
// but when this scorer is used for more than two clauses, we may need to
|
||||
// consider other approaches such as avg, as the further out the boundary,
|
||||
// the higher maxScore would be for a scorer, which makes skipping based on
|
||||
// comparison with minCompetitiveScore harder / less effective.
|
||||
upTo = Math.max(w.scorer.advanceShallow(Math.max(w.doc, target)), upTo);
|
||||
}
|
||||
assert target <= upTo;
|
||||
|
||||
for (DisiWrapper w : allScorers) {
|
||||
// The assertion below will hold as long as upTo was computed using Math.max
|
||||
// However, when the upTo computation method changes (to Math.avg etc),
|
||||
// we may need to also handle the scenario where w.doc > upTo
|
||||
assert w.doc <= upTo;
|
||||
w.maxScore = w.scorer.getMaxScore(upTo);
|
||||
}
|
||||
}
|
||||
|
||||
private void repartitionLists() {
|
||||
essentialsScorers.clear();
|
||||
maxScoreSortedEssentialScorers.clear();
|
||||
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore));
|
||||
|
||||
// Re-partition the scorers into non-essential list and essential list, as defined in
|
||||
// the "Optimizing Top-k Document Retrieval Strategies for Block-Max Indexes" paper.
|
||||
nonEssentialMaxScoreSum = 0;
|
||||
for (DisiWrapper w : allScorers) {
|
||||
if (maxScoreSumPropagator.scoreSumUpperBound(nonEssentialMaxScoreSum + w.maxScore)
|
||||
< minCompetitiveScore) {
|
||||
nonEssentialMaxScoreSum += w.maxScore;
|
||||
} else {
|
||||
maxScoreSortedEssentialScorers.add(w);
|
||||
essentialsScorers.add(w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
// fixed at initialization
|
||||
return cost;
|
||||
}
|
||||
};
|
||||
|
||||
return new TwoPhaseIterator(approximation) {
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return score() >= minCompetitiveScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
// over-estimate
|
||||
return allScorers.length;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advanceShallow(int target) throws IOException {
|
||||
// Propagate to improve score bounds
|
||||
maxScoreSumPropagator.advanceShallow(target);
|
||||
|
||||
int result = DocIdSetIterator.NO_MORE_DOCS;
|
||||
for (DisiWrapper s : allScorers) {
|
||||
if (s.doc < target) {
|
||||
result = Math.min(result, s.scorer.advanceShallow(target));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
return maxScoreSumPropagator.getMaxScore(upTo);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
if (doc == cachedScoredDoc) {
|
||||
return cachedScore;
|
||||
} else {
|
||||
double sum = 0;
|
||||
|
||||
for (DisiWrapper w : allScorers) {
|
||||
if (w.doc < doc) {
|
||||
w.doc = w.iterator.advance(doc);
|
||||
}
|
||||
|
||||
if (w.doc == doc) {
|
||||
sum += w.scorer.score();
|
||||
}
|
||||
}
|
||||
|
||||
cachedScoredDoc = doc;
|
||||
cachedScore = (float) sum;
|
||||
|
||||
return cachedScore;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Collection<ChildScorable> getChildren() {
|
||||
List<ChildScorable> matchingChildren = new ArrayList<>();
|
||||
for (DisiWrapper s : allScorers) {
|
||||
if (s.doc == doc) {
|
||||
matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
|
||||
}
|
||||
}
|
||||
return matchingChildren;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMinCompetitiveScore(float minScore) throws IOException {
|
||||
assert minScore >= 0;
|
||||
minCompetitiveScore = minScore;
|
||||
maxScoreSumPropagator.setMinCompetitiveScore(minScore);
|
||||
}
|
||||
}
|
|
@ -118,6 +118,21 @@ final class Boolean2ScorerSupplier extends ScorerSupplier {
|
|||
leadCost);
|
||||
}
|
||||
|
||||
// pure two terms disjunction
|
||||
if (scoreMode == ScoreMode.TOP_SCORES
|
||||
&& minShouldMatch <= 1
|
||||
&& subs.get(Occur.FILTER).isEmpty()
|
||||
&& subs.get(Occur.MUST).isEmpty()
|
||||
&& subs.get(Occur.MUST_NOT).isEmpty()
|
||||
&& subs.get(Occur.SHOULD).size() == 2) {
|
||||
|
||||
final List<Scorer> optionalScorers = new ArrayList<>();
|
||||
for (ScorerSupplier scorer : subs.get(Occur.SHOULD)) {
|
||||
optionalScorers.add(scorer.get(leadCost));
|
||||
}
|
||||
return new BlockMaxMaxscoreScorer(weight, optionalScorers);
|
||||
}
|
||||
|
||||
// pure disjunction
|
||||
if (subs.get(Occur.FILTER).isEmpty() && subs.get(Occur.MUST).isEmpty()) {
|
||||
return excl(
|
||||
|
|
|
@ -123,6 +123,12 @@ public final class DisiPriorityQueue implements Iterable<DisiWrapper> {
|
|||
return updateTop();
|
||||
}
|
||||
|
||||
/** Clear the heap. */
|
||||
public void clear() {
|
||||
Arrays.fill(heap, null);
|
||||
size = 0;
|
||||
}
|
||||
|
||||
void upHeap(int i) {
|
||||
final DisiWrapper node = heap[i];
|
||||
final int nodeDoc = node.doc;
|
||||
|
|
|
@ -37,7 +37,10 @@ public class DisiWrapper {
|
|||
public final TwoPhaseIterator twoPhaseView;
|
||||
|
||||
// For WANDScorer
|
||||
long maxScore;
|
||||
long scaledMaxScore;
|
||||
|
||||
// For BlockMaxMaxscoreScorer
|
||||
float maxScore;
|
||||
|
||||
public DisiWrapper(Scorer scorer) {
|
||||
this.scorer = scorer;
|
||||
|
|
|
@ -211,14 +211,14 @@ final class WANDScorer extends Scorer {
|
|||
long maxScoreSum = 0;
|
||||
for (int i = 0; i < tailSize; ++i) {
|
||||
assert tail[i].doc < doc;
|
||||
maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore);
|
||||
maxScoreSum = Math.addExact(maxScoreSum, tail[i].scaledMaxScore);
|
||||
}
|
||||
assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
|
||||
|
||||
maxScoreSum = 0;
|
||||
for (DisiWrapper w = lead; w != null; w = w.next) {
|
||||
assert w.doc == doc;
|
||||
maxScoreSum = Math.addExact(maxScoreSum, w.maxScore);
|
||||
maxScoreSum = Math.addExact(maxScoreSum, w.scaledMaxScore);
|
||||
}
|
||||
assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
|
||||
|
||||
|
@ -336,7 +336,7 @@ final class WANDScorer extends Scorer {
|
|||
private void addLead(DisiWrapper lead) {
|
||||
lead.next = this.lead;
|
||||
this.lead = lead;
|
||||
leadMaxScore += lead.maxScore;
|
||||
leadMaxScore += lead.scaledMaxScore;
|
||||
freq += 1;
|
||||
}
|
||||
|
||||
|
@ -402,7 +402,7 @@ final class WANDScorer extends Scorer {
|
|||
for (DisiWrapper w : head) {
|
||||
if (w.doc <= newUpTo) {
|
||||
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
||||
w.maxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||
}
|
||||
}
|
||||
upTo = newUpTo;
|
||||
|
@ -412,9 +412,9 @@ final class WANDScorer extends Scorer {
|
|||
for (int i = 0; i < tailSize; ++i) {
|
||||
DisiWrapper w = tail[i];
|
||||
w.scorer.advanceShallow(target);
|
||||
w.maxScore = scaleMaxScore(w.scorer.getMaxScore(upTo), scalingFactor);
|
||||
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(upTo), scalingFactor);
|
||||
upHeapMaxScore(tail, i); // the heap might need to be reordered
|
||||
tailMaxScore += w.maxScore;
|
||||
tailMaxScore += w.scaledMaxScore;
|
||||
}
|
||||
|
||||
// We need to make sure that entries in 'tail' alone cannot match
|
||||
|
@ -480,7 +480,7 @@ final class WANDScorer extends Scorer {
|
|||
// pop all documents which are on this doc
|
||||
lead = head.pop();
|
||||
lead.next = null;
|
||||
leadMaxScore = lead.maxScore;
|
||||
leadMaxScore = lead.scaledMaxScore;
|
||||
freq = 1;
|
||||
doc = lead.doc;
|
||||
while (head.size() > 0 && head.top().doc == doc) {
|
||||
|
@ -552,10 +552,10 @@ final class WANDScorer extends Scorer {
|
|||
|
||||
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
||||
private DisiWrapper insertTailWithOverFlow(DisiWrapper s) {
|
||||
if (tailMaxScore + s.maxScore < minCompetitiveScore || tailSize + 1 < minShouldMatch) {
|
||||
if (tailMaxScore + s.scaledMaxScore < minCompetitiveScore || tailSize + 1 < minShouldMatch) {
|
||||
// we have free room for this new entry
|
||||
addTail(s);
|
||||
tailMaxScore += s.maxScore;
|
||||
tailMaxScore += s.scaledMaxScore;
|
||||
return null;
|
||||
} else if (tailSize == 0) {
|
||||
return s;
|
||||
|
@ -567,7 +567,7 @@ final class WANDScorer extends Scorer {
|
|||
// Swap top and s
|
||||
tail[0] = s;
|
||||
downHeapMaxScore(tail, tailSize);
|
||||
tailMaxScore = tailMaxScore - top.maxScore + s.maxScore;
|
||||
tailMaxScore = tailMaxScore - top.scaledMaxScore + s.scaledMaxScore;
|
||||
return top;
|
||||
}
|
||||
}
|
||||
|
@ -585,7 +585,7 @@ final class WANDScorer extends Scorer {
|
|||
final DisiWrapper result = tail[0];
|
||||
tail[0] = tail[--tailSize];
|
||||
downHeapMaxScore(tail, tailSize);
|
||||
tailMaxScore -= result.maxScore;
|
||||
tailMaxScore -= result.scaledMaxScore;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -631,9 +631,9 @@ final class WANDScorer extends Scorer {
|
|||
* further.
|
||||
*/
|
||||
private static boolean greaterMaxScore(DisiWrapper w1, DisiWrapper w2) {
|
||||
if (w1.maxScore > w2.maxScore) {
|
||||
if (w1.scaledMaxScore > w2.scaledMaxScore) {
|
||||
return true;
|
||||
} else if (w1.maxScore < w2.maxScore) {
|
||||
} else if (w1.scaledMaxScore < w2.scaledMaxScore) {
|
||||
return false;
|
||||
} else {
|
||||
return w1.cost < w2.cost;
|
||||
|
|
|
@ -0,0 +1,255 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.search.AssertingScorer;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
||||
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
||||
public class TestBlockMaxMaxscoreScorer extends LuceneTestCase {
|
||||
private void writeDocuments(Directory dir) throws IOException {
|
||||
try (IndexWriter w =
|
||||
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
||||
|
||||
for (String[] values :
|
||||
Arrays.asList(
|
||||
new String[] {"A", "B"}, // 0
|
||||
new String[] {"A"}, // 1
|
||||
new String[] {}, // 2
|
||||
new String[] {"A", "B", "C"}, // 3
|
||||
new String[] {"B"}, // 4
|
||||
new String[] {"B", "C"} // 5
|
||||
)) {
|
||||
Document doc = new Document();
|
||||
for (String value : values) {
|
||||
doc.add(new StringField("foo", value, Field.Store.NO));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
w.forceMerge(1);
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicsWithTwoDisjunctionClauses() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
writeDocuments(dir);
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
Scorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
if (scorer instanceof AssertingScorer) {
|
||||
assertTrue(((AssertingScorer) scorer).getIn() instanceof BlockMaxMaxscoreScorer);
|
||||
} else {
|
||||
assertTrue(scorer instanceof BlockMaxMaxscoreScorer);
|
||||
}
|
||||
|
||||
assertEquals(0, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(1, scorer.iterator().nextDoc());
|
||||
assertEquals(2, scorer.score(), 0);
|
||||
|
||||
assertEquals(3, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(4, scorer.iterator().nextDoc());
|
||||
assertEquals(1, scorer.score(), 0);
|
||||
|
||||
assertEquals(5, scorer.iterator().nextDoc());
|
||||
assertEquals(1, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicsWithThreeDisjunctionClausesNotUseBMMScorer() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
writeDocuments(dir);
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
Scorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
if (scorer instanceof AssertingScorer) {
|
||||
assertTrue(((AssertingScorer) scorer).getIn() instanceof WANDScorer);
|
||||
} else {
|
||||
assertTrue(scorer instanceof WANDScorer);
|
||||
}
|
||||
|
||||
assertEquals(0, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(1, scorer.iterator().nextDoc());
|
||||
assertEquals(2, scorer.score(), 0);
|
||||
|
||||
assertEquals(3, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1 + 3, scorer.score(), 0);
|
||||
|
||||
assertEquals(4, scorer.iterator().nextDoc());
|
||||
assertEquals(1, scorer.score(), 0);
|
||||
|
||||
assertEquals(5, scorer.iterator().nextDoc());
|
||||
assertEquals(1 + 3, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicsWithFilteredDisjunction() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
writeDocuments(dir);
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build(),
|
||||
BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.FILTER)
|
||||
.build();
|
||||
|
||||
Scorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
assertEquals(3, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(5, scorer.iterator().nextDoc());
|
||||
assertEquals(1, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
|
||||
scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
scorer.setMinCompetitiveScore(2);
|
||||
|
||||
assertEquals(3, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicsWithExclusion() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
writeDocuments(dir);
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.MUST_NOT)
|
||||
.build();
|
||||
|
||||
Scorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
assertEquals(0, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(1, scorer.iterator().nextDoc());
|
||||
assertEquals(2, scorer.score(), 0);
|
||||
|
||||
assertEquals(4, scorer.iterator().nextDoc());
|
||||
assertEquals(1, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
|
||||
scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||
|
||||
scorer.setMinCompetitiveScore(3);
|
||||
|
||||
assertEquals(0, scorer.iterator().nextDoc());
|
||||
assertEquals(2 + 1, scorer.score(), 0);
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue