mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
LUCENE-10480: Use BMM scorer for 2 clauses disjunction (#972)
This commit is contained in:
parent
187f843e2a
commit
503ec55973
@ -112,6 +112,8 @@ Optimizations
|
|||||||
|
|
||||||
* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou)
|
* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou)
|
||||||
|
|
||||||
|
* LUCENE-10480: Implement Block-Max-Maxscore scorer for 2 clauses disjunction. (Zach Chen, Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-10606: For KnnVectorQuery, optimize case where filter is backed by BitSetIterator (Kaival Parikh)
|
* LUCENE-10606: For KnnVectorQuery, optimize case where filter is backed by BitSetIterator (Kaival Parikh)
|
||||||
|
|
||||||
* LUCENE-10593: Vector similarity function and NeighborQueue reverse removal. (Alessandro Benedetti)
|
* LUCENE-10593: Vector similarity function and NeighborQueue reverse removal. (Alessandro Benedetti)
|
||||||
|
@ -0,0 +1,328 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/** Scorer implementing Block-Max Maxscore algorithm */
|
||||||
|
class BlockMaxMaxscoreScorer extends Scorer {
|
||||||
|
// current doc ID of the leads
|
||||||
|
private int doc;
|
||||||
|
|
||||||
|
// doc ID boundary (inclusive) up to which the maxScore of every scorer is valid
|
||||||
|
private int upTo;
|
||||||
|
|
||||||
|
// heap of scorers ordered by doc ID
|
||||||
|
private final DisiPriorityQueue essentialsScorers;
|
||||||
|
|
||||||
|
// list of scorers ordered by maxScore
|
||||||
|
private final LinkedList<DisiWrapper> maxScoreSortedEssentialScorers;
|
||||||
|
|
||||||
|
private final DisiWrapper[] allScorers;
|
||||||
|
|
||||||
|
// sum of the max scores of the non-essential scorers, i.e. those not held in the essential lists
|
||||||
|
private double nonEssentialMaxScoreSum;
|
||||||
|
|
||||||
|
private final long cost;
|
||||||
|
|
||||||
|
private final MaxScoreSumPropagator maxScoreSumPropagator;
|
||||||
|
|
||||||
|
private float minCompetitiveScore;
|
||||||
|
|
||||||
|
private int cachedScoredDoc;
|
||||||
|
|
||||||
|
private float cachedScore;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a Scorer that scores doc based on Block-Max-Maxscore (BMM) algorithm
|
||||||
|
* http://engineering.nyu.edu/~suel/papers/bmm.pdf . This algorithm has lower overhead compared to
|
||||||
|
* WANDScorer, and could be used for simple disjunction queries.
|
||||||
|
*
|
||||||
|
* @param weight The weight to be used.
|
||||||
|
* @param scorers The sub scorers this Scorer should iterate on for optional clauses.
|
||||||
|
*/
|
||||||
|
public BlockMaxMaxscoreScorer(Weight weight, List<Scorer> scorers) throws IOException {
|
||||||
|
super(weight);
|
||||||
|
|
||||||
|
this.upTo = -1;
|
||||||
|
this.doc = -1;
|
||||||
|
this.minCompetitiveScore = 0;
|
||||||
|
this.cachedScoredDoc = -1;
|
||||||
|
this.cachedScore = 0;
|
||||||
|
this.allScorers = new DisiWrapper[scorers.size()];
|
||||||
|
this.essentialsScorers = new DisiPriorityQueue(scorers.size());
|
||||||
|
this.maxScoreSortedEssentialScorers = new LinkedList<>();
|
||||||
|
|
||||||
|
long cost = 0;
|
||||||
|
for (int i = 0; i < scorers.size(); i++) {
|
||||||
|
DisiWrapper w = new DisiWrapper(scorers.get(i));
|
||||||
|
cost += w.cost;
|
||||||
|
allScorers[i] = w;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.cost = cost;
|
||||||
|
maxScoreSumPropagator = new MaxScoreSumPropagator(scorers);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSetIterator iterator() {
|
||||||
|
// twoPhaseIterator needed to honor scorer.setMinCompetitiveScore guarantee
|
||||||
|
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TwoPhaseIterator twoPhaseIterator() {
|
||||||
|
DocIdSetIterator approximation =
|
||||||
|
new DocIdSetIterator() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
return advance(doc + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
while (true) {
|
||||||
|
|
||||||
|
if (target > upTo) {
|
||||||
|
updateMaxScoresAndLists(target);
|
||||||
|
} else {
|
||||||
|
// minCompetitiveScore might have increased,
|
||||||
|
// move potentially no-longer-competitive scorers from essential to non-essential
|
||||||
|
// list
|
||||||
|
movePotentiallyNonCompetitiveScorers();
|
||||||
|
}
|
||||||
|
|
||||||
|
assert target <= upTo;
|
||||||
|
|
||||||
|
DisiWrapper top = essentialsScorers.top();
|
||||||
|
|
||||||
|
if (top == null) {
|
||||||
|
// all scorers in non-essential list, skip to next boundary or return no_more_docs
|
||||||
|
if (upTo == NO_MORE_DOCS) {
|
||||||
|
return doc = NO_MORE_DOCS;
|
||||||
|
} else {
|
||||||
|
target = upTo + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// position all scorers in essential list to on or after target
|
||||||
|
while (top.doc < target) {
|
||||||
|
top.doc = top.iterator.advance(target);
|
||||||
|
top = essentialsScorers.updateTop();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (top.doc == NO_MORE_DOCS) {
|
||||||
|
return doc = NO_MORE_DOCS;
|
||||||
|
} else if (top.doc > upTo) {
|
||||||
|
target = upTo + 1;
|
||||||
|
} else {
|
||||||
|
double docScoreUpperBound = nonEssentialMaxScoreSum;
|
||||||
|
|
||||||
|
for (DisiWrapper w = essentialsScorers.topList(); w != null; w = w.next) {
|
||||||
|
docScoreUpperBound += w.scorer.score();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxScoreSumPropagator.scoreSumUpperBound(docScoreUpperBound)
|
||||||
|
< minCompetitiveScore) {
|
||||||
|
// skip straight to next candidate doc from essential scorer
|
||||||
|
int docId = top.doc;
|
||||||
|
do {
|
||||||
|
top.doc = top.iterator.nextDoc();
|
||||||
|
top = essentialsScorers.updateTop();
|
||||||
|
} while (top.doc == docId);
|
||||||
|
|
||||||
|
target = top.doc;
|
||||||
|
} else {
|
||||||
|
return doc = top.doc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void movePotentiallyNonCompetitiveScorers() {
|
||||||
|
while (maxScoreSortedEssentialScorers.size() > 0
|
||||||
|
&& maxScoreSumPropagator.scoreSumUpperBound(
|
||||||
|
nonEssentialMaxScoreSum + maxScoreSortedEssentialScorers.get(0).maxScore)
|
||||||
|
< minCompetitiveScore) {
|
||||||
|
DisiWrapper nextLeastContributingScorer =
|
||||||
|
maxScoreSortedEssentialScorers.removeFirst();
|
||||||
|
nonEssentialMaxScoreSum += nextLeastContributingScorer.maxScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
// list adjusted
|
||||||
|
if (essentialsScorers.size() != maxScoreSortedEssentialScorers.size()) {
|
||||||
|
essentialsScorers.clear();
|
||||||
|
for (DisiWrapper w : maxScoreSortedEssentialScorers) {
|
||||||
|
essentialsScorers.add(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateMaxScoresAndLists(int target) throws IOException {
|
||||||
|
assert target > upTo;
|
||||||
|
// Next candidate doc id is above interval boundary, or minCompetitive has increased.
|
||||||
|
// Find next interval boundary.
|
||||||
|
// Block boundary alignment strategy is adapted from "Optimizing Top-k Document
|
||||||
|
// Retrieval Strategies for Block-Max Indexes" by Dimopoulos, Nepomnyachiy and Suel.
|
||||||
|
// Find the block interval boundary by computing statistics (max, avg etc.) from all
|
||||||
|
// participating scorer's block boundary. Then run BMM within the boundary.
|
||||||
|
updateUpToAndMaxScore(target);
|
||||||
|
repartitionLists();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateUpToAndMaxScore(int target) throws IOException {
|
||||||
|
// reset upTo
|
||||||
|
upTo = -1;
|
||||||
|
for (DisiWrapper w : allScorers) {
|
||||||
|
// using Math.max here is a good approach when there are only two clauses,
|
||||||
|
// but when this scorer is used for more than two clauses, we may need to
|
||||||
|
// consider other approaches such as avg, as the further out the boundary,
|
||||||
|
// the higher maxScore would be for a scorer, which makes skipping based on
|
||||||
|
// comparison with minCompetitiveScore harder / less effective.
|
||||||
|
upTo = Math.max(w.scorer.advanceShallow(Math.max(w.doc, target)), upTo);
|
||||||
|
}
|
||||||
|
assert target <= upTo;
|
||||||
|
|
||||||
|
for (DisiWrapper w : allScorers) {
|
||||||
|
// The assertion below will hold as long as upTo was computed using Math.max
|
||||||
|
// However, when the upTo computation method changes (to Math.avg etc),
|
||||||
|
// we may need to also handle the scenario where w.doc > upTo
|
||||||
|
assert w.doc <= upTo;
|
||||||
|
w.maxScore = w.scorer.getMaxScore(upTo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void repartitionLists() {
|
||||||
|
essentialsScorers.clear();
|
||||||
|
maxScoreSortedEssentialScorers.clear();
|
||||||
|
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore));
|
||||||
|
|
||||||
|
// Re-partition the scorers into non-essential list and essential list, as defined in
|
||||||
|
// the "Optimizing Top-k Document Retrieval Strategies for Block-Max Indexes" paper.
|
||||||
|
nonEssentialMaxScoreSum = 0;
|
||||||
|
for (DisiWrapper w : allScorers) {
|
||||||
|
if (maxScoreSumPropagator.scoreSumUpperBound(nonEssentialMaxScoreSum + w.maxScore)
|
||||||
|
< minCompetitiveScore) {
|
||||||
|
nonEssentialMaxScoreSum += w.maxScore;
|
||||||
|
} else {
|
||||||
|
maxScoreSortedEssentialScorers.add(w);
|
||||||
|
essentialsScorers.add(w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
// fixed at initialization
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return new TwoPhaseIterator(approximation) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
return score() >= minCompetitiveScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
// over-estimate
|
||||||
|
return allScorers.length;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advanceShallow(int target) throws IOException {
|
||||||
|
// Propagate to improve score bounds
|
||||||
|
maxScoreSumPropagator.advanceShallow(target);
|
||||||
|
|
||||||
|
int result = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
for (DisiWrapper s : allScorers) {
|
||||||
|
if (s.doc < target) {
|
||||||
|
result = Math.min(result, s.scorer.advanceShallow(target));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getMaxScore(int upTo) throws IOException {
|
||||||
|
return maxScoreSumPropagator.getMaxScore(upTo);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() throws IOException {
|
||||||
|
if (doc == cachedScoredDoc) {
|
||||||
|
return cachedScore;
|
||||||
|
} else {
|
||||||
|
double sum = 0;
|
||||||
|
|
||||||
|
for (DisiWrapper w : allScorers) {
|
||||||
|
if (w.doc < doc) {
|
||||||
|
w.doc = w.iterator.advance(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (w.doc == doc) {
|
||||||
|
sum += w.scorer.score();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cachedScoredDoc = doc;
|
||||||
|
cachedScore = (float) sum;
|
||||||
|
|
||||||
|
return cachedScore;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final Collection<ChildScorable> getChildren() {
|
||||||
|
List<ChildScorable> matchingChildren = new ArrayList<>();
|
||||||
|
for (DisiWrapper s : allScorers) {
|
||||||
|
if (s.doc == doc) {
|
||||||
|
matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matchingChildren;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setMinCompetitiveScore(float minScore) throws IOException {
|
||||||
|
assert minScore >= 0;
|
||||||
|
minCompetitiveScore = minScore;
|
||||||
|
maxScoreSumPropagator.setMinCompetitiveScore(minScore);
|
||||||
|
}
|
||||||
|
}
|
@ -118,6 +118,21 @@ final class Boolean2ScorerSupplier extends ScorerSupplier {
|
|||||||
leadCost);
|
leadCost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pure two terms disjunction
|
||||||
|
if (scoreMode == ScoreMode.TOP_SCORES
|
||||||
|
&& minShouldMatch <= 1
|
||||||
|
&& subs.get(Occur.FILTER).isEmpty()
|
||||||
|
&& subs.get(Occur.MUST).isEmpty()
|
||||||
|
&& subs.get(Occur.MUST_NOT).isEmpty()
|
||||||
|
&& subs.get(Occur.SHOULD).size() == 2) {
|
||||||
|
|
||||||
|
final List<Scorer> optionalScorers = new ArrayList<>();
|
||||||
|
for (ScorerSupplier scorer : subs.get(Occur.SHOULD)) {
|
||||||
|
optionalScorers.add(scorer.get(leadCost));
|
||||||
|
}
|
||||||
|
return new BlockMaxMaxscoreScorer(weight, optionalScorers);
|
||||||
|
}
|
||||||
|
|
||||||
// pure disjunction
|
// pure disjunction
|
||||||
if (subs.get(Occur.FILTER).isEmpty() && subs.get(Occur.MUST).isEmpty()) {
|
if (subs.get(Occur.FILTER).isEmpty() && subs.get(Occur.MUST).isEmpty()) {
|
||||||
return excl(
|
return excl(
|
||||||
|
@ -123,6 +123,12 @@ public final class DisiPriorityQueue implements Iterable<DisiWrapper> {
|
|||||||
return updateTop();
|
return updateTop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Clear the heap. */
|
||||||
|
public void clear() {
|
||||||
|
Arrays.fill(heap, null);
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void upHeap(int i) {
|
void upHeap(int i) {
|
||||||
final DisiWrapper node = heap[i];
|
final DisiWrapper node = heap[i];
|
||||||
final int nodeDoc = node.doc;
|
final int nodeDoc = node.doc;
|
||||||
|
@ -37,7 +37,10 @@ public class DisiWrapper {
|
|||||||
public final TwoPhaseIterator twoPhaseView;
|
public final TwoPhaseIterator twoPhaseView;
|
||||||
|
|
||||||
// For WANDScorer
|
// For WANDScorer
|
||||||
long maxScore;
|
long scaledMaxScore;
|
||||||
|
|
||||||
|
// For BlockMaxMaxscoreScorer
|
||||||
|
float maxScore;
|
||||||
|
|
||||||
public DisiWrapper(Scorer scorer) {
|
public DisiWrapper(Scorer scorer) {
|
||||||
this.scorer = scorer;
|
this.scorer = scorer;
|
||||||
|
@ -211,14 +211,14 @@ final class WANDScorer extends Scorer {
|
|||||||
long maxScoreSum = 0;
|
long maxScoreSum = 0;
|
||||||
for (int i = 0; i < tailSize; ++i) {
|
for (int i = 0; i < tailSize; ++i) {
|
||||||
assert tail[i].doc < doc;
|
assert tail[i].doc < doc;
|
||||||
maxScoreSum = Math.addExact(maxScoreSum, tail[i].maxScore);
|
maxScoreSum = Math.addExact(maxScoreSum, tail[i].scaledMaxScore);
|
||||||
}
|
}
|
||||||
assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
|
assert maxScoreSum == tailMaxScore : maxScoreSum + " " + tailMaxScore;
|
||||||
|
|
||||||
maxScoreSum = 0;
|
maxScoreSum = 0;
|
||||||
for (DisiWrapper w = lead; w != null; w = w.next) {
|
for (DisiWrapper w = lead; w != null; w = w.next) {
|
||||||
assert w.doc == doc;
|
assert w.doc == doc;
|
||||||
maxScoreSum = Math.addExact(maxScoreSum, w.maxScore);
|
maxScoreSum = Math.addExact(maxScoreSum, w.scaledMaxScore);
|
||||||
}
|
}
|
||||||
assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
|
assert maxScoreSum == leadMaxScore : maxScoreSum + " " + leadMaxScore;
|
||||||
|
|
||||||
@ -336,7 +336,7 @@ final class WANDScorer extends Scorer {
|
|||||||
private void addLead(DisiWrapper lead) {
|
private void addLead(DisiWrapper lead) {
|
||||||
lead.next = this.lead;
|
lead.next = this.lead;
|
||||||
this.lead = lead;
|
this.lead = lead;
|
||||||
leadMaxScore += lead.maxScore;
|
leadMaxScore += lead.scaledMaxScore;
|
||||||
freq += 1;
|
freq += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -402,7 +402,7 @@ final class WANDScorer extends Scorer {
|
|||||||
for (DisiWrapper w : head) {
|
for (DisiWrapper w : head) {
|
||||||
if (w.doc <= newUpTo) {
|
if (w.doc <= newUpTo) {
|
||||||
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
newUpTo = Math.min(w.scorer.advanceShallow(w.doc), newUpTo);
|
||||||
w.maxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(newUpTo), scalingFactor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
upTo = newUpTo;
|
upTo = newUpTo;
|
||||||
@ -412,9 +412,9 @@ final class WANDScorer extends Scorer {
|
|||||||
for (int i = 0; i < tailSize; ++i) {
|
for (int i = 0; i < tailSize; ++i) {
|
||||||
DisiWrapper w = tail[i];
|
DisiWrapper w = tail[i];
|
||||||
w.scorer.advanceShallow(target);
|
w.scorer.advanceShallow(target);
|
||||||
w.maxScore = scaleMaxScore(w.scorer.getMaxScore(upTo), scalingFactor);
|
w.scaledMaxScore = scaleMaxScore(w.scorer.getMaxScore(upTo), scalingFactor);
|
||||||
upHeapMaxScore(tail, i); // the heap might need to be reordered
|
upHeapMaxScore(tail, i); // the heap might need to be reordered
|
||||||
tailMaxScore += w.maxScore;
|
tailMaxScore += w.scaledMaxScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need to make sure that entries in 'tail' alone cannot match
|
// We need to make sure that entries in 'tail' alone cannot match
|
||||||
@ -480,7 +480,7 @@ final class WANDScorer extends Scorer {
|
|||||||
// pop all documents which are on this doc
|
// pop all documents which are on this doc
|
||||||
lead = head.pop();
|
lead = head.pop();
|
||||||
lead.next = null;
|
lead.next = null;
|
||||||
leadMaxScore = lead.maxScore;
|
leadMaxScore = lead.scaledMaxScore;
|
||||||
freq = 1;
|
freq = 1;
|
||||||
doc = lead.doc;
|
doc = lead.doc;
|
||||||
while (head.size() > 0 && head.top().doc == doc) {
|
while (head.size() > 0 && head.top().doc == doc) {
|
||||||
@ -552,10 +552,10 @@ final class WANDScorer extends Scorer {
|
|||||||
|
|
||||||
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
|
||||||
private DisiWrapper insertTailWithOverFlow(DisiWrapper s) {
|
private DisiWrapper insertTailWithOverFlow(DisiWrapper s) {
|
||||||
if (tailMaxScore + s.maxScore < minCompetitiveScore || tailSize + 1 < minShouldMatch) {
|
if (tailMaxScore + s.scaledMaxScore < minCompetitiveScore || tailSize + 1 < minShouldMatch) {
|
||||||
// we have free room for this new entry
|
// we have free room for this new entry
|
||||||
addTail(s);
|
addTail(s);
|
||||||
tailMaxScore += s.maxScore;
|
tailMaxScore += s.scaledMaxScore;
|
||||||
return null;
|
return null;
|
||||||
} else if (tailSize == 0) {
|
} else if (tailSize == 0) {
|
||||||
return s;
|
return s;
|
||||||
@ -567,7 +567,7 @@ final class WANDScorer extends Scorer {
|
|||||||
// Swap top and s
|
// Swap top and s
|
||||||
tail[0] = s;
|
tail[0] = s;
|
||||||
downHeapMaxScore(tail, tailSize);
|
downHeapMaxScore(tail, tailSize);
|
||||||
tailMaxScore = tailMaxScore - top.maxScore + s.maxScore;
|
tailMaxScore = tailMaxScore - top.scaledMaxScore + s.scaledMaxScore;
|
||||||
return top;
|
return top;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -585,7 +585,7 @@ final class WANDScorer extends Scorer {
|
|||||||
final DisiWrapper result = tail[0];
|
final DisiWrapper result = tail[0];
|
||||||
tail[0] = tail[--tailSize];
|
tail[0] = tail[--tailSize];
|
||||||
downHeapMaxScore(tail, tailSize);
|
downHeapMaxScore(tail, tailSize);
|
||||||
tailMaxScore -= result.maxScore;
|
tailMaxScore -= result.scaledMaxScore;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -631,9 +631,9 @@ final class WANDScorer extends Scorer {
|
|||||||
* further.
|
* further.
|
||||||
*/
|
*/
|
||||||
private static boolean greaterMaxScore(DisiWrapper w1, DisiWrapper w2) {
|
private static boolean greaterMaxScore(DisiWrapper w1, DisiWrapper w2) {
|
||||||
if (w1.maxScore > w2.maxScore) {
|
if (w1.scaledMaxScore > w2.scaledMaxScore) {
|
||||||
return true;
|
return true;
|
||||||
} else if (w1.maxScore < w2.maxScore) {
|
} else if (w1.scaledMaxScore < w2.scaledMaxScore) {
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
return w1.cost < w2.cost;
|
return w1.cost < w2.cost;
|
||||||
|
@ -0,0 +1,255 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.tests.search.AssertingScorer;
|
||||||
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
|
||||||
|
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
||||||
|
public class TestBlockMaxMaxscoreScorer extends LuceneTestCase {
|
||||||
|
private void writeDocuments(Directory dir) throws IOException {
|
||||||
|
try (IndexWriter w =
|
||||||
|
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
||||||
|
|
||||||
|
for (String[] values :
|
||||||
|
Arrays.asList(
|
||||||
|
new String[] {"A", "B"}, // 0
|
||||||
|
new String[] {"A"}, // 1
|
||||||
|
new String[] {}, // 2
|
||||||
|
new String[] {"A", "B", "C"}, // 3
|
||||||
|
new String[] {"B"}, // 4
|
||||||
|
new String[] {"B", "C"} // 5
|
||||||
|
)) {
|
||||||
|
Document doc = new Document();
|
||||||
|
for (String value : values) {
|
||||||
|
doc.add(new StringField("foo", value, Field.Store.NO));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
w.forceMerge(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithTwoDisjunctionClauses() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Scorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
if (scorer instanceof AssertingScorer) {
|
||||||
|
assertTrue(((AssertingScorer) scorer).getIn() instanceof BlockMaxMaxscoreScorer);
|
||||||
|
} else {
|
||||||
|
assertTrue(scorer instanceof BlockMaxMaxscoreScorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(0, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(1, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(3, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(4, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(5, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithThreeDisjunctionClausesNotUseBMMScorer() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Scorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
if (scorer instanceof AssertingScorer) {
|
||||||
|
assertTrue(((AssertingScorer) scorer).getIn() instanceof WANDScorer);
|
||||||
|
} else {
|
||||||
|
assertTrue(scorer instanceof WANDScorer);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(0, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(1, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(3, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1 + 3, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(4, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(5, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1 + 3, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithFilteredDisjunction() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build(),
|
||||||
|
BooleanClause.Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Scorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
assertEquals(3, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(5, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
|
||||||
|
scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.setMinCompetitiveScore(2);
|
||||||
|
|
||||||
|
assertEquals(3, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithExclusion() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.MUST_NOT)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Scorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
assertEquals(0, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(1, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(4, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
|
||||||
|
scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.scorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.setMinCompetitiveScore(3);
|
||||||
|
|
||||||
|
assertEquals(0, scorer.iterator().nextDoc());
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
|
||||||
|
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user