mirror of https://github.com/apache/lucene.git
Change the MAXSCORE scorer to a bulk scorer. (#12361)
This commit is contained in:
parent
37b92adf6a
commit
8703e449ce
|
@ -128,8 +128,12 @@ Improvements
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
* GITHUB#12377: Avoid redundant loop when computing the min value in DirectMonotonicWriter. (Chao Zhang)
|
* GITHUB#12377: Avoid redundant loop when computing the min value in DirectMonotonicWriter. (Chao Zhang)
|
||||||
|
|
||||||
|
* GITHUB#12361: Faster top-level disjunctions sorted by descending score.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -1,318 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.search;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/** Scorer implementing Block-Max Maxscore algorithm */
|
|
||||||
class BlockMaxMaxscoreScorer extends Scorer {
|
|
||||||
// current doc ID of the leads
|
|
||||||
private int doc;
|
|
||||||
|
|
||||||
// doc id boundary that all scorers maxScore are valid
|
|
||||||
private int upTo;
|
|
||||||
|
|
||||||
// heap of scorers ordered by doc ID
|
|
||||||
private final DisiPriorityQueue essentialsScorers;
|
|
||||||
|
|
||||||
// array of scorers ordered by maxScore
|
|
||||||
private final DisiWrapper[] allScorers;
|
|
||||||
|
|
||||||
// index of the first essential scorer in the `allScorers` array. All scorers before this index
|
|
||||||
// are non-essential. All scorers on and after this index are essential.
|
|
||||||
private int firstEssentialScorerIndex;
|
|
||||||
|
|
||||||
// sum of max scores of scorers in nonEssentialScorers list
|
|
||||||
private double nonEssentialMaxScoreSum;
|
|
||||||
|
|
||||||
private final long cost;
|
|
||||||
|
|
||||||
private final MaxScoreSumPropagator maxScoreSumPropagator;
|
|
||||||
|
|
||||||
private float minCompetitiveScore;
|
|
||||||
|
|
||||||
private double score;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a Scorer that scores doc based on Block-Max-Maxscore (BMM) algorithm
|
|
||||||
* http://engineering.nyu.edu/~suel/papers/bmm.pdf . This algorithm has lower overhead compared to
|
|
||||||
* WANDScorer, and could be used for simple disjunction queries.
|
|
||||||
*
|
|
||||||
* @param weight The weight to be used.
|
|
||||||
* @param scorers The sub scorers this Scorer should iterate on for optional clauses.
|
|
||||||
*/
|
|
||||||
public BlockMaxMaxscoreScorer(Weight weight, List<Scorer> scorers) throws IOException {
|
|
||||||
super(weight);
|
|
||||||
|
|
||||||
this.upTo = -1;
|
|
||||||
this.doc = -1;
|
|
||||||
this.minCompetitiveScore = 0;
|
|
||||||
this.allScorers = new DisiWrapper[scorers.size()];
|
|
||||||
this.essentialsScorers = new DisiPriorityQueue(scorers.size());
|
|
||||||
this.firstEssentialScorerIndex = 0;
|
|
||||||
|
|
||||||
long cost = 0;
|
|
||||||
for (int i = 0; i < scorers.size(); i++) {
|
|
||||||
DisiWrapper w = new DisiWrapper(scorers.get(i));
|
|
||||||
cost += w.cost;
|
|
||||||
allScorers[i] = w;
|
|
||||||
}
|
|
||||||
|
|
||||||
this.cost = cost;
|
|
||||||
maxScoreSumPropagator = new MaxScoreSumPropagator(scorers);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSetIterator iterator() {
|
|
||||||
// twoPhaseIterator needed to honor scorer.setMinCompetitiveScore guarantee
|
|
||||||
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TwoPhaseIterator twoPhaseIterator() {
|
|
||||||
DocIdSetIterator approximation =
|
|
||||||
new DocIdSetIterator() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int nextDoc() throws IOException {
|
|
||||||
return advance(doc + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advance(int target) throws IOException {
|
|
||||||
while (true) {
|
|
||||||
|
|
||||||
if (target > upTo) {
|
|
||||||
updateMaxScoresAndLists(target);
|
|
||||||
} else {
|
|
||||||
// minCompetitiveScore might have increased,
|
|
||||||
// move potentially no-longer-competitive scorers from essential to non-essential
|
|
||||||
// list
|
|
||||||
movePotentiallyNonCompetitiveScorers();
|
|
||||||
}
|
|
||||||
|
|
||||||
assert target <= upTo;
|
|
||||||
|
|
||||||
DisiWrapper top = essentialsScorers.top();
|
|
||||||
|
|
||||||
if (top == null) {
|
|
||||||
// all scorers in non-essential list, skip to next boundary or return no_more_docs
|
|
||||||
if (upTo == NO_MORE_DOCS) {
|
|
||||||
return doc = NO_MORE_DOCS;
|
|
||||||
} else {
|
|
||||||
target = upTo + 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// position all scorers in essential list to on or after target
|
|
||||||
while (top.doc < target) {
|
|
||||||
top.doc = top.iterator.advance(target);
|
|
||||||
top = essentialsScorers.updateTop();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (top.doc == NO_MORE_DOCS) {
|
|
||||||
return doc = NO_MORE_DOCS;
|
|
||||||
} else if (top.doc > upTo) {
|
|
||||||
target = upTo + 1;
|
|
||||||
} else {
|
|
||||||
return doc = top.doc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void movePotentiallyNonCompetitiveScorers() {
|
|
||||||
boolean removedEssentialScorer = false;
|
|
||||||
while (firstEssentialScorerIndex < allScorers.length
|
|
||||||
&& maxScoreSumPropagator.scoreSumUpperBound(
|
|
||||||
nonEssentialMaxScoreSum + allScorers[firstEssentialScorerIndex].maxScore)
|
|
||||||
< minCompetitiveScore) {
|
|
||||||
DisiWrapper nextLeastContributingScorer = allScorers[firstEssentialScorerIndex++];
|
|
||||||
nonEssentialMaxScoreSum += nextLeastContributingScorer.maxScore;
|
|
||||||
removedEssentialScorer = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// list adjusted
|
|
||||||
if (removedEssentialScorer) {
|
|
||||||
essentialsScorers.clear();
|
|
||||||
essentialsScorers.addAll(
|
|
||||||
allScorers,
|
|
||||||
firstEssentialScorerIndex,
|
|
||||||
allScorers.length - firstEssentialScorerIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateMaxScoresAndLists(int target) throws IOException {
|
|
||||||
assert target > upTo;
|
|
||||||
// Next candidate doc id is above interval boundary, or minCompetitive has increased.
|
|
||||||
// Find next interval boundary.
|
|
||||||
// Block boundary alignment strategy is adapted from "Optimizing Top-k Document
|
|
||||||
// Retrieval Strategies for Block-Max Indexes" by Dimopoulos, Nepomnyachiy and Suel.
|
|
||||||
// Find the block interval boundary by computing statistics (max, avg etc.) from all
|
|
||||||
// participating scorer's block boundary. Then run BMM within the boundary.
|
|
||||||
updateUpToAndMaxScore(target);
|
|
||||||
repartitionLists();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateUpToAndMaxScore(int target) throws IOException {
|
|
||||||
// reset upTo
|
|
||||||
upTo = -1;
|
|
||||||
for (DisiWrapper w : allScorers) {
|
|
||||||
// using Math.max here is a good approach when there are only two clauses,
|
|
||||||
// but when this scorer is used for more than two clauses, we may need to
|
|
||||||
// consider other approaches such as avg, as the further out the boundary,
|
|
||||||
// the higher maxScore would be for a scorer, which makes skipping based on
|
|
||||||
// comparison with minCompetitiveScore harder / less effective.
|
|
||||||
upTo = Math.max(w.scorer.advanceShallow(Math.max(w.doc, target)), upTo);
|
|
||||||
}
|
|
||||||
assert target <= upTo;
|
|
||||||
|
|
||||||
for (DisiWrapper w : allScorers) {
|
|
||||||
// The assertion below will hold as long as upTo was computed using Math.max
|
|
||||||
// However, when the upTo computation method changes (to Math.avg etc),
|
|
||||||
// we may need to also handle the scenario where w.doc > upTo
|
|
||||||
assert w.doc <= upTo;
|
|
||||||
w.maxScore = w.scorer.getMaxScore(upTo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void repartitionLists() {
|
|
||||||
firstEssentialScorerIndex = 0;
|
|
||||||
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore));
|
|
||||||
|
|
||||||
// Re-partition the scorers into non-essential list and essential list, as defined in
|
|
||||||
// the "Optimizing Top-k Document Retrieval Strategies for Block-Max Indexes" paper.
|
|
||||||
nonEssentialMaxScoreSum = 0;
|
|
||||||
for (DisiWrapper w : allScorers) {
|
|
||||||
if (maxScoreSumPropagator.scoreSumUpperBound(nonEssentialMaxScoreSum + w.maxScore)
|
|
||||||
>= minCompetitiveScore) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
firstEssentialScorerIndex++;
|
|
||||||
nonEssentialMaxScoreSum += w.maxScore;
|
|
||||||
}
|
|
||||||
essentialsScorers.clear();
|
|
||||||
essentialsScorers.addAll(
|
|
||||||
allScorers,
|
|
||||||
firstEssentialScorerIndex,
|
|
||||||
allScorers.length - firstEssentialScorerIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
// fixed at initialization
|
|
||||||
return cost;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
return new TwoPhaseIterator(approximation) {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean matches() throws IOException {
|
|
||||||
// Start evaluating the score of the new document. It initially only includes essential
|
|
||||||
// clauses and abort / return early if a match is not possible.
|
|
||||||
// Scores of non-essential clauses get added later on to determine actual matches.
|
|
||||||
score = 0;
|
|
||||||
for (DisiWrapper w = essentialsScorers.topList(); w != null; w = w.next) {
|
|
||||||
score += w.scorer.score();
|
|
||||||
}
|
|
||||||
|
|
||||||
final double docScoreUpperBound = score + nonEssentialMaxScoreSum;
|
|
||||||
|
|
||||||
if (maxScoreSumPropagator.scoreSumUpperBound(docScoreUpperBound) < minCompetitiveScore) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Continue to add scores of non-essential scorers
|
|
||||||
for (int i = 0; i < firstEssentialScorerIndex; ++i) {
|
|
||||||
DisiWrapper w = allScorers[i];
|
|
||||||
if (w.doc < doc) {
|
|
||||||
w.doc = w.iterator.advance(doc);
|
|
||||||
}
|
|
||||||
if (w.doc == doc) {
|
|
||||||
score += allScorers[i].scorer.score();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return score() >= minCompetitiveScore;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float matchCost() {
|
|
||||||
// over-estimate
|
|
||||||
return allScorers.length;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int advanceShallow(int target) throws IOException {
|
|
||||||
// Propagate to improve score bounds
|
|
||||||
maxScoreSumPropagator.advanceShallow(target);
|
|
||||||
|
|
||||||
int result = DocIdSetIterator.NO_MORE_DOCS;
|
|
||||||
for (DisiWrapper s : allScorers) {
|
|
||||||
if (s.doc < target) {
|
|
||||||
result = Math.min(result, s.scorer.advanceShallow(target));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float getMaxScore(int upTo) throws IOException {
|
|
||||||
return maxScoreSumPropagator.getMaxScore(upTo);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float score() throws IOException {
|
|
||||||
return (float) score;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docID() {
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final Collection<ChildScorable> getChildren() {
|
|
||||||
List<ChildScorable> matchingChildren = new ArrayList<>();
|
|
||||||
for (DisiWrapper s : allScorers) {
|
|
||||||
if (s.doc == doc) {
|
|
||||||
matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return matchingChildren;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setMinCompetitiveScore(float minScore) throws IOException {
|
|
||||||
assert minScore >= 0;
|
|
||||||
minCompetitiveScore = minScore;
|
|
||||||
maxScoreSumPropagator.setMinCompetitiveScore(minScore);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -218,34 +218,7 @@ final class BooleanWeight extends Weight {
|
||||||
optionalScorers.add(ss.get(Long.MAX_VALUE));
|
optionalScorers.add(ss.get(Long.MAX_VALUE));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new BulkScorer() {
|
return new MaxScoreBulkScorer(optionalScorers);
|
||||||
final Scorer bmmScorer = new BlockMaxMaxscoreScorer(BooleanWeight.this, optionalScorers);
|
|
||||||
final DocIdSetIterator iterator = bmmScorer.iterator();
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
|
|
||||||
throws IOException {
|
|
||||||
collector.setScorer(bmmScorer);
|
|
||||||
|
|
||||||
int doc = bmmScorer.docID();
|
|
||||||
if (doc < min) {
|
|
||||||
doc = iterator.advance(min);
|
|
||||||
}
|
|
||||||
while (doc < max) {
|
|
||||||
if (acceptDocs == null || acceptDocs.get(doc)) {
|
|
||||||
collector.collect(doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
doc = iterator.nextDoc();
|
|
||||||
}
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return iterator.cost();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
List<BulkScorer> optional = new ArrayList<BulkScorer>();
|
List<BulkScorer> optional = new ArrayList<BulkScorer>();
|
||||||
|
|
|
@ -39,8 +39,8 @@ public class DisiWrapper {
|
||||||
// For WANDScorer
|
// For WANDScorer
|
||||||
long scaledMaxScore;
|
long scaledMaxScore;
|
||||||
|
|
||||||
// For BlockMaxMaxscoreScorer
|
// for MaxScoreBulkScorer
|
||||||
float maxScore;
|
float maxWindowScore;
|
||||||
|
|
||||||
public DisiWrapper(Scorer scorer) {
|
public DisiWrapper(Scorer scorer) {
|
||||||
this.scorer = scorer;
|
this.scorer = scorer;
|
||||||
|
|
|
@ -0,0 +1,222 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
|
final class MaxScoreBulkScorer extends BulkScorer {
|
||||||
|
|
||||||
|
// All scorers, sorted by increasing max score.
|
||||||
|
private final DisiWrapper[] allScorers;
|
||||||
|
// These are the last scorers from `allScorers` that are "essential", ie. required for a match to
|
||||||
|
// have a competitive score.
|
||||||
|
private final DisiPriorityQueue essentialQueue;
|
||||||
|
// Index of the first essential scorer, ie. essentialQueue contains all scorers from
|
||||||
|
// allScorers[firstEssentialScorer:]. All scorers below this index are non-essential.
|
||||||
|
private int firstEssentialScorer;
|
||||||
|
private final MaxScoreSumPropagator maxScorePropagator;
|
||||||
|
private final long cost;
|
||||||
|
private float minCompetitiveScore;
|
||||||
|
private boolean minCompetitiveScoreUpdated;
|
||||||
|
private ScoreAndDoc scorable = new ScoreAndDoc();
|
||||||
|
private final double[] maxScoreSums;
|
||||||
|
|
||||||
|
MaxScoreBulkScorer(List<Scorer> scorers) throws IOException {
|
||||||
|
allScorers = new DisiWrapper[scorers.size()];
|
||||||
|
int i = 0;
|
||||||
|
long cost = 0;
|
||||||
|
for (Scorer scorer : scorers) {
|
||||||
|
DisiWrapper w = new DisiWrapper(scorer);
|
||||||
|
cost += w.cost;
|
||||||
|
allScorers[i++] = w;
|
||||||
|
}
|
||||||
|
this.cost = cost;
|
||||||
|
maxScorePropagator = new MaxScoreSumPropagator(scorers);
|
||||||
|
essentialQueue = new DisiPriorityQueue(allScorers.length);
|
||||||
|
maxScoreSums = new double[allScorers.length];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||||
|
collector.setScorer(scorable);
|
||||||
|
|
||||||
|
int windowMin = min;
|
||||||
|
main:
|
||||||
|
while (windowMin < max) {
|
||||||
|
int windowMax = updateMaxWindowScores(windowMin);
|
||||||
|
windowMax = Math.min(windowMax, max);
|
||||||
|
if (partitionScorers() == false) {
|
||||||
|
// No matches in this window
|
||||||
|
windowMin = windowMax;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DisiWrapper top = essentialQueue.top();
|
||||||
|
while (top.doc < windowMin) {
|
||||||
|
top.doc = top.iterator.advance(windowMin);
|
||||||
|
top = essentialQueue.updateTop();
|
||||||
|
}
|
||||||
|
|
||||||
|
while (top.doc < windowMax) {
|
||||||
|
if (acceptDocs == null || acceptDocs.get(top.doc)) {
|
||||||
|
DisiWrapper topList = essentialQueue.topList();
|
||||||
|
double score = topList.scorer.score();
|
||||||
|
for (DisiWrapper w = topList.next; w != null; w = w.next) {
|
||||||
|
score += w.scorer.score();
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean possibleMatch = true;
|
||||||
|
for (int i = firstEssentialScorer - 1; i >= 0; --i) {
|
||||||
|
float maxPossibleScore = maxScorePropagator.scoreSumUpperBound(score + maxScoreSums[i]);
|
||||||
|
if (maxPossibleScore < minCompetitiveScore) {
|
||||||
|
possibleMatch = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
DisiWrapper scorer = allScorers[i];
|
||||||
|
if (scorer.doc < top.doc) {
|
||||||
|
scorer.doc = scorer.iterator.advance(top.doc);
|
||||||
|
}
|
||||||
|
if (scorer.doc == top.doc) {
|
||||||
|
score += scorer.scorer.score();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (possibleMatch) {
|
||||||
|
scorable.doc = top.doc;
|
||||||
|
scorable.score = (float) score;
|
||||||
|
collector.collect(top.doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int doc = top.doc;
|
||||||
|
do {
|
||||||
|
top.doc = top.iterator.nextDoc();
|
||||||
|
top = essentialQueue.updateTop();
|
||||||
|
} while (top.doc == doc);
|
||||||
|
|
||||||
|
if (minCompetitiveScoreUpdated) {
|
||||||
|
minCompetitiveScoreUpdated = false;
|
||||||
|
if (partitionScorers()) {
|
||||||
|
top = essentialQueue.top();
|
||||||
|
} else {
|
||||||
|
windowMin = windowMax;
|
||||||
|
continue main;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
windowMin = windowMax;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nextCandidate(max);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int updateMaxWindowScores(int windowMin) throws IOException {
|
||||||
|
// Only use essential scorers to compute the window's max doc ID, in order to avoid constantly
|
||||||
|
// recomputing max scores over small windows
|
||||||
|
final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1);
|
||||||
|
for (int i = 0; i < firstWindowLead; ++i) {
|
||||||
|
final DisiWrapper scorer = allScorers[i];
|
||||||
|
if (scorer.doc < windowMin) {
|
||||||
|
scorer.scorer.advanceShallow(windowMin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int windowMax = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
for (int i = firstWindowLead; i < allScorers.length; ++i) {
|
||||||
|
final DisiWrapper scorer = allScorers[i];
|
||||||
|
final int upTo = scorer.scorer.advanceShallow(Math.max(scorer.doc, windowMin));
|
||||||
|
windowMax = (int) Math.min(windowMax, upTo + 1L); // upTo is inclusive
|
||||||
|
}
|
||||||
|
for (DisiWrapper scorer : allScorers) {
|
||||||
|
if (scorer.doc < windowMax) {
|
||||||
|
scorer.maxWindowScore = scorer.scorer.getMaxScore(windowMax - 1);
|
||||||
|
} else {
|
||||||
|
scorer.maxWindowScore = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return windowMax;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean partitionScorers() {
|
||||||
|
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxWindowScore));
|
||||||
|
double maxScoreSum = 0;
|
||||||
|
for (firstEssentialScorer = 0;
|
||||||
|
firstEssentialScorer < allScorers.length;
|
||||||
|
++firstEssentialScorer) {
|
||||||
|
maxScoreSum += allScorers[firstEssentialScorer].maxWindowScore;
|
||||||
|
maxScoreSums[firstEssentialScorer] = maxScoreSum;
|
||||||
|
float maxScoreSumFloat =
|
||||||
|
MaxScoreSumPropagator.scoreSumUpperBound(maxScoreSum, firstEssentialScorer + 1);
|
||||||
|
if (maxScoreSumFloat >= minCompetitiveScore) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (firstEssentialScorer == allScorers.length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
essentialQueue.clear();
|
||||||
|
for (int i = firstEssentialScorer; i < allScorers.length; ++i) {
|
||||||
|
essentialQueue.add(allScorers[i]);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return the next candidate on or after {@code rangeEnd}. */
|
||||||
|
private int nextCandidate(int rangeEnd) {
|
||||||
|
int next = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
for (DisiWrapper scorer : allScorers) {
|
||||||
|
if (scorer.doc < rangeEnd) {
|
||||||
|
return rangeEnd;
|
||||||
|
} else {
|
||||||
|
next = Math.min(next, scorer.doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ScoreAndDoc extends Scorable {
|
||||||
|
|
||||||
|
float score;
|
||||||
|
int doc = -1;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float score() {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setMinCompetitiveScore(float minScore) throws IOException {
|
||||||
|
MaxScoreBulkScorer.this.minCompetitiveScore = minScore;
|
||||||
|
maxScorePropagator.setMinCompetitiveScore(minScore);
|
||||||
|
minCompetitiveScoreUpdated = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -167,6 +167,10 @@ final class MaxScoreSumPropagator {
|
||||||
}
|
}
|
||||||
|
|
||||||
float scoreSumUpperBound(double sum) {
|
float scoreSumUpperBound(double sum) {
|
||||||
|
return scoreSumUpperBound(sum, numClauses);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float scoreSumUpperBound(double sum, int numClauses) {
|
||||||
if (numClauses <= 2) {
|
if (numClauses <= 2) {
|
||||||
// When there are only two clauses, the sum is always the same regardless
|
// When there are only two clauses, the sum is always the same regardless
|
||||||
// of the order.
|
// of the order.
|
||||||
|
|
|
@ -1,333 +0,0 @@
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.search;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.StringField;
|
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexWriter;
|
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
|
||||||
|
|
||||||
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
|
||||||
public class TestBlockMaxMaxscoreScorer extends LuceneTestCase {
|
|
||||||
private void writeDocuments(Directory dir) throws IOException {
|
|
||||||
try (IndexWriter w =
|
|
||||||
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
|
||||||
|
|
||||||
for (String[] values :
|
|
||||||
Arrays.asList(
|
|
||||||
new String[] {"A", "B"}, // 0
|
|
||||||
new String[] {"A"}, // 1
|
|
||||||
new String[] {}, // 2
|
|
||||||
new String[] {"A", "B", "C"}, // 3
|
|
||||||
new String[] {"B"}, // 4
|
|
||||||
new String[] {"B", "C"} // 5
|
|
||||||
)) {
|
|
||||||
Document doc = new Document();
|
|
||||||
for (String value : values) {
|
|
||||||
doc.add(new StringField("foo", value, Field.Store.NO));
|
|
||||||
}
|
|
||||||
w.addDocument(doc);
|
|
||||||
}
|
|
||||||
w.forceMerge(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testBasicsWithTwoDisjunctionClauses() throws Exception {
|
|
||||||
try (Directory dir = newDirectory()) {
|
|
||||||
writeDocuments(dir);
|
|
||||||
|
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
Query query =
|
|
||||||
new BlockMaxMaxscoreQuery(
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BoostQuery(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.add(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
Scorer scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
assertEquals(0, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(1, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(3, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(4, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(5, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testBasicsWithThreeDisjunctionClauses() throws Exception {
|
|
||||||
try (Directory dir = newDirectory()) {
|
|
||||||
writeDocuments(dir);
|
|
||||||
|
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
Query query =
|
|
||||||
new BlockMaxMaxscoreQuery(
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BoostQuery(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.add(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.add(
|
|
||||||
new BoostQuery(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.build());
|
|
||||||
|
|
||||||
Scorer scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
assertEquals(0, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(1, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(3, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1 + 3, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(4, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(5, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1 + 3, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testBasicsWithFilteredDisjunction() throws Exception {
|
|
||||||
try (Directory dir = newDirectory()) {
|
|
||||||
writeDocuments(dir);
|
|
||||||
|
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
Query query =
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BlockMaxMaxscoreQuery(
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BoostQuery(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.add(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.build()),
|
|
||||||
BooleanClause.Occur.MUST)
|
|
||||||
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.FILTER)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
Scorer scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
assertEquals(3, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(5, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
|
|
||||||
scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
scorer.setMinCompetitiveScore(2);
|
|
||||||
|
|
||||||
assertEquals(3, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testBasicsWithExclusion() throws Exception {
|
|
||||||
try (Directory dir = newDirectory()) {
|
|
||||||
writeDocuments(dir);
|
|
||||||
|
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
|
|
||||||
Query query =
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BlockMaxMaxscoreQuery(
|
|
||||||
new BooleanQuery.Builder()
|
|
||||||
.add(
|
|
||||||
new BoostQuery(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.add(
|
|
||||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
|
||||||
BooleanClause.Occur.SHOULD)
|
|
||||||
.build()),
|
|
||||||
BooleanClause.Occur.MUST)
|
|
||||||
.add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.MUST_NOT)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
Scorer scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
assertEquals(0, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(1, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(4, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
|
|
||||||
scorer =
|
|
||||||
searcher
|
|
||||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
|
||||||
.scorer(searcher.getIndexReader().leaves().get(0));
|
|
||||||
|
|
||||||
scorer.setMinCompetitiveScore(3);
|
|
||||||
|
|
||||||
assertEquals(0, scorer.iterator().nextDoc());
|
|
||||||
assertEquals(2 + 1, scorer.score(), 0);
|
|
||||||
|
|
||||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class BlockMaxMaxscoreQuery extends Query {
|
|
||||||
private final BooleanQuery query;
|
|
||||||
|
|
||||||
private BlockMaxMaxscoreQuery(BooleanQuery query) {
|
|
||||||
assert query.isPureDisjunction()
|
|
||||||
: "This test utility query is only used to create BlockMaxMaxscoreScorer for disjunctions.";
|
|
||||||
assert query.clauses().size() >= 2
|
|
||||||
: "There must be at least two optional clauses to use this test utility query.";
|
|
||||||
this.query = query;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
|
|
||||||
throws IOException {
|
|
||||||
return new Weight(query) {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
|
||||||
// no-ops
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
|
||||||
BooleanWeight weight = (BooleanWeight) query.createWeight(searcher, scoreMode, boost);
|
|
||||||
List<Scorer> optionalScorers =
|
|
||||||
weight.weightedClauses.stream()
|
|
||||||
.map(wc -> wc.weight)
|
|
||||||
.map(
|
|
||||||
w -> {
|
|
||||||
try {
|
|
||||||
return w.scorerSupplier(context);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new AssertionError(e);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.map(
|
|
||||||
ss -> {
|
|
||||||
try {
|
|
||||||
return ss.get(Long.MAX_VALUE);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new AssertionError(e);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.toList();
|
|
||||||
|
|
||||||
return new BlockMaxMaxscoreScorer(weight, optionalScorers);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isCacheable(LeafReaderContext ctx) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString(String field) {
|
|
||||||
return "BlockMaxMaxscoreQuery";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void visit(QueryVisitor visitor) {
|
|
||||||
// no-ops
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object other) {
|
|
||||||
return sameClassAs(other) && query.equals(((BlockMaxMaxscoreQuery) other).query);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return 31 * classHash() + query.hashCode();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,325 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
|
||||||
|
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
||||||
|
public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||||
|
|
||||||
|
private void writeDocuments(Directory dir) throws IOException {
|
||||||
|
try (IndexWriter w =
|
||||||
|
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
||||||
|
|
||||||
|
for (String[] values :
|
||||||
|
Arrays.asList(
|
||||||
|
new String[] {"A", "B"}, // 0
|
||||||
|
new String[] {"A"}, // 1
|
||||||
|
new String[] {}, // 2
|
||||||
|
new String[] {"A", "B", "C"}, // 3
|
||||||
|
new String[] {"B"}, // 4
|
||||||
|
new String[] {"B", "C"} // 5
|
||||||
|
)) {
|
||||||
|
Document doc = new Document();
|
||||||
|
for (String value : values) {
|
||||||
|
doc.add(new StringField("foo", value, Field.Store.NO));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
w.forceMerge(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithTwoDisjunctionClauses() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
BulkScorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.score(
|
||||||
|
new LeafCollector() {
|
||||||
|
|
||||||
|
private int i;
|
||||||
|
private Scorable scorer;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
switch (i++) {
|
||||||
|
case 0:
|
||||||
|
assertEquals(0, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
assertEquals(1, doc);
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
assertEquals(3, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
assertEquals(4, doc);
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
assertEquals(5, doc);
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fail();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithTwoDisjunctionClausesAndSkipping() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
BulkScorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.score(
|
||||||
|
new LeafCollector() {
|
||||||
|
|
||||||
|
private int i;
|
||||||
|
private Scorable scorer;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
switch (i++) {
|
||||||
|
case 0:
|
||||||
|
assertEquals(0, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
assertEquals(1, doc);
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
// simulate top-2 retrieval
|
||||||
|
scorer.setMinCompetitiveScore(Math.nextUp(2));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
assertEquals(3, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
scorer.setMinCompetitiveScore(Math.nextUp(2 + 1));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fail();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithThreeDisjunctionClauses() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
BulkScorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.score(
|
||||||
|
new LeafCollector() {
|
||||||
|
|
||||||
|
private int i;
|
||||||
|
private Scorable scorer;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
switch (i++) {
|
||||||
|
case 0:
|
||||||
|
assertEquals(0, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
assertEquals(1, doc);
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
assertEquals(3, doc);
|
||||||
|
assertEquals(2 + 1 + 3, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
assertEquals(4, doc);
|
||||||
|
assertEquals(1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
assertEquals(5, doc);
|
||||||
|
assertEquals(1 + 3, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fail();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBasicsWithThreeDisjunctionClausesAndSkipping() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
writeDocuments(dir);
|
||||||
|
|
||||||
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
Query query =
|
||||||
|
new BooleanQuery.Builder()
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.add(
|
||||||
|
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||||
|
BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
BulkScorer scorer =
|
||||||
|
searcher
|
||||||
|
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||||
|
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||||
|
|
||||||
|
scorer.score(
|
||||||
|
new LeafCollector() {
|
||||||
|
|
||||||
|
private int i;
|
||||||
|
private Scorable scorer;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setScorer(Scorable scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
switch (i++) {
|
||||||
|
case 0:
|
||||||
|
assertEquals(0, doc);
|
||||||
|
assertEquals(2 + 1, scorer.score(), 0);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
assertEquals(1, doc);
|
||||||
|
assertEquals(2, scorer.score(), 0);
|
||||||
|
// simulate top-2 retrieval
|
||||||
|
scorer.setMinCompetitiveScore(Math.nextUp(2));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
assertEquals(3, doc);
|
||||||
|
assertEquals(2 + 1 + 3, scorer.score(), 0);
|
||||||
|
scorer.setMinCompetitiveScore(Math.nextUp(2 + 1));
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
assertEquals(5, doc);
|
||||||
|
assertEquals(1 + 3, scorer.score(), 0);
|
||||||
|
scorer.setMinCompetitiveScore(Math.nextUp(1 + 3));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fail();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue