Change the MAXSCORE scorer to a bulk scorer. (#12361)

This commit is contained in:
Adrien Grand 2023-06-20 18:55:03 +02:00 committed by GitHub
parent 37b92adf6a
commit 8703e449ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 558 additions and 681 deletions

View File

@ -128,8 +128,12 @@ Improvements
Optimizations
---------------------
* GITHUB#12377: Avoid a redundant loop when computing the min value in DirectMonotonicWriter. (Chao Zhang)
* GITHUB#12361: Faster top-level disjunctions sorted by descending score.
(Adrien Grand)
Bug Fixes
---------------------
(No changes)

View File

@ -1,318 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
/**
 * Scorer implementing the Block-Max Maxscore algorithm over a list of optional sub-scorers.
 *
 * <p>Sub-scorers are kept sorted by their max score over the current block (doc IDs up to {@code
 * upTo}, inclusive) and split into a "non-essential" prefix and an "essential" suffix. Only
 * essential scorers drive iteration; non-essential scorers are advanced lazily in {@code
 * matches()} to complete the score of a candidate.
 */
class BlockMaxMaxscoreScorer extends Scorer {
// current doc ID of the leads
private int doc;
// inclusive doc ID boundary up to which the per-scorer maxScore values are valid
private int upTo;
// heap of essential scorers ordered by doc ID
private final DisiPriorityQueue essentialsScorers;
// array of all scorers, re-sorted by increasing maxScore whenever the block changes
private final DisiWrapper[] allScorers;
// index of the first essential scorer in the `allScorers` array. All scorers before this index
// are non-essential. All scorers on and after this index are essential.
private int firstEssentialScorerIndex;
// sum of the max scores of the non-essential scorers, i.e. of
// allScorers[0 .. firstEssentialScorerIndex)
private double nonEssentialMaxScoreSum;
// sum of the costs of all sub-scorers, computed once in the constructor
private final long cost;
private final MaxScoreSumPropagator maxScoreSumPropagator;
// minimum score a hit must reach, as last set through setMinCompetitiveScore
private float minCompetitiveScore;
// score of the current doc, accumulated as a double by matches()
private double score;
/**
 * Constructs a Scorer that scores doc based on Block-Max-Maxscore (BMM) algorithm
 * http://engineering.nyu.edu/~suel/papers/bmm.pdf . This algorithm has lower overhead compared to
 * WANDScorer, and could be used for simple disjunction queries.
 *
 * @param weight The weight to be used.
 * @param scorers The sub scorers this Scorer should iterate on for optional clauses.
 */
public BlockMaxMaxscoreScorer(Weight weight, List<Scorer> scorers) throws IOException {
super(weight);
// -1 forces the first advance() call to compute a block and partition the scorers
this.upTo = -1;
this.doc = -1;
this.minCompetitiveScore = 0;
this.allScorers = new DisiWrapper[scorers.size()];
this.essentialsScorers = new DisiPriorityQueue(scorers.size());
this.firstEssentialScorerIndex = 0;
long cost = 0;
for (int i = 0; i < scorers.size(); i++) {
DisiWrapper w = new DisiWrapper(scorers.get(i));
cost += w.cost;
allScorers[i] = w;
}
this.cost = cost;
maxScoreSumPropagator = new MaxScoreSumPropagator(scorers);
}
/** Returns an iterator that only stops on docs for which {@code matches()} returned true. */
@Override
public DocIdSetIterator iterator() {
// twoPhaseIterator needed to honor scorer.setMinCompetitiveScore guarantee
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
}
/**
 * Returns a two-phase iterator: the approximation visits docs on which at least one essential
 * scorer is positioned, and {@code matches()} computes the score and rejects docs whose score is
 * below {@code minCompetitiveScore}.
 */
@Override
public TwoPhaseIterator twoPhaseIterator() {
DocIdSetIterator approximation =
new DocIdSetIterator() {
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
// Each iteration handles one block; the loop moves on to the next block when the current
// one has no candidate on or after target.
while (true) {
if (target > upTo) {
updateMaxScoresAndLists(target);
} else {
// minCompetitiveScore might have increased,
// move potentially no-longer-competitive scorers from essential to non-essential
// list
movePotentiallyNonCompetitiveScorers();
}
assert target <= upTo;
DisiWrapper top = essentialsScorers.top();
if (top == null) {
// all scorers in non-essential list, skip to next boundary or return no_more_docs
if (upTo == NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
} else {
target = upTo + 1;
}
} else {
// position all scorers in essential list to on or after target
while (top.doc < target) {
top.doc = top.iterator.advance(target);
top = essentialsScorers.updateTop();
}
if (top.doc == NO_MORE_DOCS) {
return doc = NO_MORE_DOCS;
} else if (top.doc > upTo) {
// the next candidate lies beyond the current block: recompute the block first
target = upTo + 1;
} else {
return doc = top.doc;
}
}
}
}
// Grows the non-essential prefix while the score upper bound of the prefix extended by the
// next scorer stays below minCompetitiveScore, then rebuilds the essential heap if the
// prefix changed.
private void movePotentiallyNonCompetitiveScorers() {
boolean removedEssentialScorer = false;
while (firstEssentialScorerIndex < allScorers.length
&& maxScoreSumPropagator.scoreSumUpperBound(
nonEssentialMaxScoreSum + allScorers[firstEssentialScorerIndex].maxScore)
< minCompetitiveScore) {
DisiWrapper nextLeastContributingScorer = allScorers[firstEssentialScorerIndex++];
nonEssentialMaxScoreSum += nextLeastContributingScorer.maxScore;
removedEssentialScorer = true;
}
// list adjusted
if (removedEssentialScorer) {
essentialsScorers.clear();
essentialsScorers.addAll(
allScorers,
firstEssentialScorerIndex,
allScorers.length - firstEssentialScorerIndex);
}
}
// Computes a new block containing target, refreshes every scorer's max score over it, and
// re-partitions the scorers from scratch.
private void updateMaxScoresAndLists(int target) throws IOException {
assert target > upTo;
// Next candidate doc id is above the interval boundary.
// Find next interval boundary.
// Block boundary alignment strategy is adapted from "Optimizing Top-k Document
// Retrieval Strategies for Block-Max Indexes" by Dimopoulos, Nepomnyachiy and Suel.
// Find the block interval boundary by computing statistics (max, avg etc.) from all
// participating scorer's block boundary. Then run BMM within the boundary.
updateUpToAndMaxScore(target);
repartitionLists();
}
// Sets upTo to the largest boundary returned by the scorers' advanceShallow calls, then reads
// each scorer's max score up to that boundary.
private void updateUpToAndMaxScore(int target) throws IOException {
// reset upTo
upTo = -1;
for (DisiWrapper w : allScorers) {
// using Math.max here is a good approach when there are only two clauses,
// but when this scorer is used for more than two clauses, we may need to
// consider other approaches such as avg, as the further out the boundary,
// the higher maxScore would be for a scorer, which makes skipping based on
// comparison with minCompetitiveScore harder / less effective.
upTo = Math.max(w.scorer.advanceShallow(Math.max(w.doc, target)), upTo);
}
assert target <= upTo;
for (DisiWrapper w : allScorers) {
// The assertion below will hold as long as upTo was computed using Math.max
// However, when the upTo computation method changes (to Math.avg etc),
// we may need to also handle the scenario where w.doc > upTo
assert w.doc <= upTo;
w.maxScore = w.scorer.getMaxScore(upTo);
}
}
// Sorts the scorers by increasing maxScore and recomputes the essential / non-essential split
// and the essential heap.
private void repartitionLists() {
firstEssentialScorerIndex = 0;
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore));
// Re-partition the scorers into non-essential list and essential list, as defined in
// the "Optimizing Top-k Document Retrieval Strategies for Block-Max Indexes" paper.
nonEssentialMaxScoreSum = 0;
for (DisiWrapper w : allScorers) {
if (maxScoreSumPropagator.scoreSumUpperBound(nonEssentialMaxScoreSum + w.maxScore)
>= minCompetitiveScore) {
break;
}
firstEssentialScorerIndex++;
nonEssentialMaxScoreSum += w.maxScore;
}
essentialsScorers.clear();
essentialsScorers.addAll(
allScorers,
firstEssentialScorerIndex,
allScorers.length - firstEssentialScorerIndex);
}
@Override
public long cost() {
// fixed at initialization
return cost;
}
};
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
// Start evaluating the score of the new document. It initially only includes essential
// clauses and abort / return early if a match is not possible.
// Scores of non-essential clauses get added later on to determine actual matches.
score = 0;
for (DisiWrapper w = essentialsScorers.topList(); w != null; w = w.next) {
score += w.scorer.score();
}
final double docScoreUpperBound = score + nonEssentialMaxScoreSum;
if (maxScoreSumPropagator.scoreSumUpperBound(docScoreUpperBound) < minCompetitiveScore) {
return false;
}
// Continue to add scores of non-essential scorers
for (int i = 0; i < firstEssentialScorerIndex; ++i) {
DisiWrapper w = allScorers[i];
// non-essential scorers are only advanced here, when a candidate needs them
if (w.doc < doc) {
w.doc = w.iterator.advance(doc);
}
if (w.doc == doc) {
score += allScorers[i].scorer.score();
}
}
return score() >= minCompetitiveScore;
}
@Override
public float matchCost() {
// over-estimate
return allScorers.length;
}
};
}
/**
 * Calls {@code advanceShallow(target)} on the max-score propagator and on every sub-scorer that
 * is positioned before {@code target}, and returns the smallest value returned by those
 * sub-scorers, or {@code NO_MORE_DOCS} if no sub-scorer is positioned before {@code target}.
 */
@Override
public int advanceShallow(int target) throws IOException {
// Propagate to improve score bounds
maxScoreSumPropagator.advanceShallow(target);
int result = DocIdSetIterator.NO_MORE_DOCS;
for (DisiWrapper s : allScorers) {
if (s.doc < target) {
result = Math.min(result, s.scorer.advanceShallow(target));
}
}
return result;
}
/** Delegates to the max-score propagator built over all sub-scorers. */
@Override
public float getMaxScore(int upTo) throws IOException {
return maxScoreSumPropagator.getMaxScore(upTo);
}
/** Returns the score accumulated by the last {@code matches()} call, narrowed to a float. */
@Override
public float score() throws IOException {
return (float) score;
}
@Override
public int docID() {
return doc;
}
/** Returns the sub-scorers currently positioned on {@code doc}, each tagged "SHOULD". */
@Override
public final Collection<ChildScorable> getChildren() {
List<ChildScorable> matchingChildren = new ArrayList<>();
for (DisiWrapper s : allScorers) {
if (s.doc == doc) {
matchingChildren.add(new ChildScorable(s.scorer, "SHOULD"));
}
}
return matchingChildren;
}
/** Records the new minimum competitive score and forwards it to the max-score propagator. */
@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
assert minScore >= 0;
minCompetitiveScore = minScore;
maxScoreSumPropagator.setMinCompetitiveScore(minScore);
}
}

View File

@ -218,34 +218,7 @@ final class BooleanWeight extends Weight {
optionalScorers.add(ss.get(Long.MAX_VALUE));
}
return new BulkScorer() {
final Scorer bmmScorer = new BlockMaxMaxscoreScorer(BooleanWeight.this, optionalScorers);
final DocIdSetIterator iterator = bmmScorer.iterator();
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
throws IOException {
collector.setScorer(bmmScorer);
int doc = bmmScorer.docID();
if (doc < min) {
doc = iterator.advance(min);
}
while (doc < max) {
if (acceptDocs == null || acceptDocs.get(doc)) {
collector.collect(doc);
}
doc = iterator.nextDoc();
}
return doc;
}
@Override
public long cost() {
return iterator.cost();
}
};
return new MaxScoreBulkScorer(optionalScorers);
}
List<BulkScorer> optional = new ArrayList<BulkScorer>();

View File

@ -39,8 +39,8 @@ public class DisiWrapper {
// For WANDScorer
long scaledMaxScore;
// For BlockMaxMaxscoreScorer
float maxScore;
// for MaxScoreBulkScorer
float maxWindowScore;
public DisiWrapper(Scorer scorer) {
this.scorer = scorer;

View File

@ -0,0 +1,222 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.Bits;
/**
 * Bulk scorer for disjunctions that skips non-competitive hits with a MAXSCORE-style strategy.
 *
 * <p>The doc ID space is processed window by window. For each window the clauses are sorted by
 * their maximum score over the window and split into a non-essential prefix, whose combined
 * maximum score cannot reach the minimum competitive score, and an essential suffix. Only
 * essential clauses drive iteration; non-essential clauses are advanced on demand to complete the
 * score of a candidate.
 */
final class MaxScoreBulkScorer extends BulkScorer {

  // Every clause, kept in increasing order of max window score.
  private final DisiWrapper[] allScorers;
  // Doc-ordered queue over the essential clauses, i.e. allScorers[firstEssentialScorer:].
  private final DisiPriorityQueue essentialQueue;
  // Position of the first essential clause in `allScorers`; everything before it is
  // non-essential.
  private int firstEssentialScorer;
  private final MaxScoreSumPropagator maxScorePropagator;
  private final long cost;
  private float minCompetitiveScore;
  // Raised by the collector-facing scorable when it receives a new minimum competitive score.
  private boolean minCompetitiveScoreUpdated;
  private ScoreAndDoc scorable = new ScoreAndDoc();
  // maxScoreSums[i] is the sum of the max window scores of allScorers[0..i], as computed by the
  // last partitioning.
  private final double[] maxScoreSums;

  MaxScoreBulkScorer(List<Scorer> scorers) throws IOException {
    allScorers = new DisiWrapper[scorers.size()];
    long totalCost = 0;
    int slot = 0;
    for (Scorer clause : scorers) {
      final DisiWrapper wrapper = new DisiWrapper(clause);
      totalCost += wrapper.cost;
      allScorers[slot++] = wrapper;
    }
    this.cost = totalCost;
    maxScorePropagator = new MaxScoreSumPropagator(scorers);
    essentialQueue = new DisiPriorityQueue(allScorers.length);
    maxScoreSums = new double[allScorers.length];
  }

  @Override
  public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
    collector.setScorer(scorable);

    int from = min;
    while (from < max) {
      final int to = Math.min(updateMaxWindowScores(from), max);
      // When partitioning leaves no essential clause, the window cannot contain a competitive hit.
      if (partitionScorers()) {
        scoreWindow(collector, acceptDocs, from, to);
      }
      from = to;
    }

    return nextCandidate(max);
  }

  /**
   * Scores the candidates of the window {@code [windowMin, windowMax)}. Returns early when a
   * re-partitioning triggered by a new minimum competitive score leaves no essential clause.
   */
  private void scoreWindow(LeafCollector collector, Bits acceptDocs, int windowMin, int windowMax)
      throws IOException {
    // Bring every essential clause into the window.
    DisiWrapper lead = essentialQueue.top();
    while (lead.doc < windowMin) {
      lead.doc = lead.iterator.advance(windowMin);
      lead = essentialQueue.updateTop();
    }

    while (lead.doc < windowMax) {
      final int candidate = lead.doc;
      if (acceptDocs == null || acceptDocs.get(candidate)) {
        collectIfPossibleMatch(collector, candidate);
      }

      // Move all essential clauses positioned on the candidate to their next doc.
      do {
        lead.doc = lead.iterator.nextDoc();
        lead = essentialQueue.updateTop();
      } while (lead.doc == candidate);

      if (minCompetitiveScoreUpdated) {
        minCompetitiveScoreUpdated = false;
        if (partitionScorers() == false) {
          return;
        }
        lead = essentialQueue.top();
      }
    }
  }

  /**
   * Computes the score of {@code doc}, on which the top of the essential queue is positioned, and
   * passes it to the collector unless the score upper bound drops below the minimum competitive
   * score while non-essential clauses are being added.
   */
  private void collectIfPossibleMatch(LeafCollector collector, int doc) throws IOException {
    final DisiWrapper head = essentialQueue.topList();
    double score = head.scorer.score();
    for (DisiWrapper essential = head.next; essential != null; essential = essential.next) {
      score += essential.scorer.score();
    }

    // Visit non-essential clauses from the highest max score down.
    for (int i = firstEssentialScorer - 1; i >= 0; --i) {
      final float upperBound = maxScorePropagator.scoreSumUpperBound(score + maxScoreSums[i]);
      if (upperBound < minCompetitiveScore) {
        return;
      }

      final DisiWrapper nonEssential = allScorers[i];
      if (nonEssential.doc < doc) {
        nonEssential.doc = nonEssential.iterator.advance(doc);
      }
      if (nonEssential.doc == doc) {
        score += nonEssential.scorer.score();
      }
    }

    scorable.doc = doc;
    scorable.score = (float) score;
    collector.collect(doc);
  }

  private int updateMaxWindowScores(int windowMin) throws IOException {
    // Only essential clauses decide where the window ends, which avoids recomputing max scores
    // over many tiny windows.
    final int firstWindowLead = Math.min(firstEssentialScorer, allScorers.length - 1);

    for (int i = 0; i < firstWindowLead; ++i) {
      final DisiWrapper nonLead = allScorers[i];
      if (nonLead.doc < windowMin) {
        nonLead.scorer.advanceShallow(windowMin);
      }
    }

    // Tracked as a long because the boundary returned by advanceShallow is inclusive and adding
    // one to it may exceed the int range.
    long exclusiveEnd = DocIdSetIterator.NO_MORE_DOCS;
    for (int i = firstWindowLead; i < allScorers.length; ++i) {
      final DisiWrapper windowLead = allScorers[i];
      final int upTo = windowLead.scorer.advanceShallow(Math.max(windowLead.doc, windowMin));
      exclusiveEnd = Math.min(exclusiveEnd, upTo + 1L);
    }
    final int windowMax = (int) exclusiveEnd;

    for (DisiWrapper clause : allScorers) {
      // A clause already positioned at or past the window end contributes nothing to it.
      clause.maxWindowScore =
          clause.doc < windowMax ? clause.scorer.getMaxScore(windowMax - 1) : 0;
    }
    return windowMax;
  }

  private boolean partitionScorers() {
    Arrays.sort(allScorers, Comparator.comparingDouble(w -> w.maxWindowScore));

    double runningSum = 0;
    int index = 0;
    while (index < allScorers.length) {
      runningSum += allScorers[index].maxWindowScore;
      maxScoreSums[index] = runningSum;
      final float upperBound = MaxScoreSumPropagator.scoreSumUpperBound(runningSum, index + 1);
      if (upperBound >= minCompetitiveScore) {
        break;
      }
      ++index;
    }
    firstEssentialScorer = index;

    if (firstEssentialScorer == allScorers.length) {
      return false;
    }

    essentialQueue.clear();
    for (int i = firstEssentialScorer; i < allScorers.length; ++i) {
      essentialQueue.add(allScorers[i]);
    }
    return true;
  }

  /** Return the next candidate on or after {@code rangeEnd}. */
  private int nextCandidate(int rangeEnd) {
    int smallest = DocIdSetIterator.NO_MORE_DOCS;
    for (DisiWrapper clause : allScorers) {
      if (clause.doc < rangeEnd) {
        return rangeEnd;
      }
      smallest = Math.min(smallest, clause.doc);
    }
    return smallest;
  }

  @Override
  public long cost() {
    return cost;
  }

  /** Scorable handed to the collector; exposes the doc and score of the hit being collected. */
  private class ScoreAndDoc extends Scorable {

    float score;
    int doc = -1;

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public float score() {
      return score;
    }

    @Override
    public void setMinCompetitiveScore(float minScore) throws IOException {
      MaxScoreBulkScorer.this.minCompetitiveScore = minScore;
      maxScorePropagator.setMinCompetitiveScore(minScore);
      minCompetitiveScoreUpdated = true;
    }
  }
}

View File

@ -167,6 +167,10 @@ final class MaxScoreSumPropagator {
}
float scoreSumUpperBound(double sum) {
return scoreSumUpperBound(sum, numClauses);
}
static float scoreSumUpperBound(double sum, int numClauses) {
if (numClauses <= 2) {
// When there are only two clauses, the sum is always the same regardless
// of the order.

View File

@ -1,333 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestBlockMaxMaxscoreScorer extends LuceneTestCase {

  /** Indexes six documents whose "foo" field holds some of the values A, B and C. */
  private void writeDocuments(Directory dir) throws IOException {
    try (IndexWriter writer =
        new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
      final List<String[]> fooValuesPerDoc =
          Arrays.asList(
              new String[] {"A", "B"}, // 0
              new String[] {"A"}, // 1
              new String[] {}, // 2
              new String[] {"A", "B", "C"}, // 3
              new String[] {"B"}, // 4
              new String[] {"B", "C"} // 5
              );
      for (String[] fooValues : fooValuesPerDoc) {
        Document document = new Document();
        for (String fooValue : fooValues) {
          document.add(new StringField("foo", fooValue, Field.Store.NO));
        }
        writer.addDocument(document);
      }
      writer.forceMerge(1);
    }
  }

  /** Constant-score query on {@code foo:value}. */
  private static Query constantScore(String value) {
    return new ConstantScoreQuery(new TermQuery(new Term("foo", value)));
  }

  /** Constant-score query on {@code foo:value}, wrapped in a boost. */
  private static Query boostedConstantScore(String value, float boost) {
    return new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", value))), boost);
  }

  /** Disjunction {@code (foo:A)^2 OR foo:B}, shared by several tests. */
  private static BooleanQuery twoClauseDisjunction() {
    return new BooleanQuery.Builder()
        .add(boostedConstantScore("A", 2), BooleanClause.Occur.SHOULD)
        .add(constantScore("B"), BooleanClause.Occur.SHOULD)
        .build();
  }

  /** Creates a TOP_SCORES scorer for {@code query} on the first leaf of the searcher's reader. */
  private static Scorer firstLeafScorer(IndexSearcher searcher, Query query) throws IOException {
    Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1);
    return weight.scorer(searcher.getIndexReader().leaves().get(0));
  }

  /** Steps a fresh iterator of {@code scorer} to its next doc and checks doc ID and score. */
  private static void assertNextHit(Scorer scorer, int expectedDoc, float expectedScore)
      throws IOException {
    assertEquals(expectedDoc, scorer.iterator().nextDoc());
    assertEquals(expectedScore, scorer.score(), 0);
  }

  /** Checks that stepping a fresh iterator of {@code scorer} yields no further doc. */
  private static void assertExhausted(Scorer scorer) throws IOException {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
  }

  public void testBasicsWithTwoDisjunctionClauses() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        Query query = new BlockMaxMaxscoreQuery(twoClauseDisjunction());

        Scorer scorer = firstLeafScorer(searcher, query);

        assertNextHit(scorer, 0, 2 + 1);
        assertNextHit(scorer, 1, 2);
        assertNextHit(scorer, 3, 2 + 1);
        assertNextHit(scorer, 4, 1);
        assertNextHit(scorer, 5, 1);
        assertExhausted(scorer);
      }
    }
  }

  public void testBasicsWithThreeDisjunctionClauses() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        Query query =
            new BlockMaxMaxscoreQuery(
                new BooleanQuery.Builder()
                    .add(boostedConstantScore("A", 2), BooleanClause.Occur.SHOULD)
                    .add(constantScore("B"), BooleanClause.Occur.SHOULD)
                    .add(boostedConstantScore("C", 3), BooleanClause.Occur.SHOULD)
                    .build());

        Scorer scorer = firstLeafScorer(searcher, query);

        assertNextHit(scorer, 0, 2 + 1);
        assertNextHit(scorer, 1, 2);
        assertNextHit(scorer, 3, 2 + 1 + 3);
        assertNextHit(scorer, 4, 1);
        assertNextHit(scorer, 5, 1 + 3);
        assertExhausted(scorer);
      }
    }
  }

  public void testBasicsWithFilteredDisjunction() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        Query query =
            new BooleanQuery.Builder()
                .add(new BlockMaxMaxscoreQuery(twoClauseDisjunction()), BooleanClause.Occur.MUST)
                .add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.FILTER)
                .build();

        Scorer scorer = firstLeafScorer(searcher, query);

        assertNextHit(scorer, 3, 2 + 1);
        assertNextHit(scorer, 5, 1);
        assertExhausted(scorer);

        // Same query again, this time with a minimum competitive score set up front.
        scorer = firstLeafScorer(searcher, query);
        scorer.setMinCompetitiveScore(2);

        assertNextHit(scorer, 3, 2 + 1);
        assertExhausted(scorer);
      }
    }
  }

  public void testBasicsWithExclusion() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        Query query =
            new BooleanQuery.Builder()
                .add(new BlockMaxMaxscoreQuery(twoClauseDisjunction()), BooleanClause.Occur.MUST)
                .add(new TermQuery(new Term("foo", "C")), BooleanClause.Occur.MUST_NOT)
                .build();

        Scorer scorer = firstLeafScorer(searcher, query);

        assertNextHit(scorer, 0, 2 + 1);
        assertNextHit(scorer, 1, 2);
        assertNextHit(scorer, 4, 1);
        assertExhausted(scorer);

        // Same query again, this time with a minimum competitive score set up front.
        scorer = firstLeafScorer(searcher, query);
        scorer.setMinCompetitiveScore(3);

        assertNextHit(scorer, 0, 2 + 1);
        assertExhausted(scorer);
      }
    }
  }

  /** Test-only query whose weight always produces a {@link BlockMaxMaxscoreScorer}. */
  private static class BlockMaxMaxscoreQuery extends Query {
    private final BooleanQuery query;

    private BlockMaxMaxscoreQuery(BooleanQuery query) {
      assert query.isPureDisjunction()
          : "This test utility query is only used to create BlockMaxMaxscoreScorer for disjunctions.";
      assert query.clauses().size() >= 2
          : "There must be at least two optional clauses to use this test utility query.";
      this.query = query;
    }

    @Override
    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
        throws IOException {
      return new Weight(query) {
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
          // explanations are not needed by these tests
          return null;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
          BooleanWeight booleanWeight =
              (BooleanWeight) query.createWeight(searcher, scoreMode, boost);
          // One scorer per clause; IOExceptions cannot escape the lambdas, so they are rethrown
          // as assertion errors.
          List<Scorer> clauseScorers =
              booleanWeight.weightedClauses.stream()
                  .map(weightedClause -> weightedClause.weight)
                  .map(
                      clauseWeight -> {
                        try {
                          return clauseWeight.scorerSupplier(context);
                        } catch (IOException e) {
                          throw new AssertionError(e);
                        }
                      })
                  .map(
                      supplier -> {
                        try {
                          return supplier.get(Long.MAX_VALUE);
                        } catch (IOException e) {
                          throw new AssertionError(e);
                        }
                      })
                  .toList();
          return new BlockMaxMaxscoreScorer(booleanWeight, clauseScorers);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
          return false;
        }
      };
    }

    @Override
    public String toString(String field) {
      return "BlockMaxMaxscoreQuery";
    }

    @Override
    public void visit(QueryVisitor visitor) {
      // nothing to visit
    }

    @Override
    public boolean equals(Object other) {
      if (sameClassAs(other) == false) {
        return false;
      }
      return query.equals(((BlockMaxMaxscoreQuery) other).query);
    }

    @Override
    public int hashCode() {
      return 31 * classHash() + query.hashCode();
    }
  }
}

View File

@ -0,0 +1,325 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
/**
 * Basic tests for the bulk scorer that top-level disjunctions get under {@link
 * ScoreMode#TOP_SCORES}.
 *
 * <p>These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be
 * kept.
 */
public class TestMaxScoreBulkScorer extends LuceneTestCase {

  /** Marker in a min-competitive-score array meaning "do not update the score after this hit". */
  private static final float NO_UPDATE = Float.NaN;

  /** Indexes six documents whose "foo" field holds some of the values A, B and C. */
  private void writeDocuments(Directory dir) throws IOException {
    try (IndexWriter w =
        new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
      for (String[] values :
          Arrays.asList(
              new String[] {"A", "B"}, // 0
              new String[] {"A"}, // 1
              new String[] {}, // 2
              new String[] {"A", "B", "C"}, // 3
              new String[] {"B"}, // 4
              new String[] {"B", "C"} // 5
              )) {
        Document doc = new Document();
        for (String value : values) {
          doc.add(new StringField("foo", value, Field.Store.NO));
        }
        w.addDocument(doc);
      }
      w.forceMerge(1);
    }
  }

  /** Disjunction {@code (foo:A)^2 OR foo:B}. */
  private static BooleanQuery.Builder twoClauses() {
    return new BooleanQuery.Builder()
        .add(
            new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
            BooleanClause.Occur.SHOULD)
        .add(
            new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
            BooleanClause.Occur.SHOULD);
  }

  /** Disjunction {@code (foo:A)^2 OR foo:B OR (foo:C)^3}. */
  private static BooleanQuery.Builder threeClauses() {
    return twoClauses()
        .add(
            new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
            BooleanClause.Occur.SHOULD);
  }

  /** Creates a TOP_SCORES bulk scorer for {@code query} on the first leaf of the reader. */
  private static BulkScorer firstLeafBulkScorer(IndexSearcher searcher, Query query)
      throws IOException {
    return searcher
        .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
        .bulkScorer(searcher.getIndexReader().leaves().get(0));
  }

  /**
   * Runs {@code bulkScorer} over all docs and checks that it collects exactly the expected hits,
   * in order.
   *
   * <p>The total number of hits is verified once scoring has finished, so a scorer that wrongly
   * skips hits fails the test instead of passing because {@code collect} was never called.
   *
   * @param bulkScorer the scorer under test
   * @param expectedDocs doc IDs expected to be collected, in collection order
   * @param expectedScores score expected for each entry of {@code expectedDocs}
   * @param minCompetitiveScores minimum competitive score to set right after each hit, with
   *     {@link #NO_UPDATE} meaning no update; {@code null} to never update it
   */
  private static void assertCollectedHits(
      BulkScorer bulkScorer,
      int[] expectedDocs,
      float[] expectedScores,
      float[] minCompetitiveScores)
      throws IOException {
    // Single-element array so that the anonymous collector can report the count back.
    final int[] collected = new int[1];
    bulkScorer.score(
        new LeafCollector() {
          private Scorable scorer;

          @Override
          public void setScorer(Scorable scorer) throws IOException {
            this.scorer = scorer;
          }

          @Override
          public void collect(int doc) throws IOException {
            final int hit = collected[0]++;
            assertTrue("unexpected extra hit on doc " + doc, hit < expectedDocs.length);
            assertEquals(expectedDocs[hit], doc);
            assertEquals(expectedScores[hit], scorer.score(), 0);
            if (minCompetitiveScores != null && Float.isNaN(minCompetitiveScores[hit]) == false) {
              scorer.setMinCompetitiveScore(minCompetitiveScores[hit]);
            }
          }
        },
        null);
    assertEquals("wrong number of collected hits", expectedDocs.length, collected[0]);
  }

  public void testBasicsWithTwoDisjunctionClauses() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        BulkScorer scorer = firstLeafBulkScorer(searcher, twoClauses().build());

        assertCollectedHits(
            scorer,
            new int[] {0, 1, 3, 4, 5},
            new float[] {2 + 1, 2, 2 + 1, 1, 1},
            null);
      }
    }
  }

  public void testBasicsWithTwoDisjunctionClausesAndSkipping() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        BulkScorer scorer = firstLeafBulkScorer(searcher, twoClauses().build());

        // Simulate top-2 retrieval: once two hits are in, only strictly better hits are
        // competitive, which rules out docs 4 and 5.
        assertCollectedHits(
            scorer,
            new int[] {0, 1, 3},
            new float[] {2 + 1, 2, 2 + 1},
            new float[] {NO_UPDATE, Math.nextUp(2f), Math.nextUp(2f + 1)});
      }
    }
  }

  public void testBasicsWithThreeDisjunctionClauses() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        BulkScorer scorer = firstLeafBulkScorer(searcher, threeClauses().build());

        assertCollectedHits(
            scorer,
            new int[] {0, 1, 3, 4, 5},
            new float[] {2 + 1, 2, 2 + 1 + 3, 1, 1 + 3},
            null);
      }
    }
  }

  public void testBasicsWithThreeDisjunctionClausesAndSkipping() throws Exception {
    try (Directory dir = newDirectory()) {
      writeDocuments(dir);

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        BulkScorer scorer = firstLeafBulkScorer(searcher, threeClauses().build());

        // Simulate top-2 retrieval: doc 4 (score 1) is no longer competitive when it is reached.
        assertCollectedHits(
            scorer,
            new int[] {0, 1, 3, 5},
            new float[] {2 + 1, 2, 2 + 1 + 3, 1 + 3},
            new float[] {
              NO_UPDATE, Math.nextUp(2f), Math.nextUp(2f + 1), Math.nextUp(1f + 3)
            });
      }
    }
  }
}