Sometimes intersect the essential clause and the best non-essential clause. (#12589)

The idea behind MAXSCORE is to run disjunctions as `+(essentialClause1 ...
essentialClauseM) nonEssentialClause1 ... nonEssentialClauseN`, moving more and
more clauses from the essential list to the non-essential list as the minimum
competitive score increases. For instance, a query such as `the book of life`,
which I found in the Tantivy benchmark, ends up running as `+book the of life`
after some time, i.e. with one required clause and the other clauses optional.
This is because matching `the`, `of` and `life` alone is not good enough to
yield a competitive match.

Here are some statistics in that case:
 - min competitive score: 3.4781857
 - max_window_score(book): 2.8796153
 - max_window_score(life): 2.037863
 - max_window_score(the): 0.103848875
 - max_window_score(of): 0.19427927

Looking at these statistics, we could actually do better: `book`, `the` and
`of` together can score at most ~3.18, which is below the minimum competitive
score, so a match may only be competitive if it matches both `book` and `life`.
This query could therefore execute as `+book +life the of`, which may help
evaluate fewer documents compared to `+book the of life`, especially if you
enable recursive graph bisection.

This is what this PR tries to achieve: when there is a single essential clause
and matching all clauses except the best non-essential clause cannot produce a
competitive match, the scorer only evaluates documents that match the
intersection of the essential clause and the best non-essential clause.

It's worth noting that this optimization would kick in very frequently on
two-clause disjunctions.
This commit is contained in:
Adrien Grand 2023-10-24 17:54:23 +02:00 committed by GitHub
parent 5bca0aa391
commit 611bbbd951
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 534 additions and 99 deletions

View File

@ -101,6 +101,9 @@ Optimizations
* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)
* GITHUB#12589: Disjunctions now sometimes run as conjunctions when the minimum
competitive score requires multiple clauses to match. (Adrien Grand)
Bug Fixes
---------------------

View File

@ -30,19 +30,21 @@ final class MaxScoreBulkScorer extends BulkScorer {
private final int maxDoc;
// All scorers, sorted by increasing max score.
private final DisiWrapper[] allScorers;
final DisiWrapper[] allScorers;
private final DisiWrapper[] scratch;
// These are the last scorers from `allScorers` that are "essential", ie. required for a match to
// have a competitive score.
private final DisiPriorityQueue essentialQueue;
// Index of the first essential scorer, ie. essentialQueue contains all scorers from
// allScorers[firstEssentialScorer:]. All scorers below this index are non-essential.
private int firstEssentialScorer;
int firstEssentialScorer;
// Index of the first scorer that is required, this scorer and all following scorers are required
// for a document to match.
int firstRequiredScorer;
private final long cost;
private float minCompetitiveScore;
private boolean minCompetitiveScoreUpdated;
float minCompetitiveScore;
private Score scorable = new Score();
private final double[] maxScoreSums;
final double[] maxScoreSums;
private final long[] windowMatches = new long[FixedBitSet.bits2words(INNER_WINDOW_SIZE)];
private final double[] windowScores = new double[INNER_WINDOW_SIZE];
@ -113,25 +115,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
while (top.doc < outerWindowMax) {
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
top = essentialQueue.top();
if (minCompetitiveScoreUpdated) {
minCompetitiveScoreUpdated = false;
if (partitionScorers() == false) {
outerWindowMin = outerWindowMax;
continue outer;
} else {
// Partitioning may have swapped essential and non-essential scorers, and some of the
// non-essential scorers may be behind the last scored doc. So let's advance to the next
// candidate match.
final int nextCandidateMatch = top.doc;
top = essentialQueue.top();
while (top.doc < nextCandidateMatch) {
top.doc = top.iterator.advance(nextCandidateMatch);
top = essentialQueue.updateTop();
}
}
}
}
outerWindowMin = outerWindowMax;
}
@ -140,17 +125,20 @@ final class MaxScoreBulkScorer extends BulkScorer {
private void scoreInnerWindow(LeafCollector collector, Bits acceptDocs, int max)
throws IOException {
DisiWrapper top = essentialQueue.top();
DisiWrapper top2 = essentialQueue.top2();
if (top2 == null) {
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, max);
} else if (top2.doc - INNER_WINDOW_SIZE / 2 >= top.doc) {
// The first half of the window would match a single clause. Let's collect this single clause
// until the next doc ID of the next clause.
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, Math.min(max, top2.doc));
if (allScorers.length - firstRequiredScorer >= 2) {
scoreInnerWindowAsConjunction(collector, acceptDocs, max);
} else {
scoreInnerWindowMultipleEssentialClauses(collector, acceptDocs, max);
DisiWrapper top = essentialQueue.top();
DisiWrapper top2 = essentialQueue.top2();
if (top2 == null) {
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, max);
} else if (top2.doc - INNER_WINDOW_SIZE / 2 >= top.doc) {
// The first half of the window would match a single clause. Let's collect this single
// clause until the next doc ID of the next clause.
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, Math.min(max, top2.doc));
} else {
scoreInnerWindowMultipleEssentialClauses(collector, acceptDocs, max);
}
}
}
@ -164,17 +152,79 @@ final class MaxScoreBulkScorer extends BulkScorer {
if (acceptDocs != null && acceptDocs.get(doc) == false) {
continue;
}
scoreNonEssentialClauses(collector, doc, top.scorer.score());
if (minCompetitiveScoreUpdated) {
// force scorers to be partitioned again before collecting more hits
top.iterator.nextDoc();
break;
}
scoreNonEssentialClauses(collector, doc, top.scorer.score(), firstEssentialScorer);
}
top.doc = top.iterator.docID();
essentialQueue.updateTop();
}
/**
 * Scores the window {@code [lead1.doc, max)} by only visiting documents that match every scorer
 * in {@code allScorers[firstRequiredScorer:]}. Documents that pass are handed to
 * {@link #scoreNonEssentialClauses} together with the score accumulated over the required
 * scorers.
 *
 * <p>Must only be called when there is exactly one essential scorer (the last entry of {@code
 * allScorers}) and at least two required scorers, as checked by the assertions below.
 *
 * @param collector the collector forwarded to {@code scoreNonEssentialClauses}
 * @param acceptDocs if non-null, documents for which {@code get(doc)} is false are skipped
 * @param max exclusive upper bound of the doc IDs evaluated by this call
 */
private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptDocs, int max)
throws IOException {
assert firstEssentialScorer == allScorers.length - 1;
assert firstRequiredScorer <= allScorers.length - 2;
// lead1 is the single essential scorer, lead2 is the scorer right before it in allScorers.
DisiWrapper lead1 = allScorers[allScorers.length - 1];
assert essentialQueue.size() == 1;
assert lead1 == essentialQueue.top();
DisiWrapper lead2 = allScorers[allScorers.length - 2];
if (lead1.doc < lead2.doc) {
// lead2 cannot match before lead2.doc, so move lead1 there directly; the target is capped at
// max so that lead1 is not asked to advance beyond the end of this window.
lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max));
}
// maximum score contribution of all scorers but the lead
double maxScoreSumAtLead2 = maxScoreSums[allScorers.length - 2];
outer:
while (lead1.doc < max) {
if (acceptDocs != null && acceptDocs.get(lead1.doc) == false) {
lead1.doc = lead1.iterator.nextDoc();
continue;
}
double score = lead1.scorer.score();
// We specialize handling the second best scorer, which seems to help a bit with performance.
// But this is the exact same logic as in the below for loop.
if ((float) MathUtil.sumUpperBound(score + maxScoreSumAtLead2, allScorers.length)
< minCompetitiveScore) {
// a competitive match is not possible according to max scores, skip to the next candidate
lead1.doc = lead1.iterator.nextDoc();
continue;
}
if (lead2.doc < lead1.doc) {
lead2.doc = lead2.iterator.advance(lead1.doc);
}
if (lead2.doc != lead1.doc) {
// lead2 is now beyond lead1: no match on lead1.doc, jump lead1 to lead2's doc (capped at max)
lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max));
continue;
}
score += lead2.scorer.score();
// Check the remaining required scorers, from index length - 3 down to firstRequiredScorer.
for (int i = allScorers.length - 3; i >= firstRequiredScorer; --i) {
if ((float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length)
< minCompetitiveScore) {
// a competitive match is not possible according to max scores, skip to the next candidate
lead1.doc = lead1.iterator.nextDoc();
continue outer;
}
DisiWrapper w = allScorers[i];
if (w.doc < lead1.doc) {
w.doc = w.iterator.advance(lead1.doc);
}
if (w.doc != lead1.doc) {
// this required scorer does not match lead1.doc, resume from its doc (capped at max)
lead1.doc = lead1.iterator.advance(Math.min(w.doc, max));
continue outer;
}
score += w.scorer.score();
}
// All required scorers match lead1.doc; the scorers below firstRequiredScorer are handled by
// scoreNonEssentialClauses.
scoreNonEssentialClauses(collector, lead1.doc, score, firstRequiredScorer);
lead1.doc = lead1.iterator.nextDoc();
}
}
private void scoreInnerWindowMultipleEssentialClauses(
LeafCollector collector, Bits acceptDocs, int max) throws IOException {
DisiWrapper top = essentialQueue.top();
@ -206,7 +256,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
double score = windowScores[index];
windowScores[index] = 0d;
scoreNonEssentialClauses(collector, doc, score);
scoreNonEssentialClauses(collector, doc, score, firstEssentialScorer);
}
}
}
@ -230,7 +280,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
return windowMax;
}
private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
for (DisiWrapper scorer : allScorers) {
if (scorer.doc < windowMax) {
if (scorer.doc < windowMin) {
@ -246,10 +296,11 @@ final class MaxScoreBulkScorer extends BulkScorer {
}
}
private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore)
private void scoreNonEssentialClauses(
LeafCollector collector, int doc, double essentialScore, int numNonEssentialClauses)
throws IOException {
double score = essentialScore;
for (int i = firstEssentialScorer - 1; i >= 0; --i) {
for (int i = numNonEssentialClauses - 1; i >= 0; --i) {
float maxPossibleScore =
(float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length);
if (maxPossibleScore < minCompetitiveScore) {
@ -270,7 +321,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
collector.collect(doc);
}
private boolean partitionScorers() {
boolean partitionScorers() {
// Partitioning scorers is an optimization problem: the optimal set of non-essential scorers is
// the subset of scorers whose sum of max window scores is less than the minimum competitive
// score that maximizes the sum of costs.
@ -303,6 +354,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
}
}
firstRequiredScorer = allScorers.length;
if (firstEssentialScorer == allScorers.length) {
return false;
}
@ -311,6 +364,33 @@ final class MaxScoreBulkScorer extends BulkScorer {
for (int i = firstEssentialScorer; i < allScorers.length; ++i) {
essentialQueue.add(allScorers[i]);
}
if (firstEssentialScorer == allScorers.length - 1) { // single essential clause
// If there is a single essential clause and matching it plus all non-essential clauses but
// the best one is not enough to yield a competitive match, then we know that hits must match
// both the essential clause and the best non-essential clause. Here are some examples when
// this optimization would kick in:
// `quick fox` when maxscore(quick) = 1, maxscore(fox) = 1, minCompetitiveScore = 1.5
// `the quick fox` when maxscore (the) = 0.1, maxscore(quick) = 1, maxscore(fox) = 1,
// minCompetitiveScore = 1.5
firstRequiredScorer = allScorers.length - 1;
double maxRequiredScore = allScorers[firstEssentialScorer].maxWindowScore;
while (firstRequiredScorer > 0) {
double maxPossibleScoreWithoutPreviousClause = maxRequiredScore;
if (firstRequiredScorer > 1) {
maxPossibleScoreWithoutPreviousClause += maxScoreSums[firstRequiredScorer - 2];
}
if ((float) maxPossibleScoreWithoutPreviousClause >= minCompetitiveScore) {
break;
}
// The sum of maximum scores ignoring the previous clause is less than the minimum
// competitive score, so the previous clause is required as well.
--firstRequiredScorer;
maxRequiredScore += allScorers[firstRequiredScorer].maxWindowScore;
}
}
return true;
}
@ -348,7 +428,6 @@ final class MaxScoreBulkScorer extends BulkScorer {
@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
MaxScoreBulkScorer.this.minCompetitiveScore = minScore;
minCompetitiveScoreUpdated = true;
}
}
}

View File

@ -18,19 +18,43 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.Bits;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestMaxScoreBulkScorer extends LuceneTestCase {
/**
 * Wraps a scorer so that {@link #advanceShallow(int)} never reports a boundary beyond the last
 * doc ID of the 2048-doc aligned block that contains the target. Max scores are delegated
 * unchanged.
 */
private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {

  public CapMaxScoreWindowAt2048Scorer(Scorer in) {
    super(in);
  }

  @Override
  public float getMaxScore(int upTo) throws IOException {
    return in.getMaxScore(upTo);
  }

  @Override
  public int advanceShallow(int target) throws IOException {
    // Setting the low 11 bits yields the last doc ID of the 2048-doc block containing target.
    final int lastDocOfBlock = target | 0x7FF;
    final int wrappedBoundary = in.advanceShallow(target);
    return Math.min(lastDocOfBlock, wrappedBoundary);
  }
}
private void writeDocuments(Directory dir) throws IOException {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@ -64,20 +88,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
BooleanClause.Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
BooleanClause.Occur.SHOULD)
.build();
Query clause1 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
Scorer scorer1 =
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
.bulkScorer(searcher.getIndexReader().leaves().get(0));
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
scorer.score(
new LeafCollector() {
@ -131,20 +158,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
BooleanClause.Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
BooleanClause.Occur.SHOULD)
.build();
Query clause1 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
Scorer scorer1 =
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
.bulkScorer(searcher.getIndexReader().leaves().get(0));
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
scorer.score(
new LeafCollector() {
@ -193,23 +223,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
BooleanClause.Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
BooleanClause.Occur.SHOULD)
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
BooleanClause.Occur.SHOULD)
.build();
Query clause1 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
Query clause3 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3);
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
Scorer scorer1 =
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
.bulkScorer(searcher.getIndexReader().leaves().get(0));
new MaxScoreBulkScorer(
context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3));
scorer.score(
new LeafCollector() {
@ -263,23 +301,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
new BooleanQuery.Builder()
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
BooleanClause.Occur.SHOULD)
.add(
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
BooleanClause.Occur.SHOULD)
.add(
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
BooleanClause.Occur.SHOULD)
.build();
Query clause1 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
Query clause3 =
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3);
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
Scorer scorer1 =
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
searcher
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
.bulkScorer(searcher.getIndexReader().leaves().get(0));
new MaxScoreBulkScorer(
context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3));
scorer.score(
new LeafCollector() {
@ -325,4 +371,311 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
}
}
}
/**
 * Placeholder {@link Weight} built with a null query. It is never cacheable, and both
 * {@code explain} and {@code scorer} are unsupported.
 */
private static class FakeWeight extends Weight {

  protected FakeWeight() {
    super(null);
  }

  @Override
  public Scorer scorer(LeafReaderContext context) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }
}
/**
 * Scorer stub whose doc ID, max score, max-score boundary and cost are plain mutable fields that
 * a test can set directly. Calling {@link #score()} is unsupported.
 */
private static class FakeScorer extends Scorer {

  /** Label returned by {@link #toString()}. */
  final String toString;

  /** Cost exposed through the iterator returned by {@link #iterator()}. */
  int cost = 10;

  /** Value returned by {@link #getMaxScore(int)}, whatever the argument. */
  float maxScore = 1f;

  /** Value returned by {@link #advanceShallow(int)}, whatever the target. */
  int maxScoreUpTo = DocIdSetIterator.NO_MORE_DOCS;

  /** Value returned by {@link #docID()}. */
  int docID = -1;

  protected FakeScorer(String toString) {
    super(new FakeWeight());
    this.toString = toString;
  }

  @Override
  public String toString() {
    return toString;
  }

  @Override
  public int docID() {
    return docID;
  }

  @Override
  public float score() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public float getMaxScore(int upTo) throws IOException {
    return maxScore;
  }

  @Override
  public int advanceShallow(int target) throws IOException {
    return maxScoreUpTo;
  }

  @Override
  public DocIdSetIterator iterator() {
    // just so that it exposes the right cost
    return DocIdSetIterator.all(cost);
  }
}
/**
 * Indexes three documents that all contain the terms {@code foo}, {@code bar} and {@code quux},
 * then scores a three-clause disjunction with an {@code acceptDocs} that only accepts doc 1. For
 * every tested minimum competitive score, exactly one document (doc 1) must be collected.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that they are
 * released even when an assertion fails part-way through the test.
 */
public void testDeletes() throws IOException {
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(newLogMergePolicy());
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      Document doc1 = new Document();
      doc1.add(new StringField("field", "foo", Store.NO));
      doc1.add(new StringField("field", "bar", Store.NO));
      doc1.add(new StringField("field", "quux", Store.NO));
      Document doc2 = new Document();
      Document doc3 = new Document();
      // Copy the shared fields before the per-document id is added to doc1.
      for (IndexableField field : doc1) {
        doc2.add(field);
        doc3.add(field);
      }
      doc1.add(new StringField("id", "1", Store.NO));
      doc2.add(new StringField("id", "2", Store.NO));
      doc3.add(new StringField("id", "3", Store.NO));
      w.addDocument(doc1);
      w.addDocument(doc2);
      w.addDocument(doc3);
      w.forceMerge(1);
    }

    try (IndexReader reader = DirectoryReader.open(dir)) {
      Query query =
          new BooleanQuery.Builder()
              .add(
                  new BoostQuery(
                      new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), 1f),
                  Occur.SHOULD)
              .add(
                  new BoostQuery(
                      new ConstantScoreQuery(new TermQuery(new Term("field", "bar"))), 1.5f),
                  Occur.SHOULD)
              .add(
                  new BoostQuery(
                      new ConstantScoreQuery(new TermQuery(new Term("field", "quux"))), 0.1f),
                  Occur.SHOULD)
              .build();

      IndexSearcher searcher = newSearcher(reader);
      Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1f);

      // Only doc 1 is accepted; docs 0 and 2 are treated as deleted.
      Bits liveDocs =
          new Bits() {
            @Override
            public boolean get(int index) {
              return index == 1;
            }

            @Override
            public int length() {
              return 3;
            }
          };

      // Test min competitive scores that exercise different execution modes
      for (float minCompetitiveScore :
          new float[] {
            0f, // 3 essential clauses
            1f, // 2 essential clauses
            1.2f, // 1 essential clause
            2f // two required clauses
          }) {
        BulkScorer scorer = weight.bulkScorer(searcher.getIndexReader().leaves().get(0));
        LeafCollector collector =
            new LeafCollector() {

              // Number of documents collected so far.
              int i = 0;

              @Override
              public void setScorer(Scorable scorer) throws IOException {
                scorer.setMinCompetitiveScore(minCompetitiveScore);
              }

              @Override
              public void collect(int doc) throws IOException {
                assertEquals(1, doc);
                assertEquals(0, i++);
              }

              @Override
              public void finish() throws IOException {
                assertEquals(1, i);
              }
            };
        scorer.score(collector, liveDocs);
        collector.finish();
      }
    }
  }
}
// This test simulates what happens over time for the query `the quick fox` as collection
// progresses and the minimum competitive score increases.
// Max scores used below: the = 0.1, quick = 1, fox = 1.1. Before every partitioning the scorers
// are shuffled, so the asserted outcome must not depend on the incoming order of allScorers.
public void testPartition() throws IOException {
FakeScorer the = new FakeScorer("the");
the.cost = 9_000;
the.maxScore = 0.1f;
FakeScorer quick = new FakeScorer("quick");
quick.cost = 1_000;
quick.maxScore = 1f;
FakeScorer fox = new FakeScorer("fox");
fox.cost = 900;
fox.maxScore = 1.1f;
MaxScoreBulkScorer scorer = new MaxScoreBulkScorer(10_000, Arrays.asList(the, quick, fox));
the.docID = 4;
the.maxScoreUpTo = 130;
quick.docID = 4;
quick.maxScoreUpTo = 999;
fox.docID = 10;
fox.maxScoreUpTo = 1_200;
// minCompetitiveScore has not been set yet at this point
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(0, scorer.firstEssentialScorer); // all clauses are essential
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
// less than the maximum score of every clause
scorer.minCompetitiveScore = 0.09f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
// equal to the maximum score of `the`
scorer.minCompetitiveScore = 0.1f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
// greater than the maximum score of `the`
scorer.minCompetitiveScore = 0.11f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(1, scorer.firstEssentialScorer); // the is non essential
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
assertSame(the, scorer.allScorers[0].scorer);
// equal to the sum of the max scores of the and quick
scorer.minCompetitiveScore = 1.1f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(1, scorer.firstEssentialScorer); // the is non essential
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
assertSame(the, scorer.allScorers[0].scorer);
// greater than the sum of the max scores of the and quick
scorer.minCompetitiveScore = 1.11f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(2, scorer.firstRequiredScorer); // fox is required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// equal to the sum of the max scores of the and fox
scorer.minCompetitiveScore = 1.2f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(2, scorer.firstRequiredScorer); // fox is required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// greater than the sum of the max scores of the and fox
scorer.minCompetitiveScore = 1.21f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(1, scorer.firstRequiredScorer); // quick and fox are required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// equal to the sum of the max scores of quick and fox
scorer.minCompetitiveScore = 2.1f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(1, scorer.firstRequiredScorer); // quick and fox are required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// greater than the sum of the max scores of quick and fox
scorer.minCompetitiveScore = 2.11f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// greater than the sum of the max scores of quick and fox
// NOTE(review): this step repeats the previous one with the same score (2.11f) and the same
// assertions; it only differs by the random shuffle order. Confirm whether it is intentional.
scorer.minCompetitiveScore = 2.11f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// equal to the sum of the max scores of all terms
scorer.minCompetitiveScore = 2.2f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertTrue(scorer.partitionScorers());
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
assertSame(the, scorer.allScorers[0].scorer);
assertSame(quick, scorer.allScorers[1].scorer);
assertSame(fox, scorer.allScorers[2].scorer);
// greater than the sum of the max scores of all terms
scorer.minCompetitiveScore = 2.21f;
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
scorer.updateMaxWindowScores(4, 100);
assertFalse(scorer.partitionScorers()); // no possible match in this window
}
}