mirror of https://github.com/apache/lucene.git
Sometimes intersect the essential clause and the best non-essential clause. (#12589)
The idea behind MAXSCORE is to run disjunctions as `+(essentialClause1 ... essentialClauseM) nonEssentialClause1 ... nonEssentialClauseN`, moving more and more clauses from the essential list to the non-essential list as the minimum competitive score increases. For instance, a query such as `the book of life`, which I found in the Tantivy benchmark, ends up running as `+book the of life` after some time, i.e. with one required clause and the other clauses optional. This is because matching `the`, `of` and `life` alone is not good enough for yielding a match. Here are some statistics in that case: - min competitive score: 3.4781857 - max_window_score(book): 2.8796153 - max_window_score(life): 2.037863 - max_window_score(the): 0.103848875 - max_window_score(of): 0.19427927 Actually, if you look at these statistics, we could do better, because a match may only be competitive if it matches both `book` and `life`, so this query could actually execute as `+book +life the of`, which may help evaluate fewer documents compared to `+book the of life`, especially if you enable recursive graph bisection. This is what this PR tries to achieve: in the event that there is a single essential clause and matching all clauses but the best non-essential clause cannot produce a competitive match, the scorer will only evaluate documents that match the intersection of the essential clause and the best non-essential clause. It's worth noting that this optimization would kick in very frequently on 2-clause disjunctions.
This commit is contained in:
parent
5bca0aa391
commit
611bbbd951
|
@ -101,6 +101,9 @@ Optimizations
|
|||
|
||||
* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)
|
||||
|
||||
* GITHUB#12589: Disjunctions now sometimes run as conjunctions when the minimum
|
||||
competitive score requires multiple clauses to match. (Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -30,19 +30,21 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
|
||||
private final int maxDoc;
|
||||
// All scorers, sorted by increasing max score.
|
||||
private final DisiWrapper[] allScorers;
|
||||
final DisiWrapper[] allScorers;
|
||||
private final DisiWrapper[] scratch;
|
||||
// These are the last scorers from `allScorers` that are "essential", ie. required for a match to
|
||||
// have a competitive score.
|
||||
private final DisiPriorityQueue essentialQueue;
|
||||
// Index of the first essential scorer, ie. essentialQueue contains all scorers from
|
||||
// allScorers[firstEssentialScorer:]. All scorers below this index are non-essential.
|
||||
private int firstEssentialScorer;
|
||||
int firstEssentialScorer;
|
||||
// Index of the first scorer that is required, this scorer and all following scorers are required
|
||||
// for a document to match.
|
||||
int firstRequiredScorer;
|
||||
private final long cost;
|
||||
private float minCompetitiveScore;
|
||||
private boolean minCompetitiveScoreUpdated;
|
||||
float minCompetitiveScore;
|
||||
private Score scorable = new Score();
|
||||
private final double[] maxScoreSums;
|
||||
final double[] maxScoreSums;
|
||||
|
||||
private final long[] windowMatches = new long[FixedBitSet.bits2words(INNER_WINDOW_SIZE)];
|
||||
private final double[] windowScores = new double[INNER_WINDOW_SIZE];
|
||||
|
@ -113,25 +115,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
while (top.doc < outerWindowMax) {
|
||||
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
|
||||
top = essentialQueue.top();
|
||||
}
|
||||
|
||||
if (minCompetitiveScoreUpdated) {
|
||||
minCompetitiveScoreUpdated = false;
|
||||
if (partitionScorers() == false) {
|
||||
outerWindowMin = outerWindowMax;
|
||||
continue outer;
|
||||
} else {
|
||||
// Partitioning may have swapped essential and non-essential scorers, and some of the
|
||||
// non-essential scorers may be behind the last scored doc. So let's advance to the next
|
||||
// candidate match.
|
||||
final int nextCandidateMatch = top.doc;
|
||||
top = essentialQueue.top();
|
||||
while (top.doc < nextCandidateMatch) {
|
||||
top.doc = top.iterator.advance(nextCandidateMatch);
|
||||
top = essentialQueue.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
outerWindowMin = outerWindowMax;
|
||||
}
|
||||
|
||||
|
@ -140,19 +125,22 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
|
||||
// Scores one inner window by dispatching to one of three strategies:
//  - when at least two scorers sit at or after firstRequiredScorer, the window is scored as a
//    conjunction of the required scorers;
//  - when the essential queue has a single entry, or its second entry is at least half an inner
//    window ahead of the top entry, only the top essential clause is iterated;
//  - otherwise all essential clauses are scored together.
// `max` is the exclusive upper bound that is forwarded to the selected strategy.
private void scoreInnerWindow(LeafCollector collector, Bits acceptDocs, int max)
|
||||
throws IOException {
|
||||
// Two or more required scorers: only documents matching all of them are evaluated.
if (allScorers.length - firstRequiredScorer >= 2) {
|
||||
scoreInnerWindowAsConjunction(collector, acceptDocs, max);
|
||||
} else {
|
||||
DisiWrapper top = essentialQueue.top();
|
||||
|
||||
DisiWrapper top2 = essentialQueue.top2();
|
||||
// A null second entry means the essential queue holds a single scorer.
if (top2 == null) {
|
||||
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, max);
|
||||
} else if (top2.doc - INNER_WINDOW_SIZE / 2 >= top.doc) {
|
||||
// The first half of the window would match a single clause. Let's collect this single clause
|
||||
// until the next doc ID of the next clause.
|
||||
// The first half of the window would match a single clause. Let's collect this single
|
||||
// clause until the next doc ID of the next clause.
|
||||
scoreInnerWindowSingleEssentialClause(collector, acceptDocs, Math.min(max, top2.doc));
|
||||
} else {
|
||||
scoreInnerWindowMultipleEssentialClauses(collector, acceptDocs, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void scoreInnerWindowSingleEssentialClause(
|
||||
LeafCollector collector, Bits acceptDocs, int upTo) throws IOException {
|
||||
|
@ -164,17 +152,79 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
if (acceptDocs != null && acceptDocs.get(doc) == false) {
|
||||
continue;
|
||||
}
|
||||
scoreNonEssentialClauses(collector, doc, top.scorer.score());
|
||||
if (minCompetitiveScoreUpdated) {
|
||||
// force scorers to be partitioned again before collecting more hits
|
||||
top.iterator.nextDoc();
|
||||
break;
|
||||
}
|
||||
scoreNonEssentialClauses(collector, doc, top.scorer.score(), firstEssentialScorer);
|
||||
}
|
||||
top.doc = top.iterator.docID();
|
||||
essentialQueue.updateTop();
|
||||
}
|
||||
|
||||
// Scores documents below `max` that match every scorer in allScorers[firstRequiredScorer:].
// The last scorer of allScorers (lead1) drives iteration; the second-to-last scorer (lead2) and
// then each remaining required scorer are advanced to lead1's current doc. As soon as one of
// them lands on a different doc, lead1 is advanced to that doc (capped at `max`) and the
// candidate is abandoned. Documents on which all required scorers agree are handed to
// scoreNonEssentialClauses together with the sum of the required scorers' scores.
// A null `acceptDocs` disables filtering; otherwise docs for which it returns false are skipped.
private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptDocs, int max)
|
||||
throws IOException {
|
||||
// This mode expects exactly one essential scorer and at least two required scorers.
assert firstEssentialScorer == allScorers.length - 1;
|
||||
assert firstRequiredScorer <= allScorers.length - 2;
|
||||
DisiWrapper lead1 = allScorers[allScorers.length - 1];
|
||||
assert essentialQueue.size() == 1;
|
||||
assert lead1 == essentialQueue.top();
|
||||
DisiWrapper lead2 = allScorers[allScorers.length - 2];
|
||||
// lead1 cannot match before lead2's current doc, so skip ahead, without going past `max`.
if (lead1.doc < lead2.doc) {
|
||||
lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max));
|
||||
}
|
||||
// maximum score contribution of all scorers but the lead
// NOTE(review): presumably maxScoreSums[i] is the running sum of max window scores of
// allScorers[0..i]; confirm against the code that fills maxScoreSums.
|
||||
double maxScoreSumAtLead2 = maxScoreSums[allScorers.length - 2];
|
||||
|
||||
outer:
|
||||
while (lead1.doc < max) {
|
||||
|
||||
if (acceptDocs != null && acceptDocs.get(lead1.doc) == false) {
|
||||
lead1.doc = lead1.iterator.nextDoc();
|
||||
continue;
|
||||
}
|
||||
|
||||
double score = lead1.scorer.score();
|
||||
|
||||
// We specialize handling the second best scorer, which seems to help a bit with performance.
|
||||
// But this is the exact same logic as in the below for loop.
|
||||
if ((float) MathUtil.sumUpperBound(score + maxScoreSumAtLead2, allScorers.length)
|
||||
< minCompetitiveScore) {
|
||||
// a competitive match is not possible according to max scores, skip to the next candidate
|
||||
lead1.doc = lead1.iterator.nextDoc();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (lead2.doc < lead1.doc) {
|
||||
lead2.doc = lead2.iterator.advance(lead1.doc);
|
||||
}
|
||||
// lead2 does not match this doc: jump lead1 to lead2's doc (bounded by `max`) and retry.
if (lead2.doc != lead1.doc) {
|
||||
lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max));
|
||||
continue;
|
||||
}
|
||||
|
||||
score += lead2.scorer.score();
|
||||
|
||||
// Check the remaining required scorers, walking allScorers from the end towards
// firstRequiredScorer.
for (int i = allScorers.length - 3; i >= firstRequiredScorer; --i) {
|
||||
if ((float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length)
|
||||
< minCompetitiveScore) {
|
||||
// a competitive match is not possible according to max scores, skip to the next candidate
|
||||
lead1.doc = lead1.iterator.nextDoc();
|
||||
continue outer;
|
||||
}
|
||||
|
||||
DisiWrapper w = allScorers[i];
|
||||
if (w.doc < lead1.doc) {
|
||||
w.doc = w.iterator.advance(lead1.doc);
|
||||
}
|
||||
// Same leap-frog step as for lead2: a miss moves lead1 forward to w's doc.
if (w.doc != lead1.doc) {
|
||||
lead1.doc = lead1.iterator.advance(Math.min(w.doc, max));
|
||||
continue outer;
|
||||
}
|
||||
score += w.scorer.score();
|
||||
}
|
||||
|
||||
// All required scorers match: the scorers below firstRequiredScorer are handled by
// scoreNonEssentialClauses, which receives the score accumulated so far.
scoreNonEssentialClauses(collector, lead1.doc, score, firstRequiredScorer);
|
||||
lead1.doc = lead1.iterator.nextDoc();
|
||||
}
|
||||
}
|
||||
|
||||
private void scoreInnerWindowMultipleEssentialClauses(
|
||||
LeafCollector collector, Bits acceptDocs, int max) throws IOException {
|
||||
DisiWrapper top = essentialQueue.top();
|
||||
|
@ -206,7 +256,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
double score = windowScores[index];
|
||||
windowScores[index] = 0d;
|
||||
|
||||
scoreNonEssentialClauses(collector, doc, score);
|
||||
scoreNonEssentialClauses(collector, doc, score, firstEssentialScorer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -230,7 +280,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
return windowMax;
|
||||
}
|
||||
|
||||
private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
|
||||
void updateMaxWindowScores(int windowMin, int windowMax) throws IOException {
|
||||
for (DisiWrapper scorer : allScorers) {
|
||||
if (scorer.doc < windowMax) {
|
||||
if (scorer.doc < windowMin) {
|
||||
|
@ -246,10 +296,11 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
}
|
||||
}
|
||||
|
||||
private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore)
|
||||
private void scoreNonEssentialClauses(
|
||||
LeafCollector collector, int doc, double essentialScore, int numNonEssentialClauses)
|
||||
throws IOException {
|
||||
double score = essentialScore;
|
||||
for (int i = firstEssentialScorer - 1; i >= 0; --i) {
|
||||
for (int i = numNonEssentialClauses - 1; i >= 0; --i) {
|
||||
float maxPossibleScore =
|
||||
(float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length);
|
||||
if (maxPossibleScore < minCompetitiveScore) {
|
||||
|
@ -270,7 +321,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
collector.collect(doc);
|
||||
}
|
||||
|
||||
private boolean partitionScorers() {
|
||||
boolean partitionScorers() {
|
||||
// Partitioning scorers is an optimization problem: the optimal set of non-essential scorers is
|
||||
// the subset of scorers whose sum of max window scores is less than the minimum competitive
|
||||
// score that maximizes the sum of costs.
|
||||
|
@ -303,6 +354,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
}
|
||||
}
|
||||
|
||||
firstRequiredScorer = allScorers.length;
|
||||
|
||||
if (firstEssentialScorer == allScorers.length) {
|
||||
return false;
|
||||
}
|
||||
|
@ -311,6 +364,33 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
for (int i = firstEssentialScorer; i < allScorers.length; ++i) {
|
||||
essentialQueue.add(allScorers[i]);
|
||||
}
|
||||
|
||||
if (firstEssentialScorer == allScorers.length - 1) { // single essential clause
|
||||
// If there is a single essential clause and matching it plus all non-essential clauses but
|
||||
// the best one is not enough to yield a competitive match, then we know that hits must match
|
||||
// both the essential clause and the best non-essential clause. Here are some examples when
|
||||
// this optimization would kick in:
|
||||
// `quick fox` when maxscore(quick) = 1, maxscore(fox) = 1, minCompetitiveScore = 1.5
|
||||
// `the quick fox` when maxscore (the) = 0.1, maxscore(quick) = 1, maxscore(fox) = 1,
|
||||
// minCompetitiveScore = 1.5
|
||||
firstRequiredScorer = allScorers.length - 1;
|
||||
double maxRequiredScore = allScorers[firstEssentialScorer].maxWindowScore;
|
||||
|
||||
while (firstRequiredScorer > 0) {
|
||||
double maxPossibleScoreWithoutPreviousClause = maxRequiredScore;
|
||||
if (firstRequiredScorer > 1) {
|
||||
maxPossibleScoreWithoutPreviousClause += maxScoreSums[firstRequiredScorer - 2];
|
||||
}
|
||||
if ((float) maxPossibleScoreWithoutPreviousClause >= minCompetitiveScore) {
|
||||
break;
|
||||
}
|
||||
// The sum of maximum scores ignoring the previous clause is less than the minimum
|
||||
// competitive score, so the previous clause is required as well.
|
||||
--firstRequiredScorer;
|
||||
maxRequiredScore += allScorers[firstRequiredScorer].maxWindowScore;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -348,7 +428,6 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
// Stores the new minimum competitive score on the enclosing MaxScoreBulkScorer and sets the
// minCompetitiveScoreUpdated flag.
@Override
|
||||
public void setMinCompetitiveScore(float minScore) throws IOException {
|
||||
MaxScoreBulkScorer.this.minCompetitiveScore = minScore;
|
||||
minCompetitiveScoreUpdated = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,19 +18,43 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
|
||||
public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
||||
|
||||
// Scorer wrapper that limits how far advanceShallow may report: the returned boundary never
// goes beyond the last doc ID of the 2048-doc-aligned block that contains `target`.
private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
|
||||
|
||||
public CapMaxScoreWindowAt2048Scorer(Scorer in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advanceShallow(int target) throws IOException {
|
||||
// `target | 0x7FF` sets the 11 low bits, i.e. the last doc ID of target's 2048-doc block.
return Math.min(target | 0x7FF, in.advanceShallow(target));
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
// Delegates unchanged to the wrapped scorer.
return in.getMaxScore(upTo);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeDocuments(Directory dir) throws IOException {
|
||||
try (IndexWriter w =
|
||||
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
|
||||
|
@ -64,20 +88,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
|||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
Query clause1 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
|
||||
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
|
||||
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
|
||||
Scorer scorer1 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
||||
Scorer scorer2 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
||||
|
||||
BulkScorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
||||
|
||||
scorer.score(
|
||||
new LeafCollector() {
|
||||
|
@ -131,20 +158,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
|||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
Query clause1 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
|
||||
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
|
||||
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
|
||||
Scorer scorer1 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
||||
Scorer scorer2 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
||||
|
||||
BulkScorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
|
||||
|
||||
scorer.score(
|
||||
new LeafCollector() {
|
||||
|
@ -193,23 +223,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
|||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
Query clause1 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
|
||||
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
|
||||
Query clause3 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3);
|
||||
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
|
||||
Scorer scorer1 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
||||
Scorer scorer2 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
||||
Scorer scorer3 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
|
||||
|
||||
BulkScorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||
new MaxScoreBulkScorer(
|
||||
context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3));
|
||||
|
||||
scorer.score(
|
||||
new LeafCollector() {
|
||||
|
@ -263,23 +301,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
|||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3),
|
||||
BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
Query clause1 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2);
|
||||
Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
|
||||
Query clause3 =
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3);
|
||||
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
|
||||
Scorer scorer1 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
|
||||
Scorer scorer2 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
|
||||
Scorer scorer3 =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
|
||||
.scorer(context);
|
||||
scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
|
||||
|
||||
BulkScorer scorer =
|
||||
searcher
|
||||
.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1)
|
||||
.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||
new MaxScoreBulkScorer(
|
||||
context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3));
|
||||
|
||||
scorer.score(
|
||||
new LeafCollector() {
|
||||
|
@ -325,4 +371,311 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Minimal Weight stub used to construct FakeScorer instances. It is created with a null query,
// reports itself as non-cacheable, and does not support explain() or scorer().
private static class FakeWeight extends Weight {
|
||||
|
||||
protected FakeWeight() {
|
||||
super(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable(LeafReaderContext ctx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
// Scorer stub whose reported state is driven entirely by mutable fields, so that a test can
// control the doc ID, cost, max score and max-score boundary it reports. It cannot actually
// score documents: score() throws UnsupportedOperationException.
private static class FakeScorer extends Scorer {
|
||||
|
||||
// Label returned by toString().
final String toString;
|
||||
// Value returned by docID().
int docID = -1;
|
||||
// Value returned by advanceShallow(), regardless of the target.
int maxScoreUpTo = DocIdSetIterator.NO_MORE_DOCS;
|
||||
// Value returned by getMaxScore(), regardless of upTo.
float maxScore = 1f;
|
||||
// Passed to DocIdSetIterator.all() when iterator() is called.
int cost = 10;
|
||||
|
||||
protected FakeScorer(String toString) {
|
||||
super(new FakeWeight());
|
||||
this.toString = toString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return DocIdSetIterator.all(cost); // just so that it exposes the right cost
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advanceShallow(int target) throws IOException {
|
||||
return maxScoreUpTo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
return maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return toString;
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that only accepted documents are collected, for several minimum competitive scores.
// Three documents with identical "field" values are indexed into one segment, and a Bits
// instance that accepts only doc 1 is passed to the bulk scorer. For each tested minimum
// competitive score, exactly one hit (doc 1) must be collected.
public void testDeletes() throws IOException {
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(newLogMergePolicy());
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
Document doc1 = new Document();
|
||||
doc1.add(new StringField("field", "foo", Store.NO));
|
||||
doc1.add(new StringField("field", "bar", Store.NO));
|
||||
doc1.add(new StringField("field", "quux", Store.NO));
|
||||
Document doc2 = new Document();
|
||||
Document doc3 = new Document();
|
||||
// Copy the three "field" values to the other two documents before adding distinct ids.
for (IndexableField field : doc1) {
|
||||
doc2.add(field);
|
||||
doc3.add(field);
|
||||
}
|
||||
doc1.add(new StringField("id", "1", Store.NO));
|
||||
doc2.add(new StringField("id", "2", Store.NO));
|
||||
doc3.add(new StringField("id", "3", Store.NO));
|
||||
w.addDocument(doc1);
|
||||
w.addDocument(doc2);
|
||||
w.addDocument(doc3);
|
||||
|
||||
// Merge down to a single segment; the test only looks at leaves().get(0).
w.forceMerge(1);
|
||||
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
// Three optional constant-score clauses with boosts 1, 1.5 and 0.1.
Query query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(
|
||||
new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), 1f),
|
||||
Occur.SHOULD)
|
||||
.add(
|
||||
new BoostQuery(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("field", "bar"))), 1.5f),
|
||||
Occur.SHOULD)
|
||||
.add(
|
||||
new BoostQuery(
|
||||
new ConstantScoreQuery(new TermQuery(new Term("field", "quux"))), 0.1f),
|
||||
Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1f);
|
||||
|
||||
// Accepts only doc 1; docs 0 and 2 are treated as not accepted.
Bits liveDocs =
|
||||
new Bits() {
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return index == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return 3;
|
||||
}
|
||||
};
|
||||
|
||||
// Test min competitive scores that exercise different execution modes
|
||||
for (float minCompetitiveScore :
|
||||
new float[] {
|
||||
0f, // 3 essential clauses
|
||||
1f, // 2 essential clauses
|
||||
1.2f, // 1 essential clause
|
||||
2f // two required clauses
|
||||
}) {
|
||||
BulkScorer scorer = weight.bulkScorer(searcher.getIndexReader().leaves().get(0));
|
||||
LeafCollector collector =
|
||||
new LeafCollector() {
|
||||
|
||||
// Number of hits collected so far.
int i = 0;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
// Apply the tested threshold as soon as the scorer is provided.
scorer.setMinCompetitiveScore(minCompetitiveScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
// Only doc 1 may be collected, and only once.
assertEquals(1, doc);
|
||||
assertEquals(0, i++);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish() throws IOException {
|
||||
assertEquals(1, i);
|
||||
}
|
||||
};
|
||||
scorer.score(collector, liveDocs);
|
||||
// finish() is invoked explicitly here so that the hit-count assertion runs.
collector.finish();
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// This test simulates what happens over time for the query `the quick fox` as collection
|
||||
// progresses and the minimum competitive score increases.
|
||||
// Drives partitionScorers() directly with three fake clauses whose max scores are
// the = 0.1, quick = 1 and fox = 1.1, raising minCompetitiveScore step by step. After each
// step it checks firstEssentialScorer, firstRequiredScorer and, where relevant, the order of
// allScorers. allScorers is shuffled before every step, so the assertions do not depend on the
// order left behind by the previous partitioning.
public void testPartition() throws IOException {
|
||||
FakeScorer the = new FakeScorer("the");
|
||||
the.cost = 9_000;
|
||||
the.maxScore = 0.1f;
|
||||
FakeScorer quick = new FakeScorer("quick");
|
||||
quick.cost = 1_000;
|
||||
quick.maxScore = 1f;
|
||||
FakeScorer fox = new FakeScorer("fox");
|
||||
fox.cost = 900;
|
||||
fox.maxScore = 1.1f;
|
||||
|
||||
MaxScoreBulkScorer scorer = new MaxScoreBulkScorer(10_000, Arrays.asList(the, quick, fox));
|
||||
the.docID = 4;
|
||||
the.maxScoreUpTo = 130;
|
||||
quick.docID = 4;
|
||||
quick.maxScoreUpTo = 999;
|
||||
fox.docID = 10;
|
||||
fox.maxScoreUpTo = 1_200;
|
||||
|
||||
// minCompetitiveScore has not been assigned yet in this first step
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(0, scorer.firstEssentialScorer); // all clauses are essential
|
||||
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
|
||||
|
||||
// less than the maximum score of every clause
|
||||
scorer.minCompetitiveScore = 0.09f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential
|
||||
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
|
||||
|
||||
// equal to the maximum score of `the`
|
||||
scorer.minCompetitiveScore = 0.1f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential
|
||||
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
|
||||
|
||||
// greater than the maximum score of `the`
|
||||
scorer.minCompetitiveScore = 0.11f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(1, scorer.firstEssentialScorer); // the is non essential
|
||||
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
|
||||
// equal to the sum of the max scores of the and quick
|
||||
scorer.minCompetitiveScore = 1.1f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(1, scorer.firstEssentialScorer); // the is non essential
|
||||
assertEquals(3, scorer.firstRequiredScorer); // no required clauses
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
|
||||
// greater than the sum of the max scores of the and quick
|
||||
scorer.minCompetitiveScore = 1.11f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(2, scorer.firstRequiredScorer); // fox is required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// equal to the sum of the max scores of the and fox
|
||||
scorer.minCompetitiveScore = 1.2f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(2, scorer.firstRequiredScorer); // fox is required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// greater than the sum of the max scores of the and fox
|
||||
scorer.minCompetitiveScore = 1.21f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(1, scorer.firstRequiredScorer); // quick and fox are required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// equal to the sum of the max scores of quick and fox
|
||||
scorer.minCompetitiveScore = 2.1f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(1, scorer.firstRequiredScorer); // quick and fox are required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// greater than the sum of the max scores of quick and fox
|
||||
scorer.minCompetitiveScore = 2.11f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// greater than the sum of the max scores of quick and fox
// (this step repeats the previous one with the same threshold and the same expectations)
|
||||
scorer.minCompetitiveScore = 2.11f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// equal to the sum of the max scores of all terms
|
||||
scorer.minCompetitiveScore = 2.2f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertTrue(scorer.partitionScorers());
|
||||
assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential
|
||||
assertEquals(0, scorer.firstRequiredScorer); // all terms are required
|
||||
assertSame(the, scorer.allScorers[0].scorer);
|
||||
assertSame(quick, scorer.allScorers[1].scorer);
|
||||
assertSame(fox, scorer.allScorers[2].scorer);
|
||||
|
||||
// greater than the sum of the max scores of all terms
|
||||
scorer.minCompetitiveScore = 2.21f;
|
||||
Collections.shuffle(Arrays.asList(scorer.allScorers), random());
|
||||
scorer.updateMaxWindowScores(4, 100);
|
||||
assertFalse(scorer.partitionScorers()); // no possible match in this window
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue