From 611bbbd9512d8e8b4fbc141882e0194f8077a22e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 24 Oct 2023 17:54:23 +0200 Subject: [PATCH] Sometimes intersect the essential clause and the best non-essential clause. (#12589) The idea behind MAXSCORE is to run disjunctions as `+(essentialClause1 ... essentialClauseM) nonEssentialClause1 ... nonEssentialClauseN`, moving more and more clauses from the essential list to the non-essential list as the minimum competitive score increases. For instance, a query such as `the book of life`, which I found in the Tantivy benchmark, ends up running as `+book the of life` after some time, ie. with one required clause and other clauses optional. This is because matching `the`, `of` and `life` alone is not good enough for yielding a match. Here are some statistics in that case: - min competitive score: 3.4781857 - max_window_score(book): 2.8796153 - max_window_score(life): 2.037863 - max_window_score(the): 0.103848875 - max_window_score(of): 0.19427927 Actually, if you look at these statistics, we could do better, because a match may only be competitive if it matches both `book` and `life`, so this query could actually execute as `+book +life the of`, which may help evaluate fewer documents compared to `+book the of life`, especially if you enable recursive graph bisection. This is what this PR tries to achieve: in the event that there is a single essential clause and matching all clauses but the best non-essential clause cannot produce a competitive match, then the scorer will only evaluate documents that match the intersection of the essential clause and the best non-essential clause. It's worth noting that this optimization would kick in very frequently on 2-clause disjunctions. 
--- lucene/CHANGES.txt | 3 + .../lucene/search/MaxScoreBulkScorer.java | 169 +++++-- .../lucene/search/TestMaxScoreBulkScorer.java | 461 ++++++++++++++++-- 3 files changed, 534 insertions(+), 99 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b21c6d807da..eea75bc7104 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -101,6 +101,9 @@ Optimizations * GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X) +* GITHUB#12589: Disjunctions now sometimes run as conjunctions when the minimum + competitive score requires multiple clauses to match. (Adrien Grand) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 3c3f3db7491..026bf1f7d53 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -30,19 +30,21 @@ final class MaxScoreBulkScorer extends BulkScorer { private final int maxDoc; // All scorers, sorted by increasing max score. - private final DisiWrapper[] allScorers; + final DisiWrapper[] allScorers; private final DisiWrapper[] scratch; // These are the last scorers from `allScorers` that are "essential", ie. required for a match to // have a competitive score. private final DisiPriorityQueue essentialQueue; // Index of the first essential scorer, ie. essentialQueue contains all scorers from // allScorers[firstEssentialScorer:]. All scorers below this index are non-essential. - private int firstEssentialScorer; + int firstEssentialScorer; + // Index of the first scorer that is required, this scorer and all following scorers are required + // for a document to match. 
+ int firstRequiredScorer; private final long cost; - private float minCompetitiveScore; - private boolean minCompetitiveScoreUpdated; + float minCompetitiveScore; private Score scorable = new Score(); - private final double[] maxScoreSums; + final double[] maxScoreSums; private final long[] windowMatches = new long[FixedBitSet.bits2words(INNER_WINDOW_SIZE)]; private final double[] windowScores = new double[INNER_WINDOW_SIZE]; @@ -113,25 +115,8 @@ final class MaxScoreBulkScorer extends BulkScorer { while (top.doc < outerWindowMax) { scoreInnerWindow(collector, acceptDocs, outerWindowMax); top = essentialQueue.top(); - - if (minCompetitiveScoreUpdated) { - minCompetitiveScoreUpdated = false; - if (partitionScorers() == false) { - outerWindowMin = outerWindowMax; - continue outer; - } else { - // Partitioning may have swapped essential and non-essential scorers, and some of the - // non-essential scorers may be behind the last scored doc. So let's advance to the next - // candidate match. - final int nextCandidateMatch = top.doc; - top = essentialQueue.top(); - while (top.doc < nextCandidateMatch) { - top.doc = top.iterator.advance(nextCandidateMatch); - top = essentialQueue.updateTop(); - } - } - } } + outerWindowMin = outerWindowMax; } @@ -140,17 +125,20 @@ final class MaxScoreBulkScorer extends BulkScorer { private void scoreInnerWindow(LeafCollector collector, Bits acceptDocs, int max) throws IOException { - DisiWrapper top = essentialQueue.top(); - - DisiWrapper top2 = essentialQueue.top2(); - if (top2 == null) { - scoreInnerWindowSingleEssentialClause(collector, acceptDocs, max); - } else if (top2.doc - INNER_WINDOW_SIZE / 2 >= top.doc) { - // The first half of the window would match a single clause. Let's collect this single clause - // until the next doc ID of the next clause. 
- scoreInnerWindowSingleEssentialClause(collector, acceptDocs, Math.min(max, top2.doc)); + if (allScorers.length - firstRequiredScorer >= 2) { + scoreInnerWindowAsConjunction(collector, acceptDocs, max); } else { - scoreInnerWindowMultipleEssentialClauses(collector, acceptDocs, max); + DisiWrapper top = essentialQueue.top(); + DisiWrapper top2 = essentialQueue.top2(); + if (top2 == null) { + scoreInnerWindowSingleEssentialClause(collector, acceptDocs, max); + } else if (top2.doc - INNER_WINDOW_SIZE / 2 >= top.doc) { + // The first half of the window would match a single clause. Let's collect this single + // clause until the next doc ID of the next clause. + scoreInnerWindowSingleEssentialClause(collector, acceptDocs, Math.min(max, top2.doc)); + } else { + scoreInnerWindowMultipleEssentialClauses(collector, acceptDocs, max); + } } } @@ -164,17 +152,79 @@ final class MaxScoreBulkScorer extends BulkScorer { if (acceptDocs != null && acceptDocs.get(doc) == false) { continue; } - scoreNonEssentialClauses(collector, doc, top.scorer.score()); - if (minCompetitiveScoreUpdated) { - // force scorers to be partitioned again before collecting more hits - top.iterator.nextDoc(); - break; - } + scoreNonEssentialClauses(collector, doc, top.scorer.score(), firstEssentialScorer); } top.doc = top.iterator.docID(); essentialQueue.updateTop(); } + private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptDocs, int max) + throws IOException { + assert firstEssentialScorer == allScorers.length - 1; + assert firstRequiredScorer <= allScorers.length - 2; + DisiWrapper lead1 = allScorers[allScorers.length - 1]; + assert essentialQueue.size() == 1; + assert lead1 == essentialQueue.top(); + DisiWrapper lead2 = allScorers[allScorers.length - 2]; + if (lead1.doc < lead2.doc) { + lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max)); + } + // maximum score contribution of all scorers but the lead + double maxScoreSumAtLead2 = maxScoreSums[allScorers.length - 2]; + + 
outer: + while (lead1.doc < max) { + + if (acceptDocs != null && acceptDocs.get(lead1.doc) == false) { + lead1.doc = lead1.iterator.nextDoc(); + continue; + } + + double score = lead1.scorer.score(); + + // We specialize handling the second best scorer, which seems to help a bit with performance. + // But this is the exact same logic as in the below for loop. + if ((float) MathUtil.sumUpperBound(score + maxScoreSumAtLead2, allScorers.length) + < minCompetitiveScore) { + // a competitive match is not possible according to max scores, skip to the next candidate + lead1.doc = lead1.iterator.nextDoc(); + continue; + } + + if (lead2.doc < lead1.doc) { + lead2.doc = lead2.iterator.advance(lead1.doc); + } + if (lead2.doc != lead1.doc) { + lead1.doc = lead1.iterator.advance(Math.min(lead2.doc, max)); + continue; + } + + score += lead2.scorer.score(); + + for (int i = allScorers.length - 3; i >= firstRequiredScorer; --i) { + if ((float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length) + < minCompetitiveScore) { + // a competitive match is not possible according to max scores, skip to the next candidate + lead1.doc = lead1.iterator.nextDoc(); + continue outer; + } + + DisiWrapper w = allScorers[i]; + if (w.doc < lead1.doc) { + w.doc = w.iterator.advance(lead1.doc); + } + if (w.doc != lead1.doc) { + lead1.doc = lead1.iterator.advance(Math.min(w.doc, max)); + continue outer; + } + score += w.scorer.score(); + } + + scoreNonEssentialClauses(collector, lead1.doc, score, firstRequiredScorer); + lead1.doc = lead1.iterator.nextDoc(); + } + } + private void scoreInnerWindowMultipleEssentialClauses( LeafCollector collector, Bits acceptDocs, int max) throws IOException { DisiWrapper top = essentialQueue.top(); @@ -206,7 +256,7 @@ final class MaxScoreBulkScorer extends BulkScorer { double score = windowScores[index]; windowScores[index] = 0d; - scoreNonEssentialClauses(collector, doc, score); + scoreNonEssentialClauses(collector, doc, score, firstEssentialScorer); } } 
} @@ -230,7 +280,7 @@ final class MaxScoreBulkScorer extends BulkScorer { return windowMax; } - private void updateMaxWindowScores(int windowMin, int windowMax) throws IOException { + void updateMaxWindowScores(int windowMin, int windowMax) throws IOException { for (DisiWrapper scorer : allScorers) { if (scorer.doc < windowMax) { if (scorer.doc < windowMin) { @@ -246,10 +296,11 @@ final class MaxScoreBulkScorer extends BulkScorer { } } - private void scoreNonEssentialClauses(LeafCollector collector, int doc, double essentialScore) + private void scoreNonEssentialClauses( + LeafCollector collector, int doc, double essentialScore, int numNonEssentialClauses) throws IOException { double score = essentialScore; - for (int i = firstEssentialScorer - 1; i >= 0; --i) { + for (int i = numNonEssentialClauses - 1; i >= 0; --i) { float maxPossibleScore = (float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length); if (maxPossibleScore < minCompetitiveScore) { @@ -270,7 +321,7 @@ final class MaxScoreBulkScorer extends BulkScorer { collector.collect(doc); } - private boolean partitionScorers() { + boolean partitionScorers() { // Partitioning scorers is an optimization problem: the optimal set of non-essential scorers is // the subset of scorers whose sum of max window scores is less than the minimum competitive // score that maximizes the sum of costs. 
@@ -303,6 +354,8 @@ final class MaxScoreBulkScorer extends BulkScorer { } } + firstRequiredScorer = allScorers.length; + if (firstEssentialScorer == allScorers.length) { return false; } @@ -311,6 +364,33 @@ final class MaxScoreBulkScorer extends BulkScorer { for (int i = firstEssentialScorer; i < allScorers.length; ++i) { essentialQueue.add(allScorers[i]); } + + if (firstEssentialScorer == allScorers.length - 1) { // single essential clause + // If there is a single essential clause and matching it plus all non-essential clauses but + // the best one is not enough to yield a competitive match, the we know that hits must match + // both the essential clause and the best non-essential clause. Here are some examples when + // this optimization would kick in: + // `quick fox` when maxscore(quick) = 1, maxscore(fox) = 1, minCompetitiveScore = 1.5 + // `the quick fox` when maxscore (the) = 0.1, maxscore(quick) = 1, maxscore(fox) = 1, + // minCompetitiveScore = 1.5 + firstRequiredScorer = allScorers.length - 1; + double maxRequiredScore = allScorers[firstEssentialScorer].maxWindowScore; + + while (firstRequiredScorer > 0) { + double maxPossibleScoreWithoutPreviousClause = maxRequiredScore; + if (firstRequiredScorer > 1) { + maxPossibleScoreWithoutPreviousClause += maxScoreSums[firstRequiredScorer - 2]; + } + if ((float) maxPossibleScoreWithoutPreviousClause >= minCompetitiveScore) { + break; + } + // The sum of maximum scores ignoring the previous clause is less than the minimum + // competitive + --firstRequiredScorer; + maxRequiredScore += allScorers[firstRequiredScorer].maxWindowScore; + } + } + return true; } @@ -348,7 +428,6 @@ final class MaxScoreBulkScorer extends BulkScorer { @Override public void setMinCompetitiveScore(float minScore) throws IOException { MaxScoreBulkScorer.this.minCompetitiveScore = minScore; - minCompetitiveScoreUpdated = true; } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java 
b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java index 3ec87d14000..c000c82eb13 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java @@ -18,19 +18,43 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.Bits; // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept public class TestMaxScoreBulkScorer extends LuceneTestCase { + private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer { + + public CapMaxScoreWindowAt2048Scorer(Scorer in) { + super(in); + } + + @Override + public int advanceShallow(int target) throws IOException { + return Math.min(target | 0x7FF, in.advanceShallow(target)); + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return in.getMaxScore(upTo); + } + } + private void writeDocuments(Directory dir) throws IOException { try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) { @@ -64,20 +88,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { try (IndexReader reader = DirectoryReader.open(dir)) { IndexSearcher searcher = 
newSearcher(reader); - Query query = - new BooleanQuery.Builder() - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .build(); + Query clause1 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2); + Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))); + LeafReaderContext context = searcher.getIndexReader().leaves().get(0); + Scorer scorer1 = + searcher + .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); + Scorer scorer2 = + searcher + .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); BulkScorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .bulkScorer(searcher.getIndexReader().leaves().get(0)); + new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); scorer.score( new LeafCollector() { @@ -131,20 +158,23 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { try (IndexReader reader = DirectoryReader.open(dir)) { IndexSearcher searcher = newSearcher(reader); - Query query = - new BooleanQuery.Builder() - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .build(); + Query clause1 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2); + Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))); + LeafReaderContext context = searcher.getIndexReader().leaves().get(0); + Scorer scorer1 = + searcher + .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) + 
.scorer(context); + scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); + Scorer scorer2 = + searcher + .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); BulkScorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .bulkScorer(searcher.getIndexReader().leaves().get(0)); + new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2)); scorer.score( new LeafCollector() { @@ -193,23 +223,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { try (IndexReader reader = DirectoryReader.open(dir)) { IndexSearcher searcher = newSearcher(reader); - Query query = - new BooleanQuery.Builder() - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3), - BooleanClause.Occur.SHOULD) - .build(); + Query clause1 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2); + Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))); + Query clause3 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3); + LeafReaderContext context = searcher.getIndexReader().leaves().get(0); + Scorer scorer1 = + searcher + .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); + Scorer scorer2 = + searcher + .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); + Scorer scorer3 = + searcher + .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3); BulkScorer 
scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .bulkScorer(searcher.getIndexReader().leaves().get(0)); + new MaxScoreBulkScorer( + context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3)); scorer.score( new LeafCollector() { @@ -263,23 +301,31 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { try (IndexReader reader = DirectoryReader.open(dir)) { IndexSearcher searcher = newSearcher(reader); - Query query = - new BooleanQuery.Builder() - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2), - BooleanClause.Occur.SHOULD) - .add( - new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))), - BooleanClause.Occur.SHOULD) - .add( - new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3), - BooleanClause.Occur.SHOULD) - .build(); + Query clause1 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "A"))), 2); + Query clause2 = new ConstantScoreQuery(new TermQuery(new Term("foo", "B"))); + Query clause3 = + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("foo", "C"))), 3); + LeafReaderContext context = searcher.getIndexReader().leaves().get(0); + Scorer scorer1 = + searcher + .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1); + Scorer scorer2 = + searcher + .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2); + Scorer scorer3 = + searcher + .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f) + .scorer(context); + scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3); BulkScorer scorer = - searcher - .createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1) - .bulkScorer(searcher.getIndexReader().leaves().get(0)); + new MaxScoreBulkScorer( + context.reader().maxDoc(), Arrays.asList(scorer1, scorer2, scorer3)); 
scorer.score( new LeafCollector() { @@ -325,4 +371,311 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase { } } } + + private static class FakeWeight extends Weight { + + protected FakeWeight() { + super(null); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + throw new UnsupportedOperationException(); + } + } + + private static class FakeScorer extends Scorer { + + final String toString; + int docID = -1; + int maxScoreUpTo = DocIdSetIterator.NO_MORE_DOCS; + float maxScore = 1f; + int cost = 10; + + protected FakeScorer(String toString) { + super(new FakeWeight()); + this.toString = toString; + } + + @Override + public int docID() { + return docID; + } + + @Override + public DocIdSetIterator iterator() { + return DocIdSetIterator.all(cost); // just so that it exposes the right cost + } + + @Override + public int advanceShallow(int target) throws IOException { + return maxScoreUpTo; + } + + @Override + public float getMaxScore(int upTo) throws IOException { + return maxScore; + } + + @Override + public float score() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return toString; + } + } + + public void testDeletes() throws IOException { + + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(newLogMergePolicy()); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc1 = new Document(); + doc1.add(new StringField("field", "foo", Store.NO)); + doc1.add(new StringField("field", "bar", Store.NO)); + doc1.add(new StringField("field", "quux", Store.NO)); + Document doc2 = new Document(); + Document doc3 = new Document(); + for (IndexableField field : doc1) { + 
doc2.add(field); + doc3.add(field); + } + doc1.add(new StringField("id", "1", Store.NO)); + doc2.add(new StringField("id", "2", Store.NO)); + doc3.add(new StringField("id", "3", Store.NO)); + w.addDocument(doc1); + w.addDocument(doc2); + w.addDocument(doc3); + + w.forceMerge(1); + + IndexReader reader = DirectoryReader.open(w); + w.close(); + + Query query = + new BooleanQuery.Builder() + .add( + new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), 1f), + Occur.SHOULD) + .add( + new BoostQuery( + new ConstantScoreQuery(new TermQuery(new Term("field", "bar"))), 1.5f), + Occur.SHOULD) + .add( + new BoostQuery( + new ConstantScoreQuery(new TermQuery(new Term("field", "quux"))), 0.1f), + Occur.SHOULD) + .build(); + + IndexSearcher searcher = newSearcher(reader); + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1f); + + Bits liveDocs = + new Bits() { + @Override + public boolean get(int index) { + return index == 1; + } + + @Override + public int length() { + return 3; + } + }; + + // Test min competitive scores that exercise different execution modes + for (float minCompetitiveScore : + new float[] { + 0f, // 3 essential clauses + 1f, // 2 essential clauses + 1.2f, // 1 essential clause + 2f // two required clauses + }) { + BulkScorer scorer = weight.bulkScorer(searcher.getIndexReader().leaves().get(0)); + LeafCollector collector = + new LeafCollector() { + + int i = 0; + + @Override + public void setScorer(Scorable scorer) throws IOException { + scorer.setMinCompetitiveScore(minCompetitiveScore); + } + + @Override + public void collect(int doc) throws IOException { + assertEquals(1, doc); + assertEquals(0, i++); + } + + @Override + public void finish() throws IOException { + assertEquals(1, i); + } + }; + scorer.score(collector, liveDocs); + collector.finish(); + } + + reader.close(); + dir.close(); + } + + // This test simulates what happens over time for the query `the quick fox` as collection + // 
progresses and the minimum competitive score increases. + public void testPartition() throws IOException { + FakeScorer the = new FakeScorer("the"); + the.cost = 9_000; + the.maxScore = 0.1f; + FakeScorer quick = new FakeScorer("quick"); + quick.cost = 1_000; + quick.maxScore = 1f; + FakeScorer fox = new FakeScorer("fox"); + fox.cost = 900; + fox.maxScore = 1.1f; + + MaxScoreBulkScorer scorer = new MaxScoreBulkScorer(10_000, Arrays.asList(the, quick, fox)); + the.docID = 4; + the.maxScoreUpTo = 130; + quick.docID = 4; + quick.maxScoreUpTo = 999; + fox.docID = 10; + fox.maxScoreUpTo = 1_200; + + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(0, scorer.firstEssentialScorer); // all clauses are essential + assertEquals(3, scorer.firstRequiredScorer); // no required clauses + + // less than the minimum score of every clause + scorer.minCompetitiveScore = 0.09f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential + assertEquals(3, scorer.firstRequiredScorer); // no required clauses + + // equal to the maximum score of `the` + scorer.minCompetitiveScore = 0.1f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(0, scorer.firstEssentialScorer); // all clauses are still essential + assertEquals(3, scorer.firstRequiredScorer); // no required clauses + + // gt than the minimum score of `the` + scorer.minCompetitiveScore = 0.11f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(1, scorer.firstEssentialScorer); // the is non essential + assertEquals(3, 
scorer.firstRequiredScorer); // no required clauses + assertSame(the, scorer.allScorers[0].scorer); + + // equal to the sum of the max scores of the and quick + scorer.minCompetitiveScore = 1.1f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(1, scorer.firstEssentialScorer); // the is non essential + assertEquals(3, scorer.firstRequiredScorer); // no required clauses + assertSame(the, scorer.allScorers[0].scorer); + + // greater than the sum of the max scores of the and quick + scorer.minCompetitiveScore = 1.11f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(2, scorer.firstRequiredScorer); // fox is required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // equal to the sum of the max scores of the and fox + scorer.minCompetitiveScore = 1.2f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(2, scorer.firstRequiredScorer); // fox is required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // greater than the sum of the max scores of the and fox + scorer.minCompetitiveScore = 1.21f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(1, scorer.firstRequiredScorer); // 
quick and fox are required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // equal to the sum of the max scores of quick and fox + scorer.minCompetitiveScore = 2.1f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(1, scorer.firstRequiredScorer); // quick and fox are required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // greater than the sum of the max scores of quick and fox + scorer.minCompetitiveScore = 2.11f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(0, scorer.firstRequiredScorer); // all terms are required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // greater than the sum of the max scores of quick and fox + scorer.minCompetitiveScore = 2.11f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(0, scorer.firstRequiredScorer); // all terms are required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // equal to the sum of the max scores of all terms + scorer.minCompetitiveScore = 2.2f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + 
scorer.updateMaxWindowScores(4, 100); + assertTrue(scorer.partitionScorers()); + assertEquals(2, scorer.firstEssentialScorer); // the and quick are non essential + assertEquals(0, scorer.firstRequiredScorer); // all terms are required + assertSame(the, scorer.allScorers[0].scorer); + assertSame(quick, scorer.allScorers[1].scorer); + assertSame(fox, scorer.allScorers[2].scorer); + + // greater than the sum of the max scores of all terms + scorer.minCompetitiveScore = 2.21f; + Collections.shuffle(Arrays.asList(scorer.allScorers), random()); + scorer.updateMaxWindowScores(4, 100); + assertFalse(scorer.partitionScorers()); // no possible match in this window + } }