mirror of https://github.com/apache/lucene.git
LUCENE-8770: BlockMaxConjunctionScorer now leverages two-phase iterators in order to avoid executing the second phase when scorers don't intersect
This commit is contained in:
parent
01d70ea4ca
commit
91e7576c45
|
@ -19,6 +19,9 @@ Improvements
|
||||||
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
|
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
|
||||||
as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi)
|
as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi)
|
||||||
|
|
||||||
|
* LUCENE-8770: BlockMaxConjunctionScorer now leverages two-phase iterators in order to avoid
|
||||||
|
executing the second phase when scorers don't intersect. (Adrien Grand, Jim Ferenczi)
|
||||||
|
|
||||||
======================= Lucene 8.1.1 =======================
|
======================= Lucene 8.1.1 =======================
|
||||||
(No Changes)
|
(No Changes)
|
||||||
|
|
||||||
|
|
|
@ -21,54 +21,80 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scorer for conjunctions that checks the maximum scores of each clause in
|
* Scorer for conjunctions that checks the maximum scores of each clause in
|
||||||
* order to potentially skip over blocks that can'h have competitive matches.
|
* order to potentially skip over blocks that can't have competitive matches.
|
||||||
*/
|
*/
|
||||||
final class BlockMaxConjunctionScorer extends Scorer {
|
final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
|
|
||||||
final Scorer[] scorers;
|
final Scorer[] scorers;
|
||||||
|
final DocIdSetIterator[] approximations;
|
||||||
|
final TwoPhaseIterator[] twoPhases;
|
||||||
final MaxScoreSumPropagator maxScorePropagator;
|
final MaxScoreSumPropagator maxScorePropagator;
|
||||||
float minScore;
|
float minScore;
|
||||||
final double[] minScores; // stores the min value of the sum of scores between 0..i for a hit to be competitive
|
|
||||||
double score;
|
|
||||||
|
|
||||||
/** Create a new {@link BlockMaxConjunctionScorer} from scoring clauses. */
|
/** Create a new {@link BlockMaxConjunctionScorer} from scoring clauses. */
|
||||||
BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
|
BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
|
||||||
super(weight);
|
super(weight);
|
||||||
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
|
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
|
||||||
for (Scorer scorer : scorers) {
|
// Sort scorer by cost
|
||||||
|
Arrays.sort(this.scorers, Comparator.comparingLong(s -> s.iterator().cost()));
|
||||||
|
this.maxScorePropagator = new MaxScoreSumPropagator(Arrays.asList(scorers));
|
||||||
|
|
||||||
|
this.approximations = new DocIdSetIterator[scorers.length];
|
||||||
|
List<TwoPhaseIterator> twoPhaseList = new ArrayList<>();
|
||||||
|
for (int i = 0; i < scorers.length; i++) {
|
||||||
|
Scorer scorer = scorers[i];
|
||||||
|
TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
|
||||||
|
if (twoPhase != null) {
|
||||||
|
twoPhaseList.add(twoPhase);
|
||||||
|
approximations[i] = twoPhase.approximation();
|
||||||
|
} else {
|
||||||
|
approximations[i] = scorer.iterator();
|
||||||
|
}
|
||||||
scorer.advanceShallow(0);
|
scorer.advanceShallow(0);
|
||||||
}
|
}
|
||||||
this.maxScorePropagator = new MaxScoreSumPropagator(scorersList);
|
this.twoPhases = twoPhaseList.toArray(new TwoPhaseIterator[twoPhaseList.size()]);
|
||||||
|
Arrays.sort(this.twoPhases, Comparator.comparingDouble(TwoPhaseIterator::matchCost));
|
||||||
|
}
|
||||||
|
|
||||||
// Put scorers with the higher max scores first
|
@Override
|
||||||
// We tie-break on cost
|
public TwoPhaseIterator twoPhaseIterator() {
|
||||||
Comparator<Scorer> comparator = (s1, s2) -> {
|
if (twoPhases.length == 0) {
|
||||||
int cmp;
|
return null;
|
||||||
try {
|
|
||||||
cmp = Float.compare(s2.getMaxScore(DocIdSetIterator.NO_MORE_DOCS), s1.getMaxScore(DocIdSetIterator.NO_MORE_DOCS));
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
}
|
||||||
if (cmp == 0) {
|
float matchCost = (float) Arrays.stream(twoPhases)
|
||||||
cmp = Long.compare(s1.iterator().cost(), s2.iterator().cost());
|
.mapToDouble(TwoPhaseIterator::matchCost)
|
||||||
|
.sum();
|
||||||
|
final DocIdSetIterator approx = approximation();
|
||||||
|
return new TwoPhaseIterator(approx) {
|
||||||
|
@Override
|
||||||
|
public boolean matches() throws IOException {
|
||||||
|
for (TwoPhaseIterator twoPhase : twoPhases) {
|
||||||
|
assert twoPhase.approximation().docID() == docID();
|
||||||
|
if (twoPhase.matches() == false) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float matchCost() {
|
||||||
|
return matchCost;
|
||||||
}
|
}
|
||||||
return cmp;
|
|
||||||
};
|
};
|
||||||
Arrays.sort(this.scorers, comparator);
|
|
||||||
minScores = new double[this.scorers.length];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocIdSetIterator iterator() {
|
public DocIdSetIterator iterator() {
|
||||||
// TODO: support two-phase
|
return twoPhases.length == 0 ? approximation() :
|
||||||
final Scorer leadScorer = this.scorers[0]; // higher max score
|
TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
|
||||||
final DocIdSetIterator[] iterators = Arrays.stream(this.scorers)
|
}
|
||||||
.map(Scorer::iterator)
|
|
||||||
.toArray(DocIdSetIterator[]::new);
|
private DocIdSetIterator approximation() {
|
||||||
final DocIdSetIterator lead = iterators[0];
|
final DocIdSetIterator lead = approximations[0];
|
||||||
|
|
||||||
return new DocIdSetIterator() {
|
return new DocIdSetIterator() {
|
||||||
|
|
||||||
|
@ -88,21 +114,6 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
private void moveToNextBlock(int target) throws IOException {
|
private void moveToNextBlock(int target) throws IOException {
|
||||||
upTo = advanceShallow(target);
|
upTo = advanceShallow(target);
|
||||||
maxScore = getMaxScore(upTo);
|
maxScore = getMaxScore(upTo);
|
||||||
|
|
||||||
// Also compute the minimum required scores for a hit to be competitive
|
|
||||||
// A double that is less than 'score' might still be converted to 'score'
|
|
||||||
// when casted to a float, so we go to the previous float to avoid this issue
|
|
||||||
minScores[minScores.length - 1] = minScore > 0 ? Math.nextDown(minScore) : 0;
|
|
||||||
for (int i = scorers.length - 1; i > 0; --i) {
|
|
||||||
double minScore = minScores[i];
|
|
||||||
float clauseMaxScore = scorers[i].getMaxScore(upTo);
|
|
||||||
if (minScore > clauseMaxScore) {
|
|
||||||
minScores[i - 1] = minScore - clauseMaxScore;
|
|
||||||
assert minScores[i - 1] + clauseMaxScore <= minScore;
|
|
||||||
} else {
|
|
||||||
minScores[i - 1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private int advanceTarget(int target) throws IOException {
|
private int advanceTarget(int target) throws IOException {
|
||||||
|
@ -159,18 +170,9 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
|
|
||||||
assert doc <= upTo;
|
assert doc <= upTo;
|
||||||
|
|
||||||
if (minScore > 0) {
|
|
||||||
score = leadScorer.score();
|
|
||||||
if (score < minScores[0]) {
|
|
||||||
// computing a score is usually less costly than advancing other clauses
|
|
||||||
doc = lead.advance(advanceTarget(doc + 1));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// then find agreement with other iterators
|
// then find agreement with other iterators
|
||||||
for (int i = 1; i < iterators.length; ++i) {
|
for (int i = 1; i < approximations.length; ++i) {
|
||||||
final DocIdSetIterator other = iterators[i];
|
final DocIdSetIterator other = approximations[i];
|
||||||
// other.doc may already be equal to doc if we "continued advanceHead"
|
// other.doc may already be equal to doc if we "continued advanceHead"
|
||||||
// on the previous iteration and the advance on the lead scorer exactly matched.
|
// on the previous iteration and the advance on the lead scorer exactly matched.
|
||||||
if (other.docID() < doc) {
|
if (other.docID() < doc) {
|
||||||
|
@ -184,23 +186,6 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
}
|
}
|
||||||
|
|
||||||
assert other.docID() == doc;
|
assert other.docID() == doc;
|
||||||
if (minScore > 0) {
|
|
||||||
score += scorers[i].score();
|
|
||||||
|
|
||||||
if (score < minScores[i]) {
|
|
||||||
// computing a score is usually less costly than advancing the next clause
|
|
||||||
doc = lead.advance(advanceTarget(doc + 1));
|
|
||||||
continue advanceHead;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minScore > 0 == false) {
|
|
||||||
// the score hasn't been computed on the fly, do it now
|
|
||||||
score = 0;
|
|
||||||
for (Scorer scorer : scorers) {
|
|
||||||
score += scorer.score();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// success - all iterators are on the same doc and the score is competitive
|
// success - all iterators are on the same doc and the score is competitive
|
||||||
|
@ -217,6 +202,10 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
|
double score = 0;
|
||||||
|
for (Scorer scorer : scorers) {
|
||||||
|
score += scorer.score();
|
||||||
|
}
|
||||||
return (float) score;
|
return (float) score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,5 +246,4 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
||||||
}
|
}
|
||||||
return children;
|
return children;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,14 @@ public class TestBlockMaxConjunction extends LuceneTestCase {
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Query maybeWrapTwoPhase(Query query) {
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
query = new RandomApproximationQuery(query, random());
|
||||||
|
query = new AssertingQuery(random(), query);
|
||||||
|
}
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
public void testRandom() throws IOException {
|
public void testRandom() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||||
|
@ -75,6 +83,18 @@ public class TestBlockMaxConjunction extends LuceneTestCase {
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
CheckHits.checkTopScores(random(), filteredQuery, searcher);
|
CheckHits.checkTopScores(random(), filteredQuery, searcher);
|
||||||
|
|
||||||
|
builder = new BooleanQuery.Builder();
|
||||||
|
for (int i = 0; i < numClauses; ++i) {
|
||||||
|
builder.add(maybeWrapTwoPhase(new TermQuery(new Term("foo", Integer.toString(start + i)))), Occur.MUST);
|
||||||
|
}
|
||||||
|
|
||||||
|
Query twoPhaseQuery = new BooleanQuery.Builder()
|
||||||
|
.add(query, Occur.MUST)
|
||||||
|
.add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
CheckHits.checkTopScores(random(), twoPhaseQuery, searcher);
|
||||||
}
|
}
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
|
|
Loading…
Reference in New Issue