LUCENE-8770: BlockMaxConjunctionScorer now leverages two-phase iterators in order to avoid executing the second phase when scorers don't intersect

This commit is contained in:
jimczi 2019-05-21 11:35:44 +02:00
parent 0cb92993db
commit 4640a527a4
3 changed files with 80 additions and 69 deletions

View File

@ -49,6 +49,9 @@ Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi) as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi)
* LUCENE-8770: BlockMaxConjunctionScorer now leverages two-phase iterators in order to avoid
executing the second phase when scorers don't intersect. (Adrien Grand, Jim Ferenczi)
======================= Lucene 8.1.1 ======================= ======================= Lucene 8.1.1 =======================
(No Changes) (No Changes)

View File

@ -21,54 +21,80 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.List;
/** /**
* Scorer for conjunctions that checks the maximum scores of each clause in * Scorer for conjunctions that checks the maximum scores of each clause in
* order to potentially skip over blocks that can'h have competitive matches. * order to potentially skip over blocks that can't have competitive matches.
*/ */
final class BlockMaxConjunctionScorer extends Scorer { final class BlockMaxConjunctionScorer extends Scorer {
final Scorer[] scorers; final Scorer[] scorers;
final DocIdSetIterator[] approximations;
final TwoPhaseIterator[] twoPhases;
final MaxScoreSumPropagator maxScorePropagator; final MaxScoreSumPropagator maxScorePropagator;
float minScore; float minScore;
final double[] minScores; // stores the min value of the sum of scores between 0..i for a hit to be competitive
double score;
/** Create a new {@link BlockMaxConjunctionScorer} from scoring clauses. */ /** Create a new {@link BlockMaxConjunctionScorer} from scoring clauses. */
BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException { BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
super(weight); super(weight);
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]); this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
for (Scorer scorer : scorers) { // Sort scorer by cost
Arrays.sort(this.scorers, Comparator.comparingLong(s -> s.iterator().cost()));
this.maxScorePropagator = new MaxScoreSumPropagator(Arrays.asList(scorers));
this.approximations = new DocIdSetIterator[scorers.length];
List<TwoPhaseIterator> twoPhaseList = new ArrayList<>();
for (int i = 0; i < scorers.length; i++) {
Scorer scorer = scorers[i];
TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
if (twoPhase != null) {
twoPhaseList.add(twoPhase);
approximations[i] = twoPhase.approximation();
} else {
approximations[i] = scorer.iterator();
}
scorer.advanceShallow(0); scorer.advanceShallow(0);
} }
this.maxScorePropagator = new MaxScoreSumPropagator(scorersList); this.twoPhases = twoPhaseList.toArray(new TwoPhaseIterator[twoPhaseList.size()]);
Arrays.sort(this.twoPhases, Comparator.comparingDouble(TwoPhaseIterator::matchCost));
}
// Put scorers with the higher max scores first @Override
// We tie-break on cost public TwoPhaseIterator twoPhaseIterator() {
Comparator<Scorer> comparator = (s1, s2) -> { if (twoPhases.length == 0) {
int cmp; return null;
try {
cmp = Float.compare(s2.getMaxScore(DocIdSetIterator.NO_MORE_DOCS), s1.getMaxScore(DocIdSetIterator.NO_MORE_DOCS));
} catch (IOException e) {
throw new RuntimeException(e);
} }
if (cmp == 0) { float matchCost = (float) Arrays.stream(twoPhases)
cmp = Long.compare(s1.iterator().cost(), s2.iterator().cost()); .mapToDouble(TwoPhaseIterator::matchCost)
.sum();
final DocIdSetIterator approx = approximation();
return new TwoPhaseIterator(approx) {
@Override
public boolean matches() throws IOException {
for (TwoPhaseIterator twoPhase : twoPhases) {
assert twoPhase.approximation().docID() == docID();
if (twoPhase.matches() == false) {
return false;
}
}
return true;
}
@Override
public float matchCost() {
return matchCost;
} }
return cmp;
}; };
Arrays.sort(this.scorers, comparator);
minScores = new double[this.scorers.length];
} }
@Override @Override
public DocIdSetIterator iterator() { public DocIdSetIterator iterator() {
// TODO: support two-phase return twoPhases.length == 0 ? approximation() :
final Scorer leadScorer = this.scorers[0]; // higher max score TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
final DocIdSetIterator[] iterators = Arrays.stream(this.scorers) }
.map(Scorer::iterator)
.toArray(DocIdSetIterator[]::new); private DocIdSetIterator approximation() {
final DocIdSetIterator lead = iterators[0]; final DocIdSetIterator lead = approximations[0];
return new DocIdSetIterator() { return new DocIdSetIterator() {
@ -88,21 +114,6 @@ final class BlockMaxConjunctionScorer extends Scorer {
private void moveToNextBlock(int target) throws IOException { private void moveToNextBlock(int target) throws IOException {
upTo = advanceShallow(target); upTo = advanceShallow(target);
maxScore = getMaxScore(upTo); maxScore = getMaxScore(upTo);
// Also compute the minimum required scores for a hit to be competitive
// A double that is less than 'score' might still be converted to 'score'
// when casted to a float, so we go to the previous float to avoid this issue
minScores[minScores.length - 1] = minScore > 0 ? Math.nextDown(minScore) : 0;
for (int i = scorers.length - 1; i > 0; --i) {
double minScore = minScores[i];
float clauseMaxScore = scorers[i].getMaxScore(upTo);
if (minScore > clauseMaxScore) {
minScores[i - 1] = minScore - clauseMaxScore;
assert minScores[i - 1] + clauseMaxScore <= minScore;
} else {
minScores[i - 1] = 0;
}
}
} }
private int advanceTarget(int target) throws IOException { private int advanceTarget(int target) throws IOException {
@ -159,18 +170,9 @@ final class BlockMaxConjunctionScorer extends Scorer {
assert doc <= upTo; assert doc <= upTo;
if (minScore > 0) {
score = leadScorer.score();
if (score < minScores[0]) {
// computing a score is usually less costly than advancing other clauses
doc = lead.advance(advanceTarget(doc + 1));
continue;
}
}
// then find agreement with other iterators // then find agreement with other iterators
for (int i = 1; i < iterators.length; ++i) { for (int i = 1; i < approximations.length; ++i) {
final DocIdSetIterator other = iterators[i]; final DocIdSetIterator other = approximations[i];
// other.doc may already be equal to doc if we "continued advanceHead" // other.doc may already be equal to doc if we "continued advanceHead"
// on the previous iteration and the advance on the lead scorer exactly matched. // on the previous iteration and the advance on the lead scorer exactly matched.
if (other.docID() < doc) { if (other.docID() < doc) {
@ -184,23 +186,6 @@ final class BlockMaxConjunctionScorer extends Scorer {
} }
assert other.docID() == doc; assert other.docID() == doc;
if (minScore > 0) {
score += scorers[i].score();
if (score < minScores[i]) {
// computing a score is usually less costly than advancing the next clause
doc = lead.advance(advanceTarget(doc + 1));
continue advanceHead;
}
}
}
if (minScore > 0 == false) {
// the score hasn't been computed on the fly, do it now
score = 0;
for (Scorer scorer : scorers) {
score += scorer.score();
}
} }
// success - all iterators are on the same doc and the score is competitive // success - all iterators are on the same doc and the score is competitive
@ -217,6 +202,10 @@ final class BlockMaxConjunctionScorer extends Scorer {
@Override @Override
public float score() throws IOException { public float score() throws IOException {
double score = 0;
for (Scorer scorer : scorers) {
score += scorer.score();
}
return (float) score; return (float) score;
} }
@ -257,5 +246,4 @@ final class BlockMaxConjunctionScorer extends Scorer {
} }
return children; return children;
} }
} }

View File

@ -40,6 +40,14 @@ public class TestBlockMaxConjunction extends LuceneTestCase {
return query; return query;
} }
/**
 * Randomly decorates {@code query} for the test.
 *
 * <p>When {@code random().nextBoolean()} is true, the query is wrapped in a
 * {@link RandomApproximationQuery} and that wrapper is in turn wrapped in an
 * {@link AssertingQuery}; otherwise the query is returned as given.
 * NOTE(review): judging by the helper's name, the wrapping is presumably meant to
 * make the clause expose a two-phase iterator - confirm against RandomApproximationQuery.
 *
 * @param query the query to possibly wrap
 * @return either {@code query} itself or the wrapped query
 */
private Query maybeWrapTwoPhase(Query query) {
  if (random().nextBoolean() == false) {
    // leave the clause untouched
    return query;
  }
  final Query approximated = new RandomApproximationQuery(query, random());
  return new AssertingQuery(random(), approximated);
}
public void testRandom() throws IOException { public void testRandom() throws IOException {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
@ -75,6 +83,18 @@ public class TestBlockMaxConjunction extends LuceneTestCase {
.build(); .build();
CheckHits.checkTopScores(random(), filteredQuery, searcher); CheckHits.checkTopScores(random(), filteredQuery, searcher);
// Build the same conjunction again, but let maybeWrapTwoPhase randomly wrap
// each MUST clause before it is added.
builder = new BooleanQuery.Builder();
for (int i = 0; i < numClauses; ++i) {
  builder.add(maybeWrapTwoPhase(new TermQuery(new Term("foo", Integer.toString(start + i)))), Occur.MUST);
}
// The MUST clause has to be the conjunction built just above. Reusing the
// earlier `query` here would leave the wrapped clauses unused and this check
// would only repeat the filtered-query check on unwrapped clauses.
Query twoPhaseQuery = new BooleanQuery.Builder()
    .add(builder.build(), Occur.MUST)
    .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
    .build();
CheckHits.checkTopScores(random(), twoPhaseQuery, searcher);
} }
reader.close(); reader.close();
dir.close(); dir.close();