Remove scoreAll() optimization from DefaultBulkScorer.

I cannot see benefits from this optimization anymore when running luceneutil.
However, I do see some benefits from specializing cases when the collector
produces a competitive iterator or when the scorer produces a two-phase
iterator.
This commit is contained in:
Adrien Grand 2024-12-04 16:54:31 +01:00
parent 6c48b404cd
commit dadd548936
1 changed file with 89 additions and 83 deletions

View File

@ -234,12 +234,13 @@ public abstract class Weight implements SegmentCacheable {
/** Sole constructor. */ /** Sole constructor. */
public DefaultBulkScorer(Scorer scorer) { public DefaultBulkScorer(Scorer scorer) {
if (scorer == null) { this.scorer = Objects.requireNonNull(scorer);
throw new NullPointerException();
}
this.scorer = scorer;
this.iterator = scorer.iterator();
this.twoPhase = scorer.twoPhaseIterator(); this.twoPhase = scorer.twoPhaseIterator();
if (twoPhase == null) {
this.iterator = scorer.iterator();
} else {
this.iterator = twoPhase.approximation();
}
} }
@Override @Override
@ -251,36 +252,8 @@ public abstract class Weight implements SegmentCacheable {
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
throws IOException { throws IOException {
collector.setScorer(scorer); collector.setScorer(scorer);
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
DocIdSetIterator competitiveIterator = collector.competitiveIterator(); DocIdSetIterator competitiveIterator = collector.competitiveIterator();
if (competitiveIterator == null
&& scorerIterator.docID() == -1
&& min == 0
&& max == DocIdSetIterator.NO_MORE_DOCS) {
scoreAll(collector, scorerIterator, twoPhase, acceptDocs);
return DocIdSetIterator.NO_MORE_DOCS;
} else {
return scoreRange(
collector, scorerIterator, twoPhase, competitiveIterator, acceptDocs, min, max);
}
}
/**
* Specialized method to bulk-score a range of hits; we separate this from {@link #scoreAll} to
* help out hotspot. See <a
* href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>
*/
static int scoreRange(
LeafCollector collector,
DocIdSetIterator iterator,
TwoPhaseIterator twoPhase,
DocIdSetIterator competitiveIterator,
Bits acceptDocs,
int min,
int max)
throws IOException {
if (competitiveIterator != null) { if (competitiveIterator != null) {
if (competitiveIterator.docID() > min) { if (competitiveIterator.docID() > min) {
min = competitiveIterator.docID(); min = competitiveIterator.docID();
@ -289,75 +262,108 @@ public abstract class Weight implements SegmentCacheable {
} }
} }
int doc = iterator.docID(); if (iterator.docID() < min) {
if (doc < min) { if (iterator.docID() == min - 1) {
if (doc == min - 1) { iterator.nextDoc();
doc = iterator.nextDoc();
} else { } else {
doc = iterator.advance(min); iterator.advance(min);
} }
} }
// These various specializations help save some null checks in a hot loop, but as importantly
// if not more importantly, they help reduce the polymorphism of call sites to nextDoc() and
// collect() because only a subset of collectors produce a competitive iterator, and the set
// of implementing classes for two-phase approximations is smaller than the set of doc id set
// iterator implementations.
if (twoPhase == null && competitiveIterator == null) { if (twoPhase == null && competitiveIterator == null) {
// Optimize simple iterators with collectors that can't skip // Optimize simple iterators with collectors that can't skip
while (doc < max) { scoreIterator(collector, acceptDocs, iterator, max);
if (acceptDocs == null || acceptDocs.get(doc)) { } else if (competitiveIterator == null) {
collector.collect(doc); scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
} } else if (twoPhase == null) {
doc = iterator.nextDoc(); scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
}
} else { } else {
while (doc < max) { scoreTwoPhaseOrCompetitiveIterator(
if (competitiveIterator != null) { collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
assert competitiveIterator.docID() <= doc;
if (competitiveIterator.docID() < doc) {
competitiveIterator.advance(doc);
}
if (competitiveIterator.docID() != doc) {
doc = iterator.advance(competitiveIterator.docID());
continue;
}
}
if ((acceptDocs == null || acceptDocs.get(doc))
&& (twoPhase == null || twoPhase.matches())) {
collector.collect(doc);
}
doc = iterator.nextDoc();
}
} }
return doc; return iterator.docID();
} }
/** private static void scoreIterator(
* Specialized method to bulk-score all hits; we separate this from {@link #scoreRange} to help LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
* out hotspot. See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> throws IOException {
*/ for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
static void scoreAll( if (acceptDocs == null || acceptDocs.get(doc)) {
collector.collect(doc);
}
}
}
private static void scoreTwoPhaseIterator(
LeafCollector collector, LeafCollector collector,
Bits acceptDocs,
DocIdSetIterator iterator, DocIdSetIterator iterator,
TwoPhaseIterator twoPhase, TwoPhaseIterator twoPhase,
Bits acceptDocs) int max)
throws IOException { throws IOException {
if (twoPhase == null) { for (int doc = iterator.docID(); doc < max; ) {
for (int doc = iterator.nextDoc(); if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
doc != DocIdSetIterator.NO_MORE_DOCS; collector.collect(doc);
doc = iterator.nextDoc()) { }
if (acceptDocs == null || acceptDocs.get(doc)) {
collector.collect(doc); doc = iterator.nextDoc();
}
}
private static void scoreCompetitiveIterator(
LeafCollector collector,
Bits acceptDocs,
DocIdSetIterator iterator,
DocIdSetIterator competitiveIterator,
int max)
throws IOException {
for (int doc = iterator.docID(); doc < max; ) {
assert competitiveIterator.docID() <= doc; // invariant
if (competitiveIterator.docID() < doc) {
int competitiveNext = competitiveIterator.advance(doc);
if (competitiveNext != doc) {
doc = iterator.advance(competitiveNext);
continue;
} }
} }
} else {
// The scorer has an approximation, so run the approximation first, then check acceptDocs, if ((acceptDocs == null || acceptDocs.get(doc))) {
// then confirm collector.collect(doc);
for (int doc = iterator.nextDoc(); }
doc != DocIdSetIterator.NO_MORE_DOCS;
doc = iterator.nextDoc()) { doc = iterator.nextDoc();
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { }
collector.collect(doc); }
private static void scoreTwoPhaseOrCompetitiveIterator(
LeafCollector collector,
Bits acceptDocs,
DocIdSetIterator iterator,
TwoPhaseIterator twoPhase,
DocIdSetIterator competitiveIterator,
int max)
throws IOException {
for (int doc = iterator.docID(); doc < max; ) {
assert competitiveIterator.docID() <= doc; // invariant
if (competitiveIterator.docID() < doc) {
int competitiveNext = competitiveIterator.advance(doc);
if (competitiveNext != doc) {
doc = iterator.advance(competitiveNext);
continue;
} }
} }
if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
collector.collect(doc);
}
doc = iterator.nextDoc();
} }
} }
} }