mirror of https://github.com/apache/lucene.git
Reduce the compiled size of the collect() method on `TopScoreDocCollector`. (#13939)
This comes from observations on https://tantivy-search.github.io/bench/ for exhaustive-evaluation tasks like `TOP_100_COUNT`: `collect()` is often inlined, but other methods that we'd like to see inlined, such as `PostingsEnum#nextDoc()`, are not always inlined. This PR decreases the compiled size of `collect()` to leave more room for those other methods to be inlined. It does so by moving an assertion to `AssertingScorable` and by extracting the uncommon code path (a competitive hit) into its own method, `collectCompetitiveHit()`.
This commit is contained in:
parent
a779a64d7b
commit
f8ea130514
|
@ -70,17 +70,16 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
public void collect(int doc) throws IOException {
|
||||
float score = scorer.score();
|
||||
|
||||
// This collector relies on the fact that scorers produce positive values:
|
||||
assert score >= 0; // NOTE: false for NaN
|
||||
|
||||
totalHits++;
|
||||
int hitCountSoFar = ++totalHits;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (score <= pqTop.score) {
|
||||
// Note: for queries that match lots of hits, this is the common case: most hits are not
|
||||
// competitive.
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
|
@ -89,8 +88,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
// Since docs are returned in-order (i.e., increasing doc Id), a document
|
||||
// with equal score to pqTop.score cannot compete since HitQueue favors
|
||||
// documents with lower doc Ids. Therefore reject those docs too.
|
||||
return;
|
||||
} else {
|
||||
collectCompetitiveHit(doc, score);
|
||||
}
|
||||
}
|
||||
|
||||
private void collectCompetitiveHit(int doc, float score) throws IOException {
|
||||
pqTop.doc = doc + docBase;
|
||||
pqTop.score = score;
|
||||
pqTop = pq.updateTop();
|
||||
|
@ -103,7 +106,6 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
static class PagingTopScoreDocCollector extends TopScoreDocCollector {
|
||||
|
||||
private final ScoreDoc after;
|
||||
private int collectedHits;
|
||||
|
||||
PagingTopScoreDocCollector(
|
||||
int numHits,
|
||||
|
@ -112,12 +114,19 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(numHits, hitsThresholdChecker, minScoreAcc);
|
||||
this.after = after;
|
||||
this.collectedHits = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int topDocsSize() {
|
||||
return collectedHits < pq.size() ? collectedHits : pq.size();
|
||||
// Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID.
|
||||
int[] validTopHitCount = new int[1];
|
||||
pq.forEach(
|
||||
scoreDoc -> {
|
||||
if (scoreDoc.doc != Integer.MAX_VALUE) {
|
||||
validTopHitCount[0]++;
|
||||
}
|
||||
});
|
||||
return validTopHitCount[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -148,17 +157,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
public void collect(int doc) throws IOException {
|
||||
float score = scorer.score();
|
||||
|
||||
// This collector relies on the fact that scorers produce positive values:
|
||||
assert score >= 0; // NOTE: false for NaN
|
||||
|
||||
totalHits++;
|
||||
int hitCountSoFar = ++totalHits;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (score > after.score || (score == after.score && doc <= afterDoc)) {
|
||||
float afterScore = after.score;
|
||||
if (score > afterScore || (score == afterScore && doc <= afterDoc)) {
|
||||
// hit was collected on a previous page
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
|
@ -169,6 +176,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
}
|
||||
|
||||
if (score <= pqTop.score) {
|
||||
// Note: for queries that match lots of hits, this is the common case: most hits are not
|
||||
// competitive.
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
|
@ -178,9 +187,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
// Since docs are returned in-order (i.e., increasing doc Id), a document
|
||||
// with equal score to pqTop.score cannot compete since HitQueue favors
|
||||
// documents with lower doc Ids. Therefore reject those docs too.
|
||||
return;
|
||||
} else {
|
||||
collectCompetitiveHit(doc, score);
|
||||
}
|
||||
collectedHits++;
|
||||
}
|
||||
|
||||
private void collectCompetitiveHit(int doc, float score) throws IOException {
|
||||
pqTop.doc = doc + docBase;
|
||||
pqTop.score = score;
|
||||
pqTop = pq.updateTop();
|
||||
|
|
|
@ -33,7 +33,8 @@ public class AssertingScorable extends FilterScorable {
|
|||
@Override
|
||||
public float score() throws IOException {
|
||||
final float score = in.score();
|
||||
assert !Float.isNaN(score) : "NaN score for in=" + in;
|
||||
// Note: score >= 0 returns false for NaN
|
||||
assert score >= 0 : "score=" + score + " for in=" + in;
|
||||
return score;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue