Reduce the compiled size of the collect() method on `TopScoreDocCollector`. (#13939)

This comes from observations on https://tantivy-search.github.io/bench/ for
exhaustive evaluation like `TOP_100_COUNT`. `collect()` is often inlined, but
other methods that we'd like to see inlined like `PostingsEnum#nextDoc()` are
not always inlined. This PR decreases the compiled size of `collect()` to make
more room for other methods to be inlined.

It does so by moving an assertion to `AssertingScorable` and extracting an
uncommon code path to a method.
This commit is contained in:
Adrien Grand 2024-10-22 16:32:28 +02:00 committed by GitHub
parent a779a64d7b
commit f8ea130514
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 31 additions and 18 deletions

View File

@ -70,17 +70,16 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector relies on the fact that scorers produce positive values:
assert score >= 0; // NOTE: false for NaN
totalHits++;
int hitCountSoFar = ++totalHits;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score <= pqTop.score) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
@ -89,8 +88,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
} else {
collectCompetitiveHit(doc, score);
}
}
private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
@ -103,7 +106,6 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
static class PagingTopScoreDocCollector extends TopScoreDocCollector {
private final ScoreDoc after;
private int collectedHits;
PagingTopScoreDocCollector(
int numHits,
@ -112,12 +114,19 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
MaxScoreAccumulator minScoreAcc) {
super(numHits, hitsThresholdChecker, minScoreAcc);
this.after = after;
this.collectedHits = 0;
}
@Override
protected int topDocsSize() {
return collectedHits < pq.size() ? collectedHits : pq.size();
// Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID.
int[] validTopHitCount = new int[1];
pq.forEach(
scoreDoc -> {
if (scoreDoc.doc != Integer.MAX_VALUE) {
validTopHitCount[0]++;
}
});
return validTopHitCount[0];
}
@Override
@ -148,17 +157,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector relies on the fact that scorers produce positive values:
assert score >= 0; // NOTE: false for NaN
totalHits++;
int hitCountSoFar = ++totalHits;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score > after.score || (score == after.score && doc <= afterDoc)) {
float afterScore = after.score;
if (score > afterScore || (score == afterScore && doc <= afterDoc)) {
// hit was collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
@ -169,6 +176,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
}
if (score <= pqTop.score) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
@ -178,9 +187,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
} else {
collectCompetitiveHit(doc, score);
}
collectedHits++;
}
private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();

View File

@ -33,7 +33,8 @@ public class AssertingScorable extends FilterScorable {
@Override
public float score() throws IOException {
final float score = in.score();
assert !Float.isNaN(score) : "NaN score for in=" + in;
// Note: score >= 0 returns false for NaN
assert score >= 0 : "score=" + score + " for in=" + in;
return score;
}