From ff03379eb6eb0fb7a4a14ea12e07cd60f15439c6 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 22 Oct 2024 16:32:28 +0200 Subject: [PATCH] Reduce the compiled size of the collect() method on `TopScoreDocCollector`. (#13939) This comes from observations on https://tantivy-search.github.io/bench/ for exhaustive evaluation like `TOP_100_COUNT`. `collect()` is often inlined, but other methods that we'd like to see inlined like `PostingsEnum#nextDoc()` are not always inlined. This PR decreases the compiled size of `collect()` to make more room for other methods to be inlined. It does so by moving an assertion to `AssertingScorable` and extracting an uncommon code path to a method. --- .../lucene/search/TopScoreDocCollector.java | 46 ++++++++++++------- .../tests/search/AssertingScorable.java | 3 +- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 3469276982b..1524036b20d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -70,17 +70,16 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { public void collect(int doc) throws IOException { float score = scorer.score(); - // This collector relies on the fact that scorers produce positive values: - assert score >= 0; // NOTE: false for NaN - - totalHits++; + int hitCountSoFar = ++totalHits; hitsThresholdChecker.incrementHitCount(); - if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) { + if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) { updateGlobalMinCompetitiveScore(scorer); } if (score <= pqTop.score) { + // Note: for queries that match lots of hits, this is the common case: most hits are not + // competitive. if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) { // we just reached totalHitsThreshold, we can start setting the min // competitive score now @@ -89,8 +88,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { // Since docs are returned in-order (i.e., increasing doc Id), a document // with equal score to pqTop.score cannot compete since HitQueue favors // documents with lower doc Ids. Therefore reject those docs too. - return; + } else { + collectCompetitiveHit(doc, score); } + } + + private void collectCompetitiveHit(int doc, float score) throws IOException { pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); @@ -103,7 +106,6 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { static class PagingTopScoreDocCollector extends TopScoreDocCollector { private final ScoreDoc after; - private int collectedHits; PagingTopScoreDocCollector( int numHits, @@ -112,12 +114,19 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { MaxScoreAccumulator minScoreAcc) { super(numHits, hitsThresholdChecker, minScoreAcc); this.after = after; - this.collectedHits = 0; } @Override protected int topDocsSize() { - return collectedHits < pq.size() ? collectedHits : pq.size(); + // Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID. + int[] validTopHitCount = new int[1]; + pq.forEach( + scoreDoc -> { + if (scoreDoc.doc != Integer.MAX_VALUE) { + validTopHitCount[0]++; + } + }); + return validTopHitCount[0]; } @Override @@ -148,17 +157,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { public void collect(int doc) throws IOException { float score = scorer.score(); - // This collector relies on the fact that scorers produce positive values: - assert score >= 0; // NOTE: false for NaN - - totalHits++; + int hitCountSoFar = ++totalHits; hitsThresholdChecker.incrementHitCount(); - if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) { + if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) { updateGlobalMinCompetitiveScore(scorer); } - if (score > after.score || (score == after.score && doc <= afterDoc)) { + float afterScore = after.score; + if (score > afterScore || (score == afterScore && doc <= afterDoc)) { // hit was collected on a previous page if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) { // we just reached totalHitsThreshold, we can start setting the min @@ -169,6 +176,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { } if (score <= pqTop.score) { + // Note: for queries that match lots of hits, this is the common case: most hits are not + // competitive. if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) { // we just reached totalHitsThreshold, we can start setting the min // competitive score now @@ -178,9 +187,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { // Since docs are returned in-order (i.e., increasing doc Id), a document // with equal score to pqTop.score cannot compete since HitQueue favors // documents with lower doc Ids. Therefore reject those docs too. - return; + } else { + collectCompetitiveHit(doc, score); } - collectedHits++; + } + + private void collectCompetitiveHit(int doc, float score) throws IOException { pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorable.java b/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorable.java index 3a87d4c0f1a..2f3d604ad25 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorable.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorable.java @@ -33,7 +33,8 @@ public class AssertingScorable extends FilterScorable { @Override public float score() throws IOException { final float score = in.score(); - assert !Float.isNaN(score) : "NaN score for in=" + in; + // Note: score >= 0 returns false for NaN + assert score >= 0 : "score=" + score + " for in=" + in; return score; }