Speedup MaxScoreCache.computeMaxScore (#13865)

This shows up as allocating tens of GB for iterators in the nightly
benchmarks. We should go the zero-allocation route for RandomAccess
lists, which I'd expect 100% of them will be here for a bit of a speedup.
This commit is contained in:
Armin Braun 2024-10-07 14:39:22 +02:00 committed by GitHub
parent 486b3b4e4a
commit 0fb7f9f89d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 3 deletions

View File

@ -40,7 +40,8 @@ public abstract class Impacts {
/** /**
* Return impacts on the given level. These impacts are sorted by increasing frequency and * Return impacts on the given level. These impacts are sorted by increasing frequency and
* increasing unsigned norm, and only valid until the doc ID returned by {@link * increasing unsigned norm, and only valid until the doc ID returned by {@link
* #getDocIdUpTo(int)} for the same level, included. The returned list is never empty. NOTE: There * #getDocIdUpTo(int)} for the same level, included. The returned list is never empty and should
* implement {@link java.util.RandomAccess} if it contains more than a single element. NOTE: There
* is no guarantee that these impacts actually appear in postings, only that they trigger scores * is no guarantee that these impacts actually appear in postings, only that they trigger scores
* that are greater than or equal to the impacts that actually appear in postings. * that are greater than or equal to the impacts that actually appear in postings.
*/ */

View File

@ -71,7 +71,9 @@ public final class MaxScoreCache {
private float computeMaxScore(List<Impact> impacts) { private float computeMaxScore(List<Impact> impacts) {
float maxScore = 0; float maxScore = 0;
for (Impact impact : impacts) { var scorer = this.scorer;
for (int i = 0, length = impacts.size(); i < length; i++) {
Impact impact = impacts.get(i);
maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore); maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore);
} }
return maxScore; return maxScore;

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.RandomAccess;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesSkipIndexType;
@ -700,7 +701,10 @@ public class AssertingLeafReader extends FilterLeafReader {
public List<Impact> getImpacts(int level) { public List<Impact> getImpacts(int level) {
assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget)
: "Cannot reuse impacts after advancing the iterator"; : "Cannot reuse impacts after advancing the iterator";
return in.getImpacts(level); List<Impact> impacts = in.getImpacts(level);
assert impacts.size() <= 1 || impacts instanceof RandomAccess
: "impact lists longer than 1 should implement RandomAccess but saw impacts = " + impacts;
return impacts;
} }
} }