Speedup MaxScoreCache.computeMaxScore (#13865)

This shows up as allocating tens of GB for iterators in the nightly
benchmarks. We should go the zero-allocation route for RandomAccess
lists, which I'd expect covers 100% of them here, for a bit of a speedup.
This commit is contained in:
Armin Braun 2024-10-07 14:39:22 +02:00 committed by GitHub
parent 486b3b4e4a
commit 0fb7f9f89d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 3 deletions

View File

@ -40,7 +40,8 @@ public abstract class Impacts {
/**
* Return impacts on the given level. These impacts are sorted by increasing frequency and
* increasing unsigned norm, and only valid until the doc ID returned by {@link
* #getDocIdUpTo(int)} for the same level, included. The returned list is never empty and should
* implement {@link java.util.RandomAccess} if it contains more than a single element. NOTE: There
* is no guarantee that these impacts actually appear in postings, only that they trigger scores
* that are greater than or equal to the impacts that actually appear in postings.
*/

View File

@ -71,7 +71,9 @@ public final class MaxScoreCache {
private float computeMaxScore(List<Impact> impacts) {
float maxScore = 0;
for (Impact impact : impacts) {
var scorer = this.scorer;
for (int i = 0, length = impacts.size(); i < length; i++) {
Impact impact = impacts.get(i);
maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore);
}
return maxScore;

View File

@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.RandomAccess;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
@ -700,7 +701,10 @@ public class AssertingLeafReader extends FilterLeafReader {
/**
 * Delegates to the wrapped {@code Impacts}, asserting the documented contract on the way out.
 *
 * @param level the impacts level to fetch
 * @return the delegate's impact list for {@code level}
 */
public List<Impact> getImpacts(int level) {
  // Impacts become stale once the enum advances past the target they were computed for.
  assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget)
      : "Cannot reuse impacts after advancing the iterator";
  List<Impact> impacts = in.getImpacts(level);
  // Multi-element lists must support constant-time get(i) so consumers (e.g.
  // MaxScoreCache#computeMaxScore) can iterate without allocating an Iterator.
  assert impacts.size() <= 1 || impacts instanceof RandomAccess
      : "impact lists longer than 1 should implement RandomAccess but saw impacts = " + impacts;
  return impacts;
}
}