Replace docBase with actual docId in MaxScoreAccumulator (#13777)

We have been encoding docBase and the score in MaxScoreAccumulator#accumulate.
That makes the assumption that segments are going to be processed in doc order
and implements global max score accounting across segments searched concurrently.

With the introduction of intra-segment concurrency, the same segment may be seen
multiple times, once per segment partition. Partitions are all going to have the same
docBase, hence you may end up with topN results with higher docIds than expected,
because the search early terminates before docs with same score and lower doc ids
are seen.

This commit encodes the docId in the accumulator in place of the docBase to resolve
the described issue.
This commit is contained in:
Luca Cavanna 2024-09-13 08:58:10 +02:00 committed by GitHub
parent 5045d3c67b
commit fe64b04fda
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 28 deletions

View File

@ -51,9 +51,9 @@ final class MaxScoreAccumulator {
return v2; return v2;
} }
void accumulate(int docBase, float score) { void accumulate(int docId, float score) {
assert docBase >= 0 && score >= 0; assert docId >= 0 && score >= 0;
long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase; long encode = (((long) Float.floatToIntBits(score)) << 32) | docId;
acc.accumulate(encode); acc.accumulate(encode);
} }
@ -63,24 +63,18 @@ final class MaxScoreAccumulator {
return null; return null;
} }
float score = Float.intBitsToFloat((int) (value >> 32)); float score = Float.intBitsToFloat((int) (value >> 32));
int docBase = (int) value; int docId = (int) value;
return new DocAndScore(docBase, score); return new DocAndScore(docId, score);
} }
record DocAndScore(int docBase, float score) implements Comparable<DocAndScore> { record DocAndScore(int docId, float score) implements Comparable<DocAndScore> {
@Override @Override
public int compareTo(DocAndScore o) { public int compareTo(DocAndScore o) {
int cmp = Float.compare(score, o.score); int cmp = Float.compare(score, o.score);
if (cmp == 0) { if (cmp == 0) {
// tie-break on the minimum doc base // tie-break on doc id, lower id has the priority
// For a given minimum competitive score, we want to know the first segment return Integer.compare(o.docId, docId);
// where this score occurred, hence the reverse order here.
// On segments with a lower docBase, any document whose score is greater
// than or equal to this score would be competitive, while on segments with a
// higher docBase, documents need to have a strictly greater score to be
// competitive since we tie break on doc ID.
return Integer.compare(o.docBase, docBase);
} }
return cmp; return cmp;
} }

View File

@ -232,7 +232,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// the next float if the global minimum score is set on a document id that is // the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf // smaller than the ids in the current leaf
float score = float score =
docBase >= maxMinScore.docBase() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score(); docBase >= maxMinScore.docId() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
if (score > minCompetitiveScore) { if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached(); assert hitsThresholdChecker.isThresholdReached();
scorer.setMinCompetitiveScore(score); scorer.setMinCompetitiveScore(score);
@ -254,10 +254,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore; minCompetitiveScore = localMinScore;
if (minScoreAcc != null) { if (minScoreAcc != null) {
// we don't use the next float but we register the document // we don't use the next float but we register the document id so that other leaves or
// id so that other leaves can require it if they are after // leaf partitions can require it if they are after the current maximum
// the current maximum minScoreAcc.accumulate(pqTop.doc, pqTop.score);
minScoreAcc.accumulate(docBase, pqTop.score);
} }
} }
} }

View File

@ -24,28 +24,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
MaxScoreAccumulator acc = new MaxScoreAccumulator(); MaxScoreAccumulator acc = new MaxScoreAccumulator();
acc.accumulate(0, 0f); acc.accumulate(0, 0f);
assertEquals(0f, acc.get().score(), 0); assertEquals(0f, acc.get().score(), 0);
assertEquals(0, acc.get().docBase(), 0); assertEquals(0, acc.get().docId(), 0);
acc.accumulate(10, 0f); acc.accumulate(10, 0f);
assertEquals(0f, acc.get().score(), 0); assertEquals(0f, acc.get().score(), 0);
assertEquals(0, acc.get().docBase(), 0); assertEquals(0, acc.get().docId(), 0);
acc.accumulate(100, 1000f); acc.accumulate(100, 1000f);
assertEquals(1000f, acc.get().score(), 0); assertEquals(1000f, acc.get().score(), 0);
assertEquals(100, acc.get().docBase(), 0); assertEquals(100, acc.get().docId(), 0);
acc.accumulate(1000, 5f); acc.accumulate(1000, 5f);
assertEquals(1000f, acc.get().score(), 0); assertEquals(1000f, acc.get().score(), 0);
assertEquals(100, acc.get().docBase(), 0); assertEquals(100, acc.get().docId(), 0);
acc.accumulate(99, 1000f); acc.accumulate(99, 1000f);
assertEquals(1000f, acc.get().score(), 0); assertEquals(1000f, acc.get().score(), 0);
assertEquals(99, acc.get().docBase(), 0); assertEquals(99, acc.get().docId(), 0);
acc.accumulate(1000, 1001f); acc.accumulate(1000, 1001f);
assertEquals(1001f, acc.get().score(), 0); assertEquals(1001f, acc.get().score(), 0);
assertEquals(1000, acc.get().docBase(), 0); assertEquals(1000, acc.get().docId(), 0);
acc.accumulate(10, 1001f); acc.accumulate(10, 1001f);
assertEquals(1001f, acc.get().score(), 0); assertEquals(1001f, acc.get().score(), 0);
assertEquals(10, acc.get().docBase(), 0); assertEquals(10, acc.get().docId(), 0);
acc.accumulate(100, 1001f); acc.accumulate(100, 1001f);
assertEquals(1001f, acc.get().score(), 0); assertEquals(1001f, acc.get().score(), 0);
assertEquals(10, acc.get().docBase(), 0); assertEquals(10, acc.get().docId(), 0);
} }
public void testRandom() { public void testRandom() {
@ -56,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
for (int i = 0; i < numDocs; i++) { for (int i = 0; i < numDocs; i++) {
MaxScoreAccumulator.DocAndScore res = MaxScoreAccumulator.DocAndScore res =
new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat()); new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
acc.accumulate(res.docBase(), res.score()); acc.accumulate(res.docId(), res.score());
if (res.compareTo(max) > 0) { if (res.compareTo(max) > 0) {
max = res; max = res;
} }