mirror of https://github.com/apache/lucene.git
Replace docBase with actual docId in MaxScoreAccumulator (#13777)
We have been encoding the docBase and the score in MaxScoreAccumulator#accumulate. That approach assumes that segments are processed in doc order, and it implements global max score accounting across segments searched concurrently. With the introduction of intra-segment concurrency, the same segment may be seen multiple times, once per segment partition. All partitions of a segment have the same docBase, so you may end up with top-N results that have higher doc IDs than expected, because the search terminates early before docs with the same score and lower doc IDs are seen. This commit encodes the docId in the accumulator in place of the docBase to resolve the described issue.
This commit is contained in:
parent
5045d3c67b
commit
fe64b04fda
|
@ -51,9 +51,9 @@ final class MaxScoreAccumulator {
|
|||
return v2;
|
||||
}
|
||||
|
||||
void accumulate(int docBase, float score) {
|
||||
assert docBase >= 0 && score >= 0;
|
||||
long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
|
||||
void accumulate(int docId, float score) {
|
||||
assert docId >= 0 && score >= 0;
|
||||
long encode = (((long) Float.floatToIntBits(score)) << 32) | docId;
|
||||
acc.accumulate(encode);
|
||||
}
|
||||
|
||||
|
@ -63,24 +63,18 @@ final class MaxScoreAccumulator {
|
|||
return null;
|
||||
}
|
||||
float score = Float.intBitsToFloat((int) (value >> 32));
|
||||
int docBase = (int) value;
|
||||
return new DocAndScore(docBase, score);
|
||||
int docId = (int) value;
|
||||
return new DocAndScore(docId, score);
|
||||
}
|
||||
|
||||
record DocAndScore(int docBase, float score) implements Comparable<DocAndScore> {
|
||||
record DocAndScore(int docId, float score) implements Comparable<DocAndScore> {
|
||||
|
||||
@Override
|
||||
public int compareTo(DocAndScore o) {
|
||||
int cmp = Float.compare(score, o.score);
|
||||
if (cmp == 0) {
|
||||
// tie-break on the minimum doc base
|
||||
// For a given minimum competitive score, we want to know the first segment
|
||||
// where this score occurred, hence the reverse order here.
|
||||
// On segments with a lower docBase, any document whose score is greater
|
||||
// than or equal to this score would be competitive, while on segments with a
|
||||
// higher docBase, documents need to have a strictly greater score to be
|
||||
// competitive since we tie break on doc ID.
|
||||
return Integer.compare(o.docBase, docBase);
|
||||
// tie-break on doc id, lower id has the priority
|
||||
return Integer.compare(o.docId, docId);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
|
|
@ -232,7 +232,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
// the next float if the global minimum score is set on a document id that is
|
||||
// smaller than the ids in the current leaf
|
||||
float score =
|
||||
docBase >= maxMinScore.docBase() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
|
||||
docBase >= maxMinScore.docId() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
|
||||
if (score > minCompetitiveScore) {
|
||||
assert hitsThresholdChecker.isThresholdReached();
|
||||
scorer.setMinCompetitiveScore(score);
|
||||
|
@ -254,10 +254,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
minCompetitiveScore = localMinScore;
|
||||
if (minScoreAcc != null) {
|
||||
// we don't use the next float but we register the document
|
||||
// id so that other leaves can require it if they are after
|
||||
// the current maximum
|
||||
minScoreAcc.accumulate(docBase, pqTop.score);
|
||||
// we don't use the next float but we register the document id so that other leaves or
|
||||
// leaf partitions can require it if they are after the current maximum
|
||||
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,28 +24,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
|
|||
MaxScoreAccumulator acc = new MaxScoreAccumulator();
|
||||
acc.accumulate(0, 0f);
|
||||
assertEquals(0f, acc.get().score(), 0);
|
||||
assertEquals(0, acc.get().docBase(), 0);
|
||||
assertEquals(0, acc.get().docId(), 0);
|
||||
acc.accumulate(10, 0f);
|
||||
assertEquals(0f, acc.get().score(), 0);
|
||||
assertEquals(0, acc.get().docBase(), 0);
|
||||
assertEquals(0, acc.get().docId(), 0);
|
||||
acc.accumulate(100, 1000f);
|
||||
assertEquals(1000f, acc.get().score(), 0);
|
||||
assertEquals(100, acc.get().docBase(), 0);
|
||||
assertEquals(100, acc.get().docId(), 0);
|
||||
acc.accumulate(1000, 5f);
|
||||
assertEquals(1000f, acc.get().score(), 0);
|
||||
assertEquals(100, acc.get().docBase(), 0);
|
||||
assertEquals(100, acc.get().docId(), 0);
|
||||
acc.accumulate(99, 1000f);
|
||||
assertEquals(1000f, acc.get().score(), 0);
|
||||
assertEquals(99, acc.get().docBase(), 0);
|
||||
assertEquals(99, acc.get().docId(), 0);
|
||||
acc.accumulate(1000, 1001f);
|
||||
assertEquals(1001f, acc.get().score(), 0);
|
||||
assertEquals(1000, acc.get().docBase(), 0);
|
||||
assertEquals(1000, acc.get().docId(), 0);
|
||||
acc.accumulate(10, 1001f);
|
||||
assertEquals(1001f, acc.get().score(), 0);
|
||||
assertEquals(10, acc.get().docBase(), 0);
|
||||
assertEquals(10, acc.get().docId(), 0);
|
||||
acc.accumulate(100, 1001f);
|
||||
assertEquals(1001f, acc.get().score(), 0);
|
||||
assertEquals(10, acc.get().docBase(), 0);
|
||||
assertEquals(10, acc.get().docId(), 0);
|
||||
}
|
||||
|
||||
public void testRandom() {
|
||||
|
@ -56,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
|
|||
for (int i = 0; i < numDocs; i++) {
|
||||
MaxScoreAccumulator.DocAndScore res =
|
||||
new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
|
||||
acc.accumulate(res.docBase(), res.score());
|
||||
acc.accumulate(res.docId(), res.score());
|
||||
if (res.compareTo(max) > 0) {
|
||||
max = res;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue