mirror of https://github.com/apache/lucene.git
Replace docBase with actual docId in MaxScoreAccumulator (#13777)
We have been encoding docBase and the score in MaxScoreAccumulator#accumulate. That makes the assumption that segments are going to be processed in doc order and implements global max score accounting across segments searched concurrently. With the introduction of intra-segment concurrency, the same segment may be seen multiple times, once per segment partition. Partitions are all going to have the same docBase, hence you may end up with topN results with higher docIds than expected, because the search early terminates before docs with same score and lower doc ids are seen. This commit encodes the docId in the accumulator in place of the docBase to resolve the described issue.
This commit is contained in:
parent
5045d3c67b
commit
fe64b04fda
|
@ -51,9 +51,9 @@ final class MaxScoreAccumulator {
|
||||||
return v2;
|
return v2;
|
||||||
}
|
}
|
||||||
|
|
||||||
void accumulate(int docBase, float score) {
|
void accumulate(int docId, float score) {
|
||||||
assert docBase >= 0 && score >= 0;
|
assert docId >= 0 && score >= 0;
|
||||||
long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
|
long encode = (((long) Float.floatToIntBits(score)) << 32) | docId;
|
||||||
acc.accumulate(encode);
|
acc.accumulate(encode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,24 +63,18 @@ final class MaxScoreAccumulator {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
float score = Float.intBitsToFloat((int) (value >> 32));
|
float score = Float.intBitsToFloat((int) (value >> 32));
|
||||||
int docBase = (int) value;
|
int docId = (int) value;
|
||||||
return new DocAndScore(docBase, score);
|
return new DocAndScore(docId, score);
|
||||||
}
|
}
|
||||||
|
|
||||||
record DocAndScore(int docBase, float score) implements Comparable<DocAndScore> {
|
record DocAndScore(int docId, float score) implements Comparable<DocAndScore> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(DocAndScore o) {
|
public int compareTo(DocAndScore o) {
|
||||||
int cmp = Float.compare(score, o.score);
|
int cmp = Float.compare(score, o.score);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
// tie-break on the minimum doc base
|
// tie-break on doc id, lower id has the priority
|
||||||
// For a given minimum competitive score, we want to know the first segment
|
return Integer.compare(o.docId, docId);
|
||||||
// where this score occurred, hence the reverse order here.
|
|
||||||
// On segments with a lower docBase, any document whose score is greater
|
|
||||||
// than or equal to this score would be competitive, while on segments with a
|
|
||||||
// higher docBase, documents need to have a strictly greater score to be
|
|
||||||
// competitive since we tie break on doc ID.
|
|
||||||
return Integer.compare(o.docBase, docBase);
|
|
||||||
}
|
}
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,7 +232,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
||||||
// the next float if the global minimum score is set on a document id that is
|
// the next float if the global minimum score is set on a document id that is
|
||||||
// smaller than the ids in the current leaf
|
// smaller than the ids in the current leaf
|
||||||
float score =
|
float score =
|
||||||
docBase >= maxMinScore.docBase() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
|
docBase >= maxMinScore.docId() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
|
||||||
if (score > minCompetitiveScore) {
|
if (score > minCompetitiveScore) {
|
||||||
assert hitsThresholdChecker.isThresholdReached();
|
assert hitsThresholdChecker.isThresholdReached();
|
||||||
scorer.setMinCompetitiveScore(score);
|
scorer.setMinCompetitiveScore(score);
|
||||||
|
@ -254,10 +254,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
||||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||||
minCompetitiveScore = localMinScore;
|
minCompetitiveScore = localMinScore;
|
||||||
if (minScoreAcc != null) {
|
if (minScoreAcc != null) {
|
||||||
// we don't use the next float but we register the document
|
// we don't use the next float but we register the document id so that other leaves or
|
||||||
// id so that other leaves can require it if they are after
|
// leaf partitions can require it if they are after the current maximum
|
||||||
// the current maximum
|
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
|
||||||
minScoreAcc.accumulate(docBase, pqTop.score);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,28 +24,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
|
||||||
MaxScoreAccumulator acc = new MaxScoreAccumulator();
|
MaxScoreAccumulator acc = new MaxScoreAccumulator();
|
||||||
acc.accumulate(0, 0f);
|
acc.accumulate(0, 0f);
|
||||||
assertEquals(0f, acc.get().score(), 0);
|
assertEquals(0f, acc.get().score(), 0);
|
||||||
assertEquals(0, acc.get().docBase(), 0);
|
assertEquals(0, acc.get().docId(), 0);
|
||||||
acc.accumulate(10, 0f);
|
acc.accumulate(10, 0f);
|
||||||
assertEquals(0f, acc.get().score(), 0);
|
assertEquals(0f, acc.get().score(), 0);
|
||||||
assertEquals(0, acc.get().docBase(), 0);
|
assertEquals(0, acc.get().docId(), 0);
|
||||||
acc.accumulate(100, 1000f);
|
acc.accumulate(100, 1000f);
|
||||||
assertEquals(1000f, acc.get().score(), 0);
|
assertEquals(1000f, acc.get().score(), 0);
|
||||||
assertEquals(100, acc.get().docBase(), 0);
|
assertEquals(100, acc.get().docId(), 0);
|
||||||
acc.accumulate(1000, 5f);
|
acc.accumulate(1000, 5f);
|
||||||
assertEquals(1000f, acc.get().score(), 0);
|
assertEquals(1000f, acc.get().score(), 0);
|
||||||
assertEquals(100, acc.get().docBase(), 0);
|
assertEquals(100, acc.get().docId(), 0);
|
||||||
acc.accumulate(99, 1000f);
|
acc.accumulate(99, 1000f);
|
||||||
assertEquals(1000f, acc.get().score(), 0);
|
assertEquals(1000f, acc.get().score(), 0);
|
||||||
assertEquals(99, acc.get().docBase(), 0);
|
assertEquals(99, acc.get().docId(), 0);
|
||||||
acc.accumulate(1000, 1001f);
|
acc.accumulate(1000, 1001f);
|
||||||
assertEquals(1001f, acc.get().score(), 0);
|
assertEquals(1001f, acc.get().score(), 0);
|
||||||
assertEquals(1000, acc.get().docBase(), 0);
|
assertEquals(1000, acc.get().docId(), 0);
|
||||||
acc.accumulate(10, 1001f);
|
acc.accumulate(10, 1001f);
|
||||||
assertEquals(1001f, acc.get().score(), 0);
|
assertEquals(1001f, acc.get().score(), 0);
|
||||||
assertEquals(10, acc.get().docBase(), 0);
|
assertEquals(10, acc.get().docId(), 0);
|
||||||
acc.accumulate(100, 1001f);
|
acc.accumulate(100, 1001f);
|
||||||
assertEquals(1001f, acc.get().score(), 0);
|
assertEquals(1001f, acc.get().score(), 0);
|
||||||
assertEquals(10, acc.get().docBase(), 0);
|
assertEquals(10, acc.get().docId(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandom() {
|
public void testRandom() {
|
||||||
|
@ -56,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
|
||||||
for (int i = 0; i < numDocs; i++) {
|
for (int i = 0; i < numDocs; i++) {
|
||||||
MaxScoreAccumulator.DocAndScore res =
|
MaxScoreAccumulator.DocAndScore res =
|
||||||
new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
|
new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
|
||||||
acc.accumulate(res.docBase(), res.score());
|
acc.accumulate(res.docId(), res.score());
|
||||||
if (res.compareTo(max) > 0) {
|
if (res.compareTo(max) > 0) {
|
||||||
max = res;
|
max = res;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue