Compare the missing value with the top value even after the hit queue is full (#13644)

This commit is contained in:
panguixin 2024-08-15 06:33:56 +08:00 committed by GitHub
parent 1cfa697c06
commit 7cd2eb20cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 42 additions and 9 deletions

View File

@ -360,6 +360,9 @@ Optimizations
* GITHUB#13636: Optimizations to the decoding logic of blocks of postings.
(Adrien Grand, Uwe Schindler)
* GITHUB#13644: Improve NumericComparator competitive iterator logic by comparing the missing value with the top
value even after the hit queue is full (Pan Guixin)
Changes in runtime behavior
---------------------

View File

@ -342,25 +342,31 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
}
/**
 * Reports whether the missing value ({@code missingValueAsLong}) can still be competitive
 * against the bounds currently known to this comparator.
 *
 * <p>Two bounds are checked in order, and failing either one makes the missing value
 * non-competitive:
 *
 * <ol>
 *   <li>When {@code queueFull} is set, the missing value is compared with the current bottom
 *       ({@code bottomAsComparableLong()}).
 *   <li>When {@code leafTopSet} is set, the missing value is compared with the top value
 *       ({@code topAsComparableLong()}). This check runs even when the queue is full, as long
 *       as the bottom check did not already rule the missing value out.
 * </ol>
 *
 * @return {@code false} if the bottom or the top value excludes the missing value; {@code true}
 *     if every applicable check passes or no bound is available yet
 */
private boolean isMissingValueCompetitive() {
  // if queue is full, compare with bottom first,
  // if competitive, then check if we can compare with topValue
  if (queueFull) {
    int result = Long.compare(missingValueAsLong, bottomAsComparableLong());
    // in reverse (desc) sort missingValue is competitive when it's greater or equal to bottom,
    // in asc sort missingValue is competitive when it's smaller or equal to bottom;
    // with Pruning.GREATER_THAN_OR_EQUAL_TO the comparison is strict, so a value equal to
    // bottom is treated as non-competitive
    final boolean competitive =
        reverse
            ? (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO ? result > 0 : result >= 0)
            : (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO ? result < 0 : result <= 0);
    if (competitive == false) {
      return false;
    }
    // competitive against bottom: fall through so the top value can still exclude it
  }

  if (leafTopSet) {
    int result = Long.compare(missingValueAsLong, topAsComparableLong());
    // in reverse (desc) sort missingValue is competitive when it's smaller or equal to
    // topValue,
    // in asc sort missingValue is competitive when it's greater or equal to topValue
    return reverse ? (result <= 0) : (result >= 0);
  }

  // by default competitive
  return true;
}
@Override

View File

@ -285,6 +285,30 @@ public class TestSortOptimization extends LuceneTestCase {
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs);
}
{
// Scenario: ascending sort on "my_field" with search-after. The missing value (2) is smaller
// than the after value (3), so documents without the field sort before `after` and can never
// be returned; the test checks that the skipping optimization still runs in that case.
long afterValue = 3L;
FieldDoc after = new FieldDoc(3, Float.NaN, new Long[] {afterValue});
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
sortField.setMissingValue(2L);
final Sort sort = new Sort(sortField);
final TopFieldCollectorManager collectorManager =
new TopFieldCollectorManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), collectorManager);
// a full page of hits must come back
assertEquals(topDocs.scoreDocs.length, numHits);
for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
// the i-th hit is expected to carry the sort value afterValue + 1 + i
assertEquals(afterValue + 1 + i, fieldDoc.fields[0]);
}
// the hit count is asserted to be a lower bound rather than an exact count
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, topDocs.totalHits.relation);
// expect to skip all but the first leaf in the BKD tree in the first segment as well as the
// second segment
// doc-0 has no target field, so we need to subtract 1
// NOTE(review): 7001 and 512 are presumably the first segment's document count and the BKD
// leaf size established earlier in this test -- confirm against the index setup.
final int expectedSkipped = (7001 - 512 - 1) + (numDocs - 7001);
assertNonCompetitiveHitsAreSkipped(topDocs.totalHits.value, numDocs - expectedSkipped + 1);
}
reader.close();
dir.close();
}