mirror of https://github.com/apache/lucene.git
LUCENE-10119: Do not set single sort with search after (#317)
We should not set single sort when the search_after is non-null; otherwise, we will incorrectly skip documents whose values are equal to the value from the search_after and docIDs are greater than the docID from the search_after.
This commit is contained in:
parent
fc475360a8
commit
7390d1af51
|
@ -136,11 +136,6 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
|
|||
reverseMul[i] = field.reverse ? -1 : 1;
|
||||
comparators[i] = field.getComparator(size, i);
|
||||
}
|
||||
if (numComparators == 1) {
|
||||
// inform a comparator that sort is based on this single field
|
||||
// to enable some optimizations for skipping over non-competitive documents
|
||||
comparators[0].setSingleSort();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -450,6 +450,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
|
||||
if (after == null) {
|
||||
// inform a comparator that sort is based on this single field
|
||||
// to enable some optimizations for skipping over non-competitive documents
|
||||
// We can't set single sort when the `after` parameter is non-null as it's
|
||||
// an implicit sort over the document id.
|
||||
if (queue.comparators.length == 1) {
|
||||
queue.comparators[0].setSingleSort();
|
||||
}
|
||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
} else {
|
||||
if (after.fields == null) {
|
||||
|
|
|
@ -20,10 +20,9 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
|
|||
import static org.apache.lucene.search.SortField.FIELD_SCORE;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.LongStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
|
@ -256,12 +255,13 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
< numDocs); // assert that some docs were skipped => optimization was run
|
||||
}
|
||||
|
||||
{ // test that sorting on a single field with equal values and after parameter uses the
|
||||
// optimization
|
||||
{ // test that sorting on a single field with equal values and after parameter
|
||||
// doesn't use the optimization
|
||||
final int afterValue = 100;
|
||||
final int afterDocID = 10 + random().nextInt(1000);
|
||||
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
|
||||
final Sort sort = new Sort(sortField);
|
||||
FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
|
||||
FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
|
||||
final TopFieldCollector collector =
|
||||
TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||
searcher.search(new MatchAllDocsQuery(), collector);
|
||||
|
@ -270,10 +270,9 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
for (int i = 0; i < numHits; i++) {
|
||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||
assertEquals(100, fieldDoc.fields[0]);
|
||||
assertTrue(fieldDoc.doc > afterDocID);
|
||||
}
|
||||
assertTrue(
|
||||
topDocs.totalHits.value
|
||||
< numDocs); // assert that some docs were skipped => optimization was run
|
||||
assertEquals(topDocs.totalHits.value, numDocs);
|
||||
}
|
||||
|
||||
{ // test that sorting on main field with equal values + another field for tie breaks doesn't
|
||||
|
@ -675,7 +674,21 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
public void testRandomLong() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
|
||||
List<Long> seqNos = new ArrayList<>();
|
||||
int iterations = 10000 + random().nextInt(10000);
|
||||
long seqNoGenerator = random().nextInt(1000);
|
||||
for (long i = 0; i < iterations; i++) {
|
||||
int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
|
||||
for (int j = 0; j < copies; j++) {
|
||||
seqNos.add(seqNoGenerator);
|
||||
}
|
||||
seqNos.add(seqNoGenerator);
|
||||
seqNoGenerator++;
|
||||
if (random().nextInt(100) <= 5) {
|
||||
seqNoGenerator += random().nextInt(10);
|
||||
}
|
||||
}
|
||||
|
||||
Collections.shuffle(seqNos, random());
|
||||
int pendingDocs = 0;
|
||||
for (long seqNo : seqNos) {
|
||||
|
|
Loading…
Reference in New Issue