LUCENE-10119: Do not set single sort with search after (#317)

We should not set single sort when the search_after is non-null; 
otherwise, we will incorrectly skip documents whose values are equal to
the value from the search_after and docIDs are greater than the docID
from the search_after.
This commit is contained in:
Nhat Nguyen 2021-09-23 13:10:17 -04:00 committed by GitHub
parent fc475360a8
commit 7390d1af51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 14 deletions

View File

@ -136,11 +136,6 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
reverseMul[i] = field.reverse ? -1 : 1; reverseMul[i] = field.reverse ? -1 : 1;
comparators[i] = field.getComparator(size, i); comparators[i] = field.getComparator(size, i);
} }
if (numComparators == 1) {
// inform a comparator that sort is based on this single field
// to enable some optimizations for skipping over non-competitive documents
comparators[0].setSingleSort();
}
} }
/** /**

View File

@ -450,6 +450,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits); FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) { if (after == null) {
// inform a comparator that sort is based on this single field
// to enable some optimizations for skipping over non-competitive documents
// We can't set single sort when the `after` parameter is non-null as it's
// an implicit sort over the document id.
if (queue.comparators.length == 1) {
queue.comparators[0].setSingleSort();
}
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc); return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
} else { } else {
if (after.fields == null) { if (after.fields == null) {

View File

@ -20,10 +20,9 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
import static org.apache.lucene.search.SortField.FIELD_SCORE; import static org.apache.lucene.search.SortField.FIELD_SCORE;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.FloatDocValuesField;
@ -256,12 +255,13 @@ public class TestSortOptimization extends LuceneTestCase {
< numDocs); // assert that some docs were skipped => optimization was run < numDocs); // assert that some docs were skipped => optimization was run
} }
{ // test that sorting on a single field with equal values and after parameter uses the { // test that sorting on a single field with equal values and after parameter
// optimization // doesn't use the optimization
final int afterValue = 100; final int afterValue = 100;
final int afterDocID = 10 + random().nextInt(1000);
final SortField sortField = new SortField("my_field1", SortField.Type.INT); final SortField sortField = new SortField("my_field1", SortField.Type.INT);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue}); FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
final TopFieldCollector collector = final TopFieldCollector collector =
TopFieldCollector.create(sort, numHits, after, totalHitsThreshold); TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
searcher.search(new MatchAllDocsQuery(), collector); searcher.search(new MatchAllDocsQuery(), collector);
@ -270,10 +270,9 @@ public class TestSortOptimization extends LuceneTestCase {
for (int i = 0; i < numHits; i++) { for (int i = 0; i < numHits; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i]; FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
assertEquals(100, fieldDoc.fields[0]); assertEquals(100, fieldDoc.fields[0]);
assertTrue(fieldDoc.doc > afterDocID);
} }
assertTrue( assertEquals(topDocs.totalHits.value, numDocs);
topDocs.totalHits.value
< numDocs); // assert that some docs were skipped => optimization was run
} }
{ // test that sorting on main field with equal values + another field for tie breaks doesn't { // test that sorting on main field with equal values + another field for tie breaks doesn't
@ -675,7 +674,21 @@ public class TestSortOptimization extends LuceneTestCase {
public void testRandomLong() throws IOException { public void testRandomLong() throws IOException {
Directory dir = newDirectory(); Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig()); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList()); List<Long> seqNos = new ArrayList<>();
int iterations = 10000 + random().nextInt(10000);
long seqNoGenerator = random().nextInt(1000);
for (long i = 0; i < iterations; i++) {
int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
for (int j = 0; j < copies; j++) {
seqNos.add(seqNoGenerator);
}
seqNos.add(seqNoGenerator);
seqNoGenerator++;
if (random().nextInt(100) <= 5) {
seqNoGenerator += random().nextInt(10);
}
}
Collections.shuffle(seqNos, random()); Collections.shuffle(seqNos, random());
int pendingDocs = 0; int pendingDocs = 0;
for (long seqNo : seqNos) { for (long seqNo : seqNos) {