LUCENE-10119: Do not set single sort with search after (#317)

We should not set single sort when the search_after is non-null; otherwise, we will incorrectly skip documents whose values are equal to the value from the search_after and docIDs are greater than the docID from the search_after.
2021-09-23 13:10:17 -04:00 · 2021-09-23 13:10:17 -04:00 · 7390d1af51
parent fc475360a8
commit 7390d1af51
3 changed files with 29 additions and 14 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
@ -136,11 +136,6 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
      reverseMul[i] = field.reverse ? -1 : 1;
      comparators[i] = field.getComparator(size, i);
    }
    if (numComparators == 1) {
      // inform a comparator that sort is based on this single field
      // to enable some optimizations for skipping over non-competitive documents
      comparators[0].setSingleSort();
    }
  }
  /**
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@ -450,6 +450,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
    FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
    if (after == null) {
      // inform a comparator that sort is based on this single field
      // to enable some optimizations for skipping over non-competitive documents
      // We can't set single sort when the `after` parameter is non-null as it's
      // an implicit sort over the document id.
      if (queue.comparators.length == 1) {
        queue.comparators[0].setSingleSort();
      }
      return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
    } else {
      if (after.fields == null) {
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@ -20,10 +20,9 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
 import static org.apache.lucene.search.SortField.FIELD_SCORE;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.stream.Collectors;
 import java.util.stream.LongStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
@ -256,12 +255,13 @@ public class TestSortOptimization extends LuceneTestCase {
              < numDocs); // assert that some docs were skipped => optimization was run
    }
-    { // test that sorting on a single field with equal values and after parameter uses the
+    { // test that sorting on a single field with equal values and after parameter
-      // optimization
+      // doesn't use the optimization
      final int afterValue = 100;
      final int afterDocID = 10 + random().nextInt(1000);
      final SortField sortField = new SortField("my_field1", SortField.Type.INT);
      final Sort sort = new Sort(sortField);
-      FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
+      FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
@ -270,10 +270,9 @@ public class TestSortOptimization extends LuceneTestCase {
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(100, fieldDoc.fields[0]);
        assertTrue(fieldDoc.doc > afterDocID);
      }
-      assertTrue(
+      assertEquals(topDocs.totalHits.value, numDocs);
          topDocs.totalHits.value
              < numDocs); // assert that some docs were skipped => optimization was run
    }
    { // test that sorting on main field with equal values + another field for tie breaks doesn't
@ -675,7 +674,21 @@ public class TestSortOptimization extends LuceneTestCase {
  public void testRandomLong() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
-    List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
+    List<Long> seqNos = new ArrayList<>();
    int iterations = 10000 + random().nextInt(10000);
    long seqNoGenerator = random().nextInt(1000);
    for (long i = 0; i < iterations; i++) {
      int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
      for (int j = 0; j < copies; j++) {
        seqNos.add(seqNoGenerator);
      }
      seqNos.add(seqNoGenerator);
      seqNoGenerator++;
      if (random().nextInt(100) <= 5) {
        seqNoGenerator += random().nextInt(10);
      }
    }
    Collections.shuffle(seqNos, random());
    int pendingDocs = 0;
    for (long seqNo : seqNos) {