LUCENE-10119: Do not set single sort with search after (#317)

We should not set single sort when the search_after is non-null; otherwise, we will incorrectly skip documents whose values are equal to the value from the search_after and docIDs are greater than the docID from the search_after.
2021-09-23 13:10:17 -04:00 · 2021-09-23 13:10:17 -04:00 · 7390d1af51
parent fc475360a8
commit 7390d1af51
3 changed files with 29 additions and 14 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
@ -136,11 +136,6 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
      reverseMul[i] = field.reverse ? -1 : 1;
      comparators[i] = field.getComparator(size, i);
    }
-    if (numComparators == 1) {
-      // inform a comparator that sort is based on this single field
-      // to enable some optimizations for skipping over non-competitive documents
-      comparators[0].setSingleSort();
-    }
  }

  /**
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@ -450,6 +450,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
    FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);

    if (after == null) {
+      // inform a comparator that sort is based on this single field
+      // to enable some optimizations for skipping over non-competitive documents
+      // We can't set single sort when the `after` parameter is non-null as it's
+      // an implicit sort over the document id.
+      if (queue.comparators.length == 1) {
+        queue.comparators[0].setSingleSort();
+      }
      return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
    } else {
      if (after.fields == null) {
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@ -20,10 +20,9 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
 import static org.apache.lucene.search.SortField.FIELD_SCORE;

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.LongStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
@ -256,12 +255,13 @@ public class TestSortOptimization extends LuceneTestCase {
              < numDocs); // assert that some docs were skipped => optimization was run
    }

-    { // test that sorting on a single field with equal values and after parameter uses the
-      // optimization
+    { // test that sorting on a single field with equal values and after parameter
+      // doesn't use the optimization
      final int afterValue = 100;
+      final int afterDocID = 10 + random().nextInt(1000);
      final SortField sortField = new SortField("my_field1", SortField.Type.INT);
      final Sort sort = new Sort(sortField);
-      FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
+      FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
      final TopFieldCollector collector =
          TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
      searcher.search(new MatchAllDocsQuery(), collector);
@ -270,10 +270,9 @@ public class TestSortOptimization extends LuceneTestCase {
      for (int i = 0; i < numHits; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        assertEquals(100, fieldDoc.fields[0]);
+        assertTrue(fieldDoc.doc > afterDocID);
      }
-      assertTrue(
-          topDocs.totalHits.value
-              < numDocs); // assert that some docs were skipped => optimization was run
+      assertEquals(topDocs.totalHits.value, numDocs);
    }

    { // test that sorting on main field with equal values + another field for tie breaks doesn't
@ -675,7 +674,21 @@ public class TestSortOptimization extends LuceneTestCase {
  public void testRandomLong() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
-    List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
+    List<Long> seqNos = new ArrayList<>();
+    int iterations = 10000 + random().nextInt(10000);
+    long seqNoGenerator = random().nextInt(1000);
+    for (long i = 0; i < iterations; i++) {
+      int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
+      for (int j = 0; j < copies; j++) {
+        seqNos.add(seqNoGenerator);
+      }
+      seqNos.add(seqNoGenerator);
+      seqNoGenerator++;
+      if (random().nextInt(100) <= 5) {
+        seqNoGenerator += random().nextInt(10);
+      }
+    }
+
    Collections.shuffle(seqNos, random());
    int pendingDocs = 0;
    for (long seqNo : seqNos) {