mirror of https://github.com/apache/lucene.git
LUCENE-10119: Do not set single sort with search after (#317)
We should not set single sort when the search_after is non-null; otherwise, we will incorrectly skip documents whose values are equal to the value from the search_after and docIDs are greater than the docID from the search_after.
This commit is contained in:
parent
fc475360a8
commit
7390d1af51
|
@ -136,11 +136,6 @@ public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry>
|
||||||
reverseMul[i] = field.reverse ? -1 : 1;
|
reverseMul[i] = field.reverse ? -1 : 1;
|
||||||
comparators[i] = field.getComparator(size, i);
|
comparators[i] = field.getComparator(size, i);
|
||||||
}
|
}
|
||||||
if (numComparators == 1) {
|
|
||||||
// inform a comparator that sort is based on this single field
|
|
||||||
// to enable some optimizations for skipping over non-competitive documents
|
|
||||||
comparators[0].setSingleSort();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -450,6 +450,13 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
||||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||||
|
|
||||||
if (after == null) {
|
if (after == null) {
|
||||||
|
// inform a comparator that sort is based on this single field
|
||||||
|
// to enable some optimizations for skipping over non-competitive documents
|
||||||
|
// We can't set single sort when the `after` parameter is non-null as it's
|
||||||
|
// an implicit sort over the document id.
|
||||||
|
if (queue.comparators.length == 1) {
|
||||||
|
queue.comparators[0].setSingleSort();
|
||||||
|
}
|
||||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
||||||
} else {
|
} else {
|
||||||
if (after.fields == null) {
|
if (after.fields == null) {
|
||||||
|
|
|
@ -20,10 +20,9 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
|
||||||
import static org.apache.lucene.search.SortField.FIELD_SCORE;
|
import static org.apache.lucene.search.SortField.FIELD_SCORE;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.LongStream;
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FloatDocValuesField;
|
import org.apache.lucene.document.FloatDocValuesField;
|
||||||
|
@ -256,12 +255,13 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
< numDocs); // assert that some docs were skipped => optimization was run
|
< numDocs); // assert that some docs were skipped => optimization was run
|
||||||
}
|
}
|
||||||
|
|
||||||
{ // test that sorting on a single field with equal values and after parameter uses the
|
{ // test that sorting on a single field with equal values and after parameter
|
||||||
// optimization
|
// doesn't use the optimization
|
||||||
final int afterValue = 100;
|
final int afterValue = 100;
|
||||||
|
final int afterDocID = 10 + random().nextInt(1000);
|
||||||
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
|
final SortField sortField = new SortField("my_field1", SortField.Type.INT);
|
||||||
final Sort sort = new Sort(sortField);
|
final Sort sort = new Sort(sortField);
|
||||||
FieldDoc after = new FieldDoc(10, Float.NaN, new Integer[] {afterValue});
|
FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[] {afterValue});
|
||||||
final TopFieldCollector collector =
|
final TopFieldCollector collector =
|
||||||
TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
TopFieldCollector.create(sort, numHits, after, totalHitsThreshold);
|
||||||
searcher.search(new MatchAllDocsQuery(), collector);
|
searcher.search(new MatchAllDocsQuery(), collector);
|
||||||
|
@ -270,10 +270,9 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
for (int i = 0; i < numHits; i++) {
|
for (int i = 0; i < numHits; i++) {
|
||||||
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
|
||||||
assertEquals(100, fieldDoc.fields[0]);
|
assertEquals(100, fieldDoc.fields[0]);
|
||||||
|
assertTrue(fieldDoc.doc > afterDocID);
|
||||||
}
|
}
|
||||||
assertTrue(
|
assertEquals(topDocs.totalHits.value, numDocs);
|
||||||
topDocs.totalHits.value
|
|
||||||
< numDocs); // assert that some docs were skipped => optimization was run
|
|
||||||
}
|
}
|
||||||
|
|
||||||
{ // test that sorting on main field with equal values + another field for tie breaks doesn't
|
{ // test that sorting on main field with equal values + another field for tie breaks doesn't
|
||||||
|
@ -675,7 +674,21 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
public void testRandomLong() throws IOException {
|
public void testRandomLong() throws IOException {
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||||
List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
|
List<Long> seqNos = new ArrayList<>();
|
||||||
|
int iterations = 10000 + random().nextInt(10000);
|
||||||
|
long seqNoGenerator = random().nextInt(1000);
|
||||||
|
for (long i = 0; i < iterations; i++) {
|
||||||
|
int copies = random().nextInt(100) <= 5 ? 1 : 1 + random().nextInt(5);
|
||||||
|
for (int j = 0; j < copies; j++) {
|
||||||
|
seqNos.add(seqNoGenerator);
|
||||||
|
}
|
||||||
|
seqNos.add(seqNoGenerator);
|
||||||
|
seqNoGenerator++;
|
||||||
|
if (random().nextInt(100) <= 5) {
|
||||||
|
seqNoGenerator += random().nextInt(10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Collections.shuffle(seqNos, random());
|
Collections.shuffle(seqNos, random());
|
||||||
int pendingDocs = 0;
|
int pendingDocs = 0;
|
||||||
for (long seqNo : seqNos) {
|
for (long seqNo : seqNos) {
|
||||||
|
|
Loading…
Reference in New Issue