mirror of https://github.com/apache/lucene.git
LUCENE-10106: Sort optimization wrongly skips first docs (#300)
The first documents of subsequent segments are mistakenly skipped when sort optimization is enabled. We should initialize maxDocVisited in NumericComparator to -1 instead of 0.
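In the comparator's skipping logic, any docID at or below maxDocVisited is treated as already visited and is not collected again; with an initial value of 0, doc 0 of a fresh segment is therefore discarded before it was ever visited. A minimal standalone sketch of that off-by-one (illustrative names, not the actual Lucene source):

public class MaxDocVisitedDemo {
  public static void main(String[] args) {
    int maxDoc = 3; // pretend segment with docIDs 0..2
    for (int initial : new int[] {0, -1}) {
      int maxDocVisited = initial;
      StringBuilder visited = new StringBuilder();
      for (int docID = 0; docID < maxDoc; docID++) {
        if (docID <= maxDocVisited) {
          continue; // "already visited": with initial == 0 this wrongly drops doc 0
        }
        visited.append(docID).append(' ');
        maxDocVisited = docID;
      }
      System.out.println("initial=" + initial + " -> visited: " + visited);
    }
    // prints: initial=0 -> visited: 1 2
    //         initial=-1 -> visited: 0 1 2
  }
}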
This commit is contained in:
parent
1586933b18
commit
b7a286dd69
@@ -84,7 +84,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparator
     private DocIdSetIterator competitiveIterator;
     private long iteratorCost;
-    private int maxDocVisited = 0;
+    private int maxDocVisited = -1;
     private int updateCounter = 0;

     public NumericLeafComparator(LeafReaderContext context) throws IOException {
@@ -20,6 +20,10 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
 import static org.apache.lucene.search.SortField.FIELD_SCORE;

 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
@@ -633,4 +637,84 @@ public class TestSortOptimization extends LuceneTestCase {
     reader.close();
     dir.close();
   }
+
+  public void testMaxDocVisited() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    int numDocs = atLeast(10000);
+    long offset = 100 + random().nextInt(100);
+    long smallestValue = 50 + random().nextInt(50);
+    boolean flushed = false;
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("my_field", i + offset));
+      doc.add(new LongPoint("my_field", i + offset));
+      writer.addDocument(doc);
+      if (i >= 5000 && flushed == false) {
+        flushed = true;
+        writer.flush();
+        // Index the smallest value to the first slot of the second segment
+        doc = new Document();
+        doc.add(new NumericDocValuesField("my_field", smallestValue));
+        doc.add(new LongPoint("my_field", smallestValue));
+        writer.addDocument(doc);
+      }
+    }
+    IndexReader reader = DirectoryReader.open(writer);
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    SortField sortField = new SortField("my_field", SortField.Type.LONG);
+    TopFieldDocs topDocs =
+        searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
+    FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0];
+    assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).intValue());
+    reader.close();
+    dir.close();
+  }
+
+  public void testRandomLong() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
+    Collections.shuffle(seqNos, random());
+    int pendingDocs = 0;
+    for (long seqNo : seqNos) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("seq_no", seqNo));
+      doc.add(new LongPoint("seq_no", seqNo));
+      writer.addDocument(doc);
+      pendingDocs++;
+      if (pendingDocs > 500 && random().nextInt(100) <= 5) {
+        pendingDocs = 0;
+        writer.flush();
+      }
+    }
+    writer.flush();
+    seqNos.sort(Long::compare);
+    IndexReader reader = DirectoryReader.open(writer);
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    SortField sortField = new SortField("seq_no", SortField.Type.LONG);
+    int visitedHits = 0;
+    ScoreDoc after = null;
+    while (visitedHits < seqNos.size()) {
+      int batch = 1 + random().nextInt(100);
+      Query query =
+          random().nextBoolean()
+              ? new MatchAllDocsQuery()
+              : LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
+      TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
+      int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
+      assertEquals(expectedHits, topDocs.scoreDocs.length);
+      after = topDocs.scoreDocs[expectedHits - 1];
+      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
+        long expectedSeqNo = seqNos.get(visitedHits);
+        assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
+        visitedHits++;
+      }
+    }
+    reader.close();
+    dir.close();
+  }
 }