From b7a286dd69f33261637182cd274bc5bb35c773f6 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 15 Sep 2021 09:21:59 -0400 Subject: [PATCH] LUCENE-10106: Sort optimization wrongly skip first docs (#300) The first documents of subsequent segments are mistakenly skipped when sort optimization is enabled. We should initialize maxDocVisited in NumericComparator to -1 instead of 0. --- .../search/comparators/NumericComparator.java | 2 +- .../lucene/search/TestSortOptimization.java | 84 +++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index 0455cea24dc..051d9cc77c9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -84,7 +84,7 @@ public abstract class NumericComparator extends FieldComparato private DocIdSetIterator competitiveIterator; private long iteratorCost; - private int maxDocVisited = 0; + private int maxDocVisited = -1; private int updateCounter = 0; public NumericLeafComparator(LeafReaderContext context) throws IOException { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index ec6ec66d322..4581cf15454 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -20,6 +20,10 @@ import static org.apache.lucene.search.SortField.FIELD_DOC; import static org.apache.lucene.search.SortField.FIELD_SCORE; import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.LongStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatDocValuesField; @@ -633,4 +637,84 @@ public class TestSortOptimization extends LuceneTestCase { reader.close(); dir.close(); } + + public void testMaxDocVisited() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig()); + int numDocs = atLeast(10000); + long offset = 100 + random().nextInt(100); + long smallestValue = 50 + random().nextInt(50); + boolean flushed = false; + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("my_field", i + offset)); + doc.add(new LongPoint("my_field", i + offset)); + writer.addDocument(doc); + if (i >= 5000 && flushed == false) { + flushed = true; + writer.flush(); + // Index the smallest value to the first slot of the second segment + doc = new Document(); + doc.add(new NumericDocValuesField("my_field", smallestValue)); + doc.add(new LongPoint("my_field", smallestValue)); + writer.addDocument(doc); + } + } + IndexReader reader = DirectoryReader.open(writer); + writer.close(); + IndexSearcher searcher = new IndexSearcher(reader); + SortField sortField = new SortField("my_field", SortField.Type.LONG); + TopFieldDocs topDocs = + searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField)); + FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0]; + assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).intValue()); + reader.close(); + dir.close(); + } + + public void testRandomLong() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig()); + List seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList()); + Collections.shuffle(seqNos, random()); + int pendingDocs = 0; + for (long seqNo : seqNos) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("seq_no", seqNo)); + doc.add(new LongPoint("seq_no", seqNo)); + writer.addDocument(doc); + pendingDocs++; + if (pendingDocs > 500 && random().nextInt(100) <= 5) { + pendingDocs = 0; + writer.flush(); + } + } + writer.flush(); + seqNos.sort(Long::compare); + IndexReader reader = DirectoryReader.open(writer); + writer.close(); + IndexSearcher searcher = new IndexSearcher(reader); + SortField sortField = new SortField("seq_no", SortField.Type.LONG); + int visitedHits = 0; + ScoreDoc after = null; + while (visitedHits < seqNos.size()) { + int batch = 1 + random().nextInt(100); + Query query = + random().nextBoolean() + ? new MatchAllDocsQuery() + : LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE); + TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField)); + int expectedHits = Math.min(seqNos.size() - visitedHits, batch); + assertEquals(expectedHits, topDocs.scoreDocs.length); + after = topDocs.scoreDocs[expectedHits - 1]; + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i]; + long expectedSeqNo = seqNos.get(visitedHits); + assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue()); + visitedHits++; + } + } + reader.close(); + dir.close(); + } }