mirror of https://github.com/apache/lucene.git
LUCENE-10020: DocComparator should not skip docs with the same docID (#204)
DocComparator should not skip docs with the same docID on multiple sorts with search after. Because of the optimization introduced in LUCENE-9449, when searching with a sort on [_doc, other fields] with search after, DocComparator currently skips all docs before and including the provided [search after docID]. This is desirable behaviour in a single-index search. But in a distributed search, where multiple indices have docs with the same docID, a sort on [_doc, other fields] must NOT skip documents whose docID equals the search-after docID, because documents with that docID in other indices may still be competitive on the remaining sort fields. This PR fixes this by starting the competitive doc range at the search-after docID itself (minDoc = topValue) instead of at the following docID (minDoc = topValue + 1). Relates to LUCENE-9449
This commit is contained in:
parent
167bd99c23
commit
64d9f8c587
|
@ -383,6 +383,9 @@ Bug Fixes
|
||||||
* LUCENE-9964: Duplicate long values in a document field should only be counted once when using SortedNumericDocValuesFields
|
* LUCENE-9964: Duplicate long values in a document field should only be counted once when using SortedNumericDocValuesFields
|
||||||
(Gautam Worah)
|
(Gautam Worah)
|
||||||
|
|
||||||
|
* LUCENE-10020: DocComparator should not skip docs with the same docID on
|
||||||
|
multiple sorts with search after (Mayya Sharipova, Julie Tibshirani)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -81,7 +81,11 @@ public class DocComparator extends FieldComparator<Integer> {
|
||||||
public DocLeafComparator(LeafReaderContext context) {
|
public DocLeafComparator(LeafReaderContext context) {
|
||||||
this.docBase = context.docBase;
|
this.docBase = context.docBase;
|
||||||
if (enableSkipping) {
|
if (enableSkipping) {
|
||||||
this.minDoc = topValue + 1;
|
// Skip docs before topValue, but include docs starting with topValue.
|
||||||
|
// Including topValue is necessary when doing sort on [_doc, other fields]
|
||||||
|
// in a distributed search where there are docs from different indices
|
||||||
|
// with the same docID.
|
||||||
|
this.minDoc = topValue;
|
||||||
this.maxDoc = context.reader().maxDoc();
|
this.maxDoc = context.reader().maxDoc();
|
||||||
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
this.competitiveIterator = DocIdSetIterator.all(maxDoc);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
public class TestSortOptimization extends LuceneTestCase {
|
||||||
|
|
||||||
public void testLongSortOptimization() throws IOException {
|
public void testLongSortOptimization() throws IOException {
|
||||||
|
|
||||||
|
@ -332,6 +332,69 @@ public class TestFieldSortOptimizationSkipping extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that a search with sort on [_doc, other fields] across multiple indices doesn't miss any
|
||||||
|
* documents.
|
||||||
|
*/
|
||||||
|
public void testDocSortOptimizationMultipleIndices() throws IOException {
|
||||||
|
final int numIndices = 3;
|
||||||
|
final int numDocsInIndex = atLeast(50);
|
||||||
|
Directory[] dirs = new Directory[numIndices];
|
||||||
|
IndexReader[] readers = new IndexReader[numIndices];
|
||||||
|
for (int i = 0; i < numIndices; i++) {
|
||||||
|
dirs[i] = newDirectory();
|
||||||
|
try (IndexWriter writer = new IndexWriter(dirs[i], new IndexWriterConfig())) {
|
||||||
|
for (int docID = 0; docID < numDocsInIndex; docID++) {
|
||||||
|
final Document doc = new Document();
|
||||||
|
doc.add(new NumericDocValuesField("my_field", docID * numIndices + i));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.flush();
|
||||||
|
}
|
||||||
|
readers[i] = DirectoryReader.open(dirs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int size = 7;
|
||||||
|
final int totalHitsThreshold = 7;
|
||||||
|
final Sort sort = new Sort(FIELD_DOC, new SortField("my_field", SortField.Type.LONG));
|
||||||
|
TopFieldDocs[] topDocs = new TopFieldDocs[numIndices];
|
||||||
|
int curNumHits;
|
||||||
|
FieldDoc after = null;
|
||||||
|
long collectedDocs = 0;
|
||||||
|
long totalDocs = 0;
|
||||||
|
int numHits = 0;
|
||||||
|
do {
|
||||||
|
for (int i = 0; i < numIndices; i++) {
|
||||||
|
IndexSearcher searcher = newSearcher(readers[i]);
|
||||||
|
final TopFieldCollector collector =
|
||||||
|
TopFieldCollector.create(sort, size, after, totalHitsThreshold);
|
||||||
|
searcher.search(new MatchAllDocsQuery(), collector);
|
||||||
|
topDocs[i] = collector.topDocs();
|
||||||
|
for (int docID = 0; docID < topDocs[i].scoreDocs.length; docID++) {
|
||||||
|
topDocs[i].scoreDocs[docID].shardIndex = i;
|
||||||
|
}
|
||||||
|
collectedDocs += topDocs[i].totalHits.value;
|
||||||
|
totalDocs += numDocsInIndex;
|
||||||
|
}
|
||||||
|
TopFieldDocs mergedTopDcs = TopDocs.merge(sort, size, topDocs);
|
||||||
|
curNumHits = mergedTopDcs.scoreDocs.length;
|
||||||
|
numHits += curNumHits;
|
||||||
|
if (curNumHits > 0) {
|
||||||
|
after = (FieldDoc) mergedTopDcs.scoreDocs[curNumHits - 1];
|
||||||
|
}
|
||||||
|
} while (curNumHits > 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < numIndices; i++) {
|
||||||
|
readers[i].close();
|
||||||
|
dirs[i].close();
|
||||||
|
}
|
||||||
|
|
||||||
|
final int expectedNumHits = numDocsInIndex * numIndices;
|
||||||
|
assertEquals(expectedNumHits, numHits);
|
||||||
|
// check that the optimization was run, as very few docs were collected
|
||||||
|
assertTrue(collectedDocs < totalDocs);
|
||||||
|
}
|
||||||
|
|
||||||
public void testDocSortOptimizationWithAfter() throws IOException {
|
public void testDocSortOptimizationWithAfter() throws IOException {
|
||||||
final Directory dir = newDirectory();
|
final Directory dir = newDirectory();
|
||||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
Loading…
Reference in New Issue