LUCENE-10126: Fix competitiveIterator wrongly skip documents (#324)

The competitive iterator can wrongly skip a document that is advanced 
but not collected in the previous scoreRange.
This commit is contained in:
Nhat Nguyen 2021-09-28 15:26:30 -04:00 committed by GitHub
parent 9f80b4d8fb
commit 5ab900e10b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 42 additions and 23 deletions

View File

@ -225,18 +225,23 @@ public abstract class Weight implements SegmentCacheable {
throws IOException { throws IOException {
collector.setScorer(scorer); collector.setScorer(scorer);
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation(); DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
DocIdSetIterator collectorIterator = collector.competitiveIterator(); DocIdSetIterator competitiveIterator = collector.competitiveIterator();
DocIdSetIterator filteredIterator; DocIdSetIterator filteredIterator;
if (collectorIterator == null) { if (competitiveIterator == null) {
filteredIterator = scorerIterator; filteredIterator = scorerIterator;
} else { } else {
// Wrap CompetitiveIterator and ScorerIterator start with (i.e., calling nextDoc()) the last
// visited docID because ConjunctionDISI might have advanced to it in the previous
// scoreRange, but we didn't process due to the range limit of scoreRange.
if (scorerIterator.docID() != -1) { if (scorerIterator.docID() != -1) {
// Wrap ScorerIterator to start from -1 for conjunction
scorerIterator = new StartDISIWrapper(scorerIterator); scorerIterator = new StartDISIWrapper(scorerIterator);
} }
if (competitiveIterator.docID() != -1) {
competitiveIterator = new StartDISIWrapper(competitiveIterator);
}
// filter scorerIterator to keep only competitive docs as defined by collector // filter scorerIterator to keep only competitive docs as defined by collector
filteredIterator = filteredIterator =
ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, collectorIterator)); ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, competitiveIterator));
} }
if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) { if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
scoreAll(collector, filteredIterator, twoPhase, acceptDocs); scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
@ -314,15 +319,15 @@ public abstract class Weight implements SegmentCacheable {
} }
} }
/** Wraps an internal docIdSetIterator for it to start with docID = -1 */ /** Wraps an internal docIdSetIterator for it to start with the last visited docID */
protected static class StartDISIWrapper extends DocIdSetIterator { private static class StartDISIWrapper extends DocIdSetIterator {
private final DocIdSetIterator in; private final DocIdSetIterator in;
private final int min; private final int startDocID;
private int docID = -1; private int docID = -1;
public StartDISIWrapper(DocIdSetIterator in) { StartDISIWrapper(DocIdSetIterator in) {
this.in = in; this.in = in;
this.min = in.docID(); this.startDocID = in.docID();
} }
@Override @Override
@ -337,8 +342,8 @@ public abstract class Weight implements SegmentCacheable {
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (target <= min) { if (target <= startDocID) {
return docID = min; return docID = startDocID;
} }
return docID = in.advance(target); return docID = in.advance(target);
} }

View File

@ -131,7 +131,7 @@ public class DocComparator extends FieldComparator<Integer> {
return null; return null;
} else { } else {
return new DocIdSetIterator() { return new DocIdSetIterator() {
private int docID = -1; private int docID = competitiveIterator.docID();
@Override @Override
public int nextDoc() throws IOException { public int nextDoc() throws IOException {

View File

@ -273,7 +273,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
public DocIdSetIterator competitiveIterator() { public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null; if (enableSkipping == false) return null;
return new DocIdSetIterator() { return new DocIdSetIterator() {
private int docID = -1; private int docID = competitiveIterator.docID();
@Override @Override
public int nextDoc() throws IOException { public int nextDoc() throws IOException {

View File

@ -58,7 +58,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.LONG); final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
final int numHits = 3; final int numHits = 3;
@ -143,7 +143,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.LONG); final SortField sortField = new SortField("my_field", SortField.Type.LONG);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
final int numHits = 3; final int numHits = 3;
@ -182,7 +182,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
final int numHits = 3; final int numHits = 3;
final int totalHitsThreshold = 3; final int totalHitsThreshold = 3;
@ -220,7 +220,7 @@ public class TestSortOptimization extends LuceneTestCase {
public void testSortOptimizationEqualValues() throws IOException { public void testSortOptimizationEqualValues() throws IOException {
final Directory dir = newDirectory(); final Directory dir = newDirectory();
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig()); final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
final int numDocs = atLeast(10000); final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
for (int i = 1; i <= numDocs; ++i) { for (int i = 1; i <= numDocs; ++i) {
final Document doc = new Document(); final Document doc = new Document();
doc.add( doc.add(
@ -234,7 +234,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
final int numHits = 3; final int numHits = 3;
final int totalHitsThreshold = 3; final int totalHitsThreshold = 3;
@ -313,7 +313,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
final IndexReader reader = DirectoryReader.open(writer); final IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT); final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
final Sort sort = new Sort(sortField); final Sort sort = new Sort(sortField);
final int numHits = 3; final int numHits = 3;
@ -661,7 +661,7 @@ public class TestSortOptimization extends LuceneTestCase {
} }
IndexReader reader = DirectoryReader.open(writer); IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
SortField sortField = new SortField("my_field", SortField.Type.LONG); SortField sortField = new SortField("my_field", SortField.Type.LONG);
TopFieldDocs topDocs = TopFieldDocs topDocs =
searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField)); searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
@ -706,7 +706,7 @@ public class TestSortOptimization extends LuceneTestCase {
seqNos.sort(Long::compare); seqNos.sort(Long::compare);
IndexReader reader = DirectoryReader.open(writer); IndexReader reader = DirectoryReader.open(writer);
writer.close(); writer.close();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = newSearcher(reader);
SortField sortField = new SortField("seq_no", SortField.Type.LONG); SortField sortField = new SortField("seq_no", SortField.Type.LONG);
int visitedHits = 0; int visitedHits = 0;
ScoreDoc after = null; ScoreDoc after = null;

View File

@ -80,8 +80,22 @@ final class AssertingBulkScorer extends BulkScorer {
assert min <= max : "max must be greater than min, got min=" + min + ", and max=" + max; assert min <= max : "max must be greater than min, got min=" + min + ", and max=" + max;
this.max = max; this.max = max;
collector = new AssertingLeafCollector(collector, min, max); collector = new AssertingLeafCollector(collector, min, max);
final int next = in.score(collector, acceptDocs, min, max); int next = min;
assert next >= max; while (next < max) {
final int upTo;
if (random.nextBoolean()) {
upTo = max;
} else {
final int interval;
if (random.nextInt(100) <= 5) {
interval = 1 + random.nextInt(10);
} else {
interval = 1 + random.nextInt(random.nextBoolean() ? 100 : 5000);
}
upTo = Math.toIntExact(Math.min(min + interval, max));
}
next = in.score(collector, acceptDocs, min, upTo);
}
if (max >= maxDoc || next >= maxDoc) { if (max >= maxDoc || next >= maxDoc) {
assert next == DocIdSetIterator.NO_MORE_DOCS; assert next == DocIdSetIterator.NO_MORE_DOCS;
return DocIdSetIterator.NO_MORE_DOCS; return DocIdSetIterator.NO_MORE_DOCS;