mirror of https://github.com/apache/lucene.git
LUCENE-10126: Fix competitiveIterator wrongly skip documents (#324)
The competitive iterator can wrongly skip a document that is advanced but not collected in the previous scoreRange.
This commit is contained in:
parent
9f80b4d8fb
commit
5ab900e10b
|
@ -225,18 +225,23 @@ public abstract class Weight implements SegmentCacheable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
collector.setScorer(scorer);
|
collector.setScorer(scorer);
|
||||||
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
|
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
|
||||||
DocIdSetIterator collectorIterator = collector.competitiveIterator();
|
DocIdSetIterator competitiveIterator = collector.competitiveIterator();
|
||||||
DocIdSetIterator filteredIterator;
|
DocIdSetIterator filteredIterator;
|
||||||
if (collectorIterator == null) {
|
if (competitiveIterator == null) {
|
||||||
filteredIterator = scorerIterator;
|
filteredIterator = scorerIterator;
|
||||||
} else {
|
} else {
|
||||||
|
// Wrap CompetitiveIterator and ScorerIterator start with (i.e., calling nextDoc()) the last
|
||||||
|
// visited docID because ConjunctionDISI might have advanced to it in the previous
|
||||||
|
// scoreRange, but we didn't process due to the range limit of scoreRange.
|
||||||
if (scorerIterator.docID() != -1) {
|
if (scorerIterator.docID() != -1) {
|
||||||
// Wrap ScorerIterator to start from -1 for conjunction
|
|
||||||
scorerIterator = new StartDISIWrapper(scorerIterator);
|
scorerIterator = new StartDISIWrapper(scorerIterator);
|
||||||
}
|
}
|
||||||
|
if (competitiveIterator.docID() != -1) {
|
||||||
|
competitiveIterator = new StartDISIWrapper(competitiveIterator);
|
||||||
|
}
|
||||||
// filter scorerIterator to keep only competitive docs as defined by collector
|
// filter scorerIterator to keep only competitive docs as defined by collector
|
||||||
filteredIterator =
|
filteredIterator =
|
||||||
ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
|
ConjunctionUtils.intersectIterators(Arrays.asList(scorerIterator, competitiveIterator));
|
||||||
}
|
}
|
||||||
if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
|
if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
|
scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
|
||||||
|
@ -314,15 +319,15 @@ public abstract class Weight implements SegmentCacheable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Wraps an internal docIdSetIterator for it to start with docID = -1 */
|
/** Wraps an internal docIdSetIterator for it to start with the last visited docID */
|
||||||
protected static class StartDISIWrapper extends DocIdSetIterator {
|
private static class StartDISIWrapper extends DocIdSetIterator {
|
||||||
private final DocIdSetIterator in;
|
private final DocIdSetIterator in;
|
||||||
private final int min;
|
private final int startDocID;
|
||||||
private int docID = -1;
|
private int docID = -1;
|
||||||
|
|
||||||
public StartDISIWrapper(DocIdSetIterator in) {
|
StartDISIWrapper(DocIdSetIterator in) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.min = in.docID();
|
this.startDocID = in.docID();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -337,8 +342,8 @@ public abstract class Weight implements SegmentCacheable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
if (target <= min) {
|
if (target <= startDocID) {
|
||||||
return docID = min;
|
return docID = startDocID;
|
||||||
}
|
}
|
||||||
return docID = in.advance(target);
|
return docID = in.advance(target);
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,7 +131,7 @@ public class DocComparator extends FieldComparator<Integer> {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
return new DocIdSetIterator() {
|
return new DocIdSetIterator() {
|
||||||
private int docID = -1;
|
private int docID = competitiveIterator.docID();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
|
|
|
@ -273,7 +273,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
|
||||||
public DocIdSetIterator competitiveIterator() {
|
public DocIdSetIterator competitiveIterator() {
|
||||||
if (enableSkipping == false) return null;
|
if (enableSkipping == false) return null;
|
||||||
return new DocIdSetIterator() {
|
return new DocIdSetIterator() {
|
||||||
private int docID = -1;
|
private int docID = competitiveIterator.docID();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
|
|
|
@ -58,7 +58,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final IndexReader reader = DirectoryReader.open(writer);
|
final IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||||
final Sort sort = new Sort(sortField);
|
final Sort sort = new Sort(sortField);
|
||||||
final int numHits = 3;
|
final int numHits = 3;
|
||||||
|
@ -143,7 +143,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final IndexReader reader = DirectoryReader.open(writer);
|
final IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
final SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||||
final Sort sort = new Sort(sortField);
|
final Sort sort = new Sort(sortField);
|
||||||
final int numHits = 3;
|
final int numHits = 3;
|
||||||
|
@ -182,7 +182,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final IndexReader reader = DirectoryReader.open(writer);
|
final IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
final int numHits = 3;
|
final int numHits = 3;
|
||||||
final int totalHitsThreshold = 3;
|
final int totalHitsThreshold = 3;
|
||||||
|
|
||||||
|
@ -220,7 +220,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
public void testSortOptimizationEqualValues() throws IOException {
|
public void testSortOptimizationEqualValues() throws IOException {
|
||||||
final Directory dir = newDirectory();
|
final Directory dir = newDirectory();
|
||||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||||
final int numDocs = atLeast(10000);
|
final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
|
||||||
for (int i = 1; i <= numDocs; ++i) {
|
for (int i = 1; i <= numDocs; ++i) {
|
||||||
final Document doc = new Document();
|
final Document doc = new Document();
|
||||||
doc.add(
|
doc.add(
|
||||||
|
@ -234,7 +234,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final IndexReader reader = DirectoryReader.open(writer);
|
final IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
final int numHits = 3;
|
final int numHits = 3;
|
||||||
final int totalHitsThreshold = 3;
|
final int totalHitsThreshold = 3;
|
||||||
|
|
||||||
|
@ -313,7 +313,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
final IndexReader reader = DirectoryReader.open(writer);
|
final IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
|
final SortField sortField = new SortField("my_field", SortField.Type.FLOAT);
|
||||||
final Sort sort = new Sort(sortField);
|
final Sort sort = new Sort(sortField);
|
||||||
final int numHits = 3;
|
final int numHits = 3;
|
||||||
|
@ -661,7 +661,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
IndexReader reader = DirectoryReader.open(writer);
|
IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
SortField sortField = new SortField("my_field", SortField.Type.LONG);
|
||||||
TopFieldDocs topDocs =
|
TopFieldDocs topDocs =
|
||||||
searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
|
searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
|
||||||
|
@ -706,7 +706,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
||||||
seqNos.sort(Long::compare);
|
seqNos.sort(Long::compare);
|
||||||
IndexReader reader = DirectoryReader.open(writer);
|
IndexReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
SortField sortField = new SortField("seq_no", SortField.Type.LONG);
|
SortField sortField = new SortField("seq_no", SortField.Type.LONG);
|
||||||
int visitedHits = 0;
|
int visitedHits = 0;
|
||||||
ScoreDoc after = null;
|
ScoreDoc after = null;
|
||||||
|
|
|
@ -80,8 +80,22 @@ final class AssertingBulkScorer extends BulkScorer {
|
||||||
assert min <= max : "max must be greater than min, got min=" + min + ", and max=" + max;
|
assert min <= max : "max must be greater than min, got min=" + min + ", and max=" + max;
|
||||||
this.max = max;
|
this.max = max;
|
||||||
collector = new AssertingLeafCollector(collector, min, max);
|
collector = new AssertingLeafCollector(collector, min, max);
|
||||||
final int next = in.score(collector, acceptDocs, min, max);
|
int next = min;
|
||||||
assert next >= max;
|
while (next < max) {
|
||||||
|
final int upTo;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
upTo = max;
|
||||||
|
} else {
|
||||||
|
final int interval;
|
||||||
|
if (random.nextInt(100) <= 5) {
|
||||||
|
interval = 1 + random.nextInt(10);
|
||||||
|
} else {
|
||||||
|
interval = 1 + random.nextInt(random.nextBoolean() ? 100 : 5000);
|
||||||
|
}
|
||||||
|
upTo = Math.toIntExact(Math.min(min + interval, max));
|
||||||
|
}
|
||||||
|
next = in.score(collector, acceptDocs, min, upTo);
|
||||||
|
}
|
||||||
if (max >= maxDoc || next >= maxDoc) {
|
if (max >= maxDoc || next >= maxDoc) {
|
||||||
assert next == DocIdSetIterator.NO_MORE_DOCS;
|
assert next == DocIdSetIterator.NO_MORE_DOCS;
|
||||||
return DocIdSetIterator.NO_MORE_DOCS;
|
return DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
|
Loading…
Reference in New Issue