LUCENE-9565 Fix competitive iteration (#1952)

PR #1351 introduced a sort optimization where documents can be skipped.
But iteration over competitive iterators was not properly organized,
as they were not storing the current docID, and
when competitive iterator was updated the current doc ID was lost.

This patch fixed it.

Relates to #1351
This commit is contained in:
Mayya Sharipova 2020-10-06 13:22:16 -04:00 committed by GitHub
parent 6ac94a6f9f
commit 874c446ab9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 9 deletions

View File

@ -204,9 +204,17 @@ public abstract class Weight implements SegmentCacheable {
collector.setScorer(scorer);
DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation();
DocIdSetIterator collectorIterator = collector.competitiveIterator();
// if possible filter scorerIterator to keep only competitive docs as defined by collector
DocIdSetIterator filteredIterator = collectorIterator == null ? scorerIterator :
ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
DocIdSetIterator filteredIterator;
if (collectorIterator == null) {
filteredIterator = scorerIterator;
} else {
if (scorerIterator.docID() != -1) {
// Wrap ScorerIterator to start from -1 for conjunction
scorerIterator = new RangeDISIWrapper(scorerIterator, max);
}
// filter scorerIterator to keep only competitive docs as defined by collector
filteredIterator = ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator));
}
if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
scoreAll(collector, filteredIterator, twoPhase, acceptDocs);
return DocIdSetIterator.NO_MORE_DOCS;
@ -266,4 +274,45 @@ public abstract class Weight implements SegmentCacheable {
}
}
/**
* Wraps an internal docIdSetIterator for it to start with docID = -1
*/
protected static class RangeDISIWrapper extends DocIdSetIterator {
private final DocIdSetIterator in;
private final int min;
private final int max;
private int docID = -1;
public RangeDISIWrapper(DocIdSetIterator in, int max) {
this.in = in;
this.min = in.docID();
this.max = max;
}
@Override
public int docID() {
return docID;
}
@Override
public int nextDoc() throws IOException {
return advance(docID + 1);
}
@Override
public int advance(int target) throws IOException {
target = Math.max(min, target);
if (target >= max) {
return docID = NO_MORE_DOCS;
}
return docID = in.advance(target);
}
@Override
public long cost() {
return Math.min(max - min, in.cost());
}
}
}

View File

@ -133,14 +133,16 @@ public class DocComparator extends FieldComparator<Integer> {
return null;
} else {
return new DocIdSetIterator() {
private int docID = -1;
@Override
public int nextDoc() throws IOException {
return competitiveIterator.nextDoc();
return advance(docID + 1);
}
@Override
public int docID() {
return competitiveIterator.docID();
return docID;
}
@Override
@ -150,7 +152,7 @@ public class DocComparator extends FieldComparator<Integer> {
@Override
public int advance(int target) throws IOException {
return competitiveIterator.advance(target);
return docID = competitiveIterator.advance(target);
}
};
}

View File

@ -220,14 +220,16 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
public DocIdSetIterator competitiveIterator() {
if (enableSkipping == false) return null;
return new DocIdSetIterator() {
private int docID = -1;
@Override
public int nextDoc() throws IOException {
return competitiveIterator.nextDoc();
return advance(docID + 1);
}
@Override
public int docID() {
return competitiveIterator.docID();
return docID;
}
@Override
@ -237,7 +239,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
@Override
public int advance(int target) throws IOException {
return competitiveIterator.advance(target);
return docID = competitiveIterator.advance(target);
}
};
}