From 874c446ab945aded465d12f01085af89b83563c6 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 6 Oct 2020 13:22:16 -0400 Subject: [PATCH] LUCENE-9565 Fix competitive iteration (#1952) PR #1351 introduced a sort optimization where documents can be skipped. But iteration over competitive iterators was not properly organized, as they were not storing the current docID, and when competitive iterator was updated the current doc ID was lost. This patch fixed it. Relates to #1351 --- .../java/org/apache/lucene/search/Weight.java | 55 ++++++++++++++++++- .../search/comparators/DocComparator.java | 8 ++- .../search/comparators/NumericComparator.java | 8 ++- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 772093baad7..ba3755a5f4e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -204,9 +204,17 @@ public abstract class Weight implements SegmentCacheable { collector.setScorer(scorer); DocIdSetIterator scorerIterator = twoPhase == null ? iterator : twoPhase.approximation(); DocIdSetIterator collectorIterator = collector.competitiveIterator(); - // if possible filter scorerIterator to keep only competitive docs as defined by collector - DocIdSetIterator filteredIterator = collectorIterator == null ? scorerIterator : - ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator)); + DocIdSetIterator filteredIterator; + if (collectorIterator == null) { + filteredIterator = scorerIterator; + } else { + if (scorerIterator.docID() != -1) { + // Wrap ScorerIterator to start from -1 for conjunction + scorerIterator = new RangeDISIWrapper(scorerIterator, max); + } + // filter scorerIterator to keep only competitive docs as defined by collector + filteredIterator = ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator)); + } if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) { scoreAll(collector, filteredIterator, twoPhase, acceptDocs); return DocIdSetIterator.NO_MORE_DOCS; @@ -266,4 +274,45 @@ public abstract class Weight implements SegmentCacheable { } } + /** + * Wraps an internal docIdSetIterator for it to start with docID = -1 + */ + protected static class RangeDISIWrapper extends DocIdSetIterator { + private final DocIdSetIterator in; + private final int min; + private final int max; + private int docID = -1; + + public RangeDISIWrapper(DocIdSetIterator in, int max) { + this.in = in; + this.min = in.docID(); + this.max = max; + } + + @Override + public int docID() { + return docID; + } + + @Override + public int nextDoc() throws IOException { + return advance(docID + 1); + } + + @Override + public int advance(int target) throws IOException { + target = Math.max(min, target); + if (target >= max) { + return docID = NO_MORE_DOCS; + } + return docID = in.advance(target); + } + + @Override + public long cost() { + return Math.min(max - min, in.cost()); + } + + } + } diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java index 8974ca69c40..c0b3e2efdbd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java @@ -133,14 +133,16 @@ public class DocComparator extends FieldComparator { return null; } else { return new DocIdSetIterator() { + private int docID = -1; + @Override public int nextDoc() throws IOException { - return competitiveIterator.nextDoc(); + return advance(docID + 1); } @Override public int docID() { - return competitiveIterator.docID(); + return docID; } @Override @@ -150,7 +152,7 @@ public class DocComparator extends FieldComparator { @Override public int advance(int target) throws IOException { - return competitiveIterator.advance(target); + return docID = competitiveIterator.advance(target); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index dc166f71a8a..fa5f2679cd3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -220,14 +220,16 @@ public abstract class NumericComparator extends FieldComparato public DocIdSetIterator competitiveIterator() { if (enableSkipping == false) return null; return new DocIdSetIterator() { + private int docID = -1; + @Override public int nextDoc() throws IOException { - return competitiveIterator.nextDoc(); + return advance(docID + 1); } @Override public int docID() { - return competitiveIterator.docID(); + return docID; } @Override @@ -237,7 +239,7 @@ public abstract class NumericComparator extends FieldComparato @Override public int advance(int target) throws IOException { - return competitiveIterator.advance(target); + return docID = competitiveIterator.advance(target); } }; }