Remove scoreAll() optimization from DefaultBulkScorer.

I no longer see any benefit from this optimization when running luceneutil. However, I do see some benefit from specializing the cases where the collector produces a competitive iterator or where the scorer produces a two-phase iterator.
2024-12-04 16:54:31 +01:00 · 2024-12-04 16:54:31 +01:00 · dadd548936
parent 6c48b404cd
commit dadd548936
1 changed files with 89 additions and 83 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@ -234,12 +234,13 @@ public abstract class Weight implements SegmentCacheable {
    /** Sole constructor. */
    public DefaultBulkScorer(Scorer scorer) {
-      if (scorer == null) {
+      this.scorer = Objects.requireNonNull(scorer);
        throw new NullPointerException();
      }
      this.scorer = scorer;
      this.iterator = scorer.iterator();
      this.twoPhase = scorer.twoPhaseIterator();
      if (twoPhase == null) {
        this.iterator = scorer.iterator();
      } else {
        this.iterator = twoPhase.approximation();
      }
    }
    @Override
@ -251,36 +252,8 @@ public abstract class Weight implements SegmentCacheable {
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
        throws IOException {
      // Hand the scorer to the collector before any document is collected.
      collector.setScorer(scorer);

      // Drive iteration with the two-phase approximation when there is one,
      // otherwise with the scorer's plain iterator.
      final DocIdSetIterator approximationOrIterator;
      if (twoPhase != null) {
        approximationOrIterator = twoPhase.approximation();
      } else {
        approximationOrIterator = iterator;
      }

      final DocIdSetIterator skipper = collector.competitiveIterator();

      // Anything other than "no competitive iterator, unpositioned iterator, whole doc-id space"
      // goes through the range-based path.
      if (skipper != null
          || approximationOrIterator.docID() != -1
          || min != 0
          || max != DocIdSetIterator.NO_MORE_DOCS) {
        return scoreRange(
            collector, approximationOrIterator, twoPhase, skipper, acceptDocs, min, max);
      }

      // Whole-segment case: score everything and report exhaustion.
      scoreAll(collector, approximationOrIterator, twoPhase, acceptDocs);
      return DocIdSetIterator.NO_MORE_DOCS;
    }
    /**
     * Specialized method to bulk-score a range of hits; we separate this from {@link #scoreAll} to
     * help out hotspot. See <a
     * href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>
     */
    static int scoreRange(
        LeafCollector collector,
        DocIdSetIterator iterator,
        TwoPhaseIterator twoPhase,
        DocIdSetIterator competitiveIterator,
        Bits acceptDocs,
        int min,
        int max)
        throws IOException {
      if (competitiveIterator != null) {
        if (competitiveIterator.docID() > min) {
          min = competitiveIterator.docID();
@ -289,75 +262,108 @@ public abstract class Weight implements SegmentCacheable {
        }
      }
-      int doc = iterator.docID();
+      if (iterator.docID() < min) {
-      if (doc < min) {
+        if (iterator.docID() == min - 1) {
-        if (doc == min - 1) {
+          iterator.nextDoc();
          doc = iterator.nextDoc();
        } else {
-          doc = iterator.advance(min);
+          iterator.advance(min);
        }
      }
      // These various specializations help save some null checks in a hot loop, but as importantly
      // if not more importantly, they help reduce the polymorphism of calls sites to nextDoc() and
      // collect() because only a subset of collectors produce a competitive iterator, and the set
      // of implementing classes for two-phase approximations is smaller than the set of doc id set
      // iterator implementations.
      if (twoPhase == null && competitiveIterator == null) {
        // Optimize simple iterators with collectors that can't skip
-        while (doc < max) {
+        scoreIterator(collector, acceptDocs, iterator, max);
-          if (acceptDocs == null || acceptDocs.get(doc)) {
+      } else if (competitiveIterator == null) {
-            collector.collect(doc);
+        scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
-          }
+      } else if (twoPhase == null) {
-          doc = iterator.nextDoc();
+        scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
        }
      } else {
-        while (doc < max) {
+        scoreTwoPhaseOrCompetitiveIterator(
-          if (competitiveIterator != null) {
+            collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
            assert competitiveIterator.docID() <= doc;
            if (competitiveIterator.docID() < doc) {
              competitiveIterator.advance(doc);
            }
            if (competitiveIterator.docID() != doc) {
              doc = iterator.advance(competitiveIterator.docID());
              continue;
            }
          }
          if ((acceptDocs == null || acceptDocs.get(doc))
              && (twoPhase == null || twoPhase.matches())) {
            collector.collect(doc);
          }
          doc = iterator.nextDoc();
        }
      }
-      return doc;
+      return iterator.docID();
    }
-    /**
+    private static void scoreIterator(
-     * Specialized method to bulk-score all hits; we separate this from {@link #scoreRange} to help
+        LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
-     * out hotspot. See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a>
+        throws IOException {
-     */
+      for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) {
-    static void scoreAll(
+        if (acceptDocs == null || acceptDocs.get(doc)) {
          collector.collect(doc);
        }
      }
    }
    private static void scoreTwoPhaseIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        TwoPhaseIterator twoPhase,
-        Bits acceptDocs)
+        int max)
        throws IOException {
-      if (twoPhase == null) {
+      for (int doc = iterator.docID(); doc < max; ) {
-        for (int doc = iterator.nextDoc();
+        if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
-            doc != DocIdSetIterator.NO_MORE_DOCS;
+          collector.collect(doc);
-            doc = iterator.nextDoc()) {
+        }
-          if (acceptDocs == null || acceptDocs.get(doc)) {
+
-            collector.collect(doc);
+        doc = iterator.nextDoc();
      }
    }
    private static void scoreCompetitiveIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        DocIdSetIterator competitiveIterator,
        int max)
        throws IOException {
      for (int doc = iterator.docID(); doc < max; ) {
        assert competitiveIterator.docID() <= doc; // invariant
        if (competitiveIterator.docID() < doc) {
          int competitiveNext = competitiveIterator.advance(doc);
          if (competitiveNext != doc) {
            doc = iterator.advance(competitiveNext);
            continue;
          }
        }
-      } else {
+
-        // The scorer has an approximation, so run the approximation first, then check acceptDocs,
+        if ((acceptDocs == null || acceptDocs.get(doc))) {
-        // then confirm
+          collector.collect(doc);
-        for (int doc = iterator.nextDoc();
+        }
-            doc != DocIdSetIterator.NO_MORE_DOCS;
+
-            doc = iterator.nextDoc()) {
+        doc = iterator.nextDoc();
-          if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
+      }
-            collector.collect(doc);
+    }
    /**
     * Collects matching documents below {@code max}, starting from the document {@code iterator}
     * is currently positioned on, while keeping {@code competitiveIterator} in step with it.
     *
     * <p>Both {@code twoPhase} and {@code competitiveIterator} are dereferenced unconditionally, so
     * neither may be {@code null} here. {@code acceptDocs} may be {@code null}, in which case no
     * document is filtered out by it.
     *
     * @param collector receives every document that passes all checks
     * @param acceptDocs optional filter consulted before the two-phase confirmation
     * @param iterator iterator that drives the loop
     * @param twoPhase confirms each candidate via {@code matches()}
     * @param competitiveIterator iterator used to skip ahead of {@code iterator}
     * @param max exclusive upper bound on collected doc ids
     * @throws IOException if an iterator or the collector throws it
     */
    private static void scoreTwoPhaseOrCompetitiveIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        TwoPhaseIterator twoPhase,
        DocIdSetIterator competitiveIterator,
        int max)
        throws IOException {
      int candidate = iterator.docID();
      while (candidate < max) {
        assert competitiveIterator.docID() <= candidate; // invariant

        if (competitiveIterator.docID() < candidate) {
          // Bring the competitive iterator up to the candidate; if it lands elsewhere, the
          // candidate is skipped and the main iterator jumps to where it landed.
          final int competitiveDoc = competitiveIterator.advance(candidate);
          if (competitiveDoc != candidate) {
            candidate = iterator.advance(competitiveDoc);
            continue;
          }
        }

        // Check acceptDocs first so that matches() is only called for accepted documents.
        final boolean accepted = acceptDocs == null || acceptDocs.get(candidate);
        if (accepted && twoPhase.matches()) {
          collector.collect(candidate);
        }

        candidate = iterator.nextDoc();
      }
    }
  }