Introduce IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector) method (#13603)

There are a couple of places in the codebase where we extend `IndexSearcher` to customize per-leaf behaviour, and in order to do that we need to override the entire search method that loops through the leaves. A good example is `ScorerIndexSearcher`. Adding a `searchLeaf` method that provides the per-leaf behaviour makes those cases a little easier to deal with.
2024-07-30 17:17:27 +02:00 · 2024-07-30 17:17:27 +02:00 · 30c965ea57
parent 68aa629f6c
commit 30c965ea57
5 changed files with 73 additions and 56 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -259,6 +259,10 @@ API Changes
 * GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)
 * GITHUB#13603: Introduced `IndexSearcher#searchLeaf(LeafReaderContext, Weight, Collector)` protected method to
  facilitate customizing the per-leaf behavior of search without having to override
  `search(List<LeafReaderContext>, Weight, Collector)`, which requires re-implementing the entire loop across the leaves (Luca Cavanna)
 New Features
 ---------------------
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@ -674,7 +674,8 @@ public class IndexSearcher {
  /**
   * Lower-level search API.
   *
-   * <p>{@link LeafCollector#collect(int)} is called for every document. <br>
+   * <p>{@link #searchLeaf(LeafReaderContext, Weight, Collector)} is called for every leaf
   * partition. <br>
   *
   * <p>NOTE: this method executes the searches on all given leaves exclusively. To search across
   * all the searcher's leaves, use {@link #leafContexts}.
@ -694,40 +695,56 @@ public class IndexSearcher {
    // threaded...? the Collector could be sync'd?
    // always use single thread:
    for (LeafReaderContext ctx : leaves) { // search each subreader
-      final LeafCollector leafCollector;
+      searchLeaf(ctx, weight, collector);
    }
  }
  /**
   * Lower-level search API
   *
   * <p>{@link LeafCollector#collect(int)} is called for every document. <br>
   *
   * @param ctx the leaf to execute the search against
   * @param weight to match document
   * @param collector to receive hits
   * @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
   *     clauses.
   */
  protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector)
      throws IOException {
    final LeafCollector leafCollector;
    try {
      leafCollector = collector.getLeafCollector(ctx);
    } catch (
        @SuppressWarnings("unused")
        CollectionTerminatedException e) {
      // there is no doc of interest in this reader context
      // continue with the following leaf
      return;
    }
    ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
    if (scorerSupplier != null) {
      scorerSupplier.setTopLevelScoringClause();
      BulkScorer scorer = scorerSupplier.bulkScorer();
      if (queryTimeout != null) {
        scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
      }
      try {
-        leafCollector = collector.getLeafCollector(ctx);
+        scorer.score(leafCollector, ctx.reader().getLiveDocs());
      } catch (
          @SuppressWarnings("unused")
          CollectionTerminatedException e) {
-        // there is no doc of interest in this reader context
+        // collection was terminated prematurely
        // continue with the following leaf
-        continue;
+      } catch (
          @SuppressWarnings("unused")
          TimeLimitingBulkScorer.TimeExceededException e) {
        partialResult = true;
      }
      ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
      if (scorerSupplier != null) {
        scorerSupplier.setTopLevelScoringClause();
        BulkScorer scorer = scorerSupplier.bulkScorer();
        if (queryTimeout != null) {
          scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
        }
        try {
          scorer.score(leafCollector, ctx.reader().getLiveDocs());
        } catch (
            @SuppressWarnings("unused")
            CollectionTerminatedException e) {
          // collection was terminated prematurely
          // continue with the following leaf
        } catch (
            @SuppressWarnings("unused")
            TimeLimitingBulkScorer.TimeExceededException e) {
          partialResult = true;
        }
      }
      // Note: this is called if collection ran successfully, including the above special cases of
      // CollectionTerminatedException and TimeExceededException, but no other exception.
      leafCollector.finish();
    }
    // Note: this is called if collection ran successfully, including the above special cases of
    // CollectionTerminatedException and TimeExceededException, but no other exception.
    leafCollector.finish();
  }
  /**
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
@ -43,27 +43,27 @@ import org.apache.lucene.util.BytesRef;
 public class TestTopDocsMerge extends LuceneTestCase {
  private static class ShardSearcher extends IndexSearcher {
-    private final List<LeafReaderContext> ctx;
+    private final LeafReaderContext ctx;
    public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
      super(parent);
-      this.ctx = Collections.singletonList(ctx);
+      this.ctx = ctx;
    }
    public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
    }
    public TopDocs search(Weight weight, int topN) throws IOException {
      TopScoreDocCollector collector =
          new TopScoreDocCollectorManager(topN, null, Integer.MAX_VALUE, false).newCollector();
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
      return collector.topDocs();
    }
    @Override
    public String toString() {
-      return "ShardSearcher(" + ctx.get(0) + ")";
+      return "ShardSearcher(" + ctx + ")";
    }
  }
--- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
@ -1571,20 +1571,20 @@ public class TestGrouping extends LuceneTestCase {
  }
  private static class ShardSearcher extends IndexSearcher {
-    private final List<LeafReaderContext> ctx;
+    private final LeafReaderContext ctx;
    public ShardSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
      super(parent);
-      this.ctx = Collections.singletonList(ctx);
+      this.ctx = ctx;
    }
    public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, collector);
+      searchLeaf(ctx, weight, collector);
    }
    @Override
    public String toString() {
-      return "ShardSearcher(" + ctx.get(0).reader() + ")";
+      return "ShardSearcher(" + ctx.reader() + ")";
    }
  }
 }
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/search/ScorerIndexSearcher.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/search/ScorerIndexSearcher.java
@ -17,7 +17,6 @@
 package org.apache.lucene.tests.search;
 import java.io.IOException;
 import java.util.List;
 import java.util.concurrent.Executor;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
@ -53,25 +52,22 @@ public class ScorerIndexSearcher extends IndexSearcher {
  }
  @Override
-  protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
+  protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector)
      throws IOException {
-    collector.setWeight(weight);
+    // we force the use of Scorer (not BulkScorer) to make sure
-    for (LeafReaderContext ctx : leaves) { // search each subreader
+    // that the scorer passed to LeafCollector.setScorer supports
-      // we force the use of Scorer (not BulkScorer) to make sure
+    // Scorer.getChildren
-      // that the scorer passed to LeafCollector.setScorer supports
+    Scorer scorer = weight.scorer(ctx);
-      // Scorer.getChildren
+    if (scorer != null) {
-      Scorer scorer = weight.scorer(ctx);
+      final DocIdSetIterator iterator = scorer.iterator();
-      if (scorer != null) {
+      final LeafCollector leafCollector = collector.getLeafCollector(ctx);
-        final DocIdSetIterator iterator = scorer.iterator();
+      leafCollector.setScorer(scorer);
-        final LeafCollector leafCollector = collector.getLeafCollector(ctx);
+      final Bits liveDocs = ctx.reader().getLiveDocs();
-        leafCollector.setScorer(scorer);
+      for (int doc = iterator.nextDoc();
-        final Bits liveDocs = ctx.reader().getLiveDocs();
+          doc != DocIdSetIterator.NO_MORE_DOCS;
-        for (int doc = iterator.nextDoc();
+          doc = iterator.nextDoc()) {
-            doc != DocIdSetIterator.NO_MORE_DOCS;
+        if (liveDocs == null || liveDocs.get(doc)) {
-            doc = iterator.nextDoc()) {
+          leafCollector.collect(doc);
          if (liveDocs == null || liveDocs.get(doc)) {
            leafCollector.collect(doc);
          }
        }
      }
    }