Simplify leaf slice calculation (#13893)

No need to go through the indirection of two wrapped functions; just put the logic in plain
methods. Also, we can just outright set the field if there's no executor.
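
What remains after the simplification is still the double-checked locking idiom, just on a plain volatile field with the slow path in a synchronized method. A minimal standalone sketch of that shape (illustrative names only, not the actual IndexSearcher API):

  // Double-checked locking cache: a volatile field holds the computed
  // value; the slow path re-checks under the lock before computing,
  // so the computation runs at most once.
  class CachedValue {
    private volatile int[] cached;

    int[] get() {
      int[] res = cached; // single volatile read on the fast path
      if (res == null) {
        res = computeAndCache();
      }
      return res;
    }

    private synchronized int[] computeAndCache() {
      int[] res = cached; // re-check under the lock
      if (res == null) {
        res = compute();
        cached = res; // publish only once fully built
      }
      return res;
    }

    private int[] compute() {
      return new int[] {1, 2, 3};
    }
  }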
Armin Braun 2024-11-06 10:36:06 +01:00 committed by GitHub
parent 539cf3c9a3
commit a888af76b2
1 changed file with 50 additions and 79 deletions


@@ -27,7 +27,6 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.Executor;
-import java.util.function.Function;
 import java.util.function.Supplier;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -115,13 +114,7 @@ public class IndexSearcher {
   protected final IndexReaderContext readerContext;
   protected final List<LeafReaderContext> leafContexts;
 
-  /**
-   * Used with executor - LeafSlice supplier where each slice holds a set of leafs executed within
-   * one thread. We are caching it instead of creating it eagerly to avoid calling a protected
-   * method from constructor, which is a bad practice. Always non-null, regardless of whether an
-   * executor is provided or not.
-   */
-  private final Supplier<LeafSlice[]> leafSlicesSupplier;
+  private volatile LeafSlice[] leafSlices;
 
   // Used internally for load balancing threads executing for the query
   private final TaskExecutor taskExecutor;
@@ -230,20 +223,18 @@
         executor == null ? new TaskExecutor(Runnable::run) : new TaskExecutor(executor);
     this.readerContext = context;
     leafContexts = context.leaves();
-    Function<List<LeafReaderContext>, LeafSlice[]> slicesProvider =
-        executor == null
-            ? leaves ->
-                leaves.isEmpty()
-                    ? new LeafSlice[0]
-                    : new LeafSlice[] {
-                      new LeafSlice(
-                          new ArrayList<>(
-                              leaves.stream()
-                                  .map(LeafReaderContextPartition::createForEntireSegment)
-                                  .toList()))
-                    }
-            : this::slices;
-    leafSlicesSupplier = new CachingLeafSlicesSupplier(slicesProvider, leafContexts);
+    if (executor == null) {
+      leafSlices =
+          leafContexts.isEmpty()
+              ? new LeafSlice[0]
+              : new LeafSlice[] {
+                new LeafSlice(
+                    new ArrayList<>(
+                        leafContexts.stream()
+                            .map(LeafReaderContextPartition::createForEntireSegment)
+                            .toList()))
+              };
+    }
   }
 
   /**
@@ -540,7 +531,43 @@
    * @lucene.experimental
    */
   public final LeafSlice[] getSlices() {
-    return leafSlicesSupplier.get();
+    LeafSlice[] res = leafSlices;
+    if (res == null) {
+      res = computeAndCacheSlices();
+    }
+    return res;
   }
+
+  private synchronized LeafSlice[] computeAndCacheSlices() {
+    LeafSlice[] res = leafSlices;
+    if (res == null) {
+      res = slices(leafContexts);
+      /*
+       * Enforce that there aren't multiple leaf partitions within the same leaf slice pointing to the
+       * same leaf context. It is a requirement that {@link Collector#getLeafCollector(LeafReaderContext)}
+       * gets called once per leaf context. Also, it does not make sense to partition a segment to then search
+       * those partitions as part of the same slice, because the goal of partitioning is parallel searching
+       * which happens at the slice level.
+       */
+      for (LeafSlice leafSlice : res) {
+        if (leafSlice.partitions.length <= 1) {
+          continue;
+        }
+        enforceDistinctLeaves(leafSlice);
+      }
+      leafSlices = res;
+    }
+    return res;
+  }
+
+  private static void enforceDistinctLeaves(LeafSlice leafSlice) {
+    Set<LeafReaderContext> distinctLeaves = new HashSet<>();
+    for (LeafReaderContextPartition leafPartition : leafSlice.partitions) {
+      if (distinctLeaves.add(leafPartition.ctx) == false) {
+        throw new IllegalStateException(
+            "The same slice targets multiple leaf partitions of the same leaf reader context. A physical segment should rather get partitioned to be searched concurrently from as many slices as the number of leaf partitions it is split into.");
+      }
+    }
+  }
 
   /**
@@ -1164,60 +1191,4 @@
             + IndexSearcher.getMaxClauseCount());
     }
   }
-
-  /**
-   * Supplier for {@link LeafSlice} slices which computes and caches the value on first invocation
-   * and returns cached value on subsequent invocation. If the passed in provider for slice
-   * computation throws exception then same will be passed to the caller of this supplier on each
-   * invocation. If the provider returns null then {@link NullPointerException} will be thrown to
-   * the caller.
-   *
-   * <p>NOTE: To provide thread safe caching mechanism this class is implementing the (subtle) <a
-   * href="https://shipilev.net/blog/2014/safe-public-construction/">double-checked locking
-   * idiom</a>
-   */
-  private static class CachingLeafSlicesSupplier implements Supplier<LeafSlice[]> {
-    private volatile LeafSlice[] leafSlices;
-
-    private final Function<List<LeafReaderContext>, LeafSlice[]> sliceProvider;
-    private final List<LeafReaderContext> leaves;
-
-    private CachingLeafSlicesSupplier(
-        Function<List<LeafReaderContext>, LeafSlice[]> provider, List<LeafReaderContext> leaves) {
-      this.sliceProvider = Objects.requireNonNull(provider, "leaf slice provider cannot be null");
-      this.leaves = Objects.requireNonNull(leaves, "list of LeafReaderContext cannot be null");
-    }
-
-    @Override
-    public LeafSlice[] get() {
-      if (leafSlices == null) {
-        synchronized (this) {
-          if (leafSlices == null) {
-            leafSlices =
-                Objects.requireNonNull(
-                    sliceProvider.apply(leaves), "slices computed by the provider is null");
-            /*
-             * Enforce that there aren't multiple leaf partitions within the same leaf slice pointing to the
-             * same leaf context. It is a requirement that {@link Collector#getLeafCollector(LeafReaderContext)}
-             * gets called once per leaf context. Also, it does not make sense to partition a segment to then search
-             * those partitions as part of the same slice, because the goal of partitioning is parallel searching
-             * which happens at the slice level.
-             */
-            for (LeafSlice leafSlice : leafSlices) {
-              Set<LeafReaderContext> distinctLeaves = new HashSet<>();
-              for (LeafReaderContextPartition leafPartition : leafSlice.partitions) {
-                distinctLeaves.add(leafPartition.ctx);
-              }
-              if (leafSlice.partitions.length != distinctLeaves.size()) {
-                throw new IllegalStateException(
-                    "The same slice targets multiple leaf partitions of the same leaf reader context. A physical segment should rather get partitioned to be searched concurrently from as many slices as the number of leaf partitions it is split into.");
-              }
-            }
-          }
-        }
-      }
-      return leafSlices;
-    }
-  }
 }
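
The duplicate-leaf check also becomes fail-fast: the removed supplier filled a set and compared its size to partitions.length after the loop, while the new enforceDistinctLeaves throws on the first duplicate, relying on Set.add returning false for an element that is already present. A standalone sketch of that idiom (hypothetical names, not part of the Lucene API):

  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;

  class DistinctCheck {
    // Fail-fast duplicate detection: Set.add returns false when the
    // element is already present, so we throw immediately instead of
    // counting and comparing sizes after the loop.
    static <T> void requireDistinct(List<T> items) {
      Set<T> seen = new HashSet<>();
      for (T item : items) {
        if (seen.add(item) == false) {
          throw new IllegalStateException("duplicate element: " + item);
        }
      }
    }

    public static void main(String[] args) {
      requireDistinct(List.of("seg0", "seg1")); // passes
      requireDistinct(List.of("seg0", "seg0")); // throws
    }
  }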