From 223eecca3389e731480cb48262edc0ae83e0bfdf Mon Sep 17 00:00:00 2001 From: Sorabh Date: Fri, 30 Jun 2023 05:57:34 -0700 Subject: [PATCH] Add a thread safe CachingLeafSlicesSupplier to compute and cache the LeafSlices used with concurrent segment (#12374) search. It uses the protected method `slices` by default to compute the slices which can be overriden by the sub classes of IndexSearcher --- lucene/CHANGES.txt | 2 +- .../apache/lucene/search/IndexSearcher.java | 69 ++++++++++++++++--- 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5cd2c70352c..42ea9730f0d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -126,7 +126,7 @@ New Features Improvements --------------------- -(No changes) +* GITHUB#12374: Add CachingLeafSlicesSupplier to compute the LeafSlices for concurrent segment search (Sorabh Hamirwasia) Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index ca6116a398b..f7611b4ef23 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -28,6 +28,7 @@ import java.util.concurrent.Executor; import java.util.concurrent.FutureTask; import java.util.concurrent.RunnableFuture; import java.util.concurrent.ThreadPoolExecutor; +import java.util.function.Function; import java.util.function.Supplier; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -114,8 +115,13 @@ public class IndexSearcher { protected final IndexReaderContext readerContext; protected final List leafContexts; - /** used with executor - each slice holds a set of leafs executed within one thread */ - private final LeafSlice[] leafSlices; + /** + * used with executor - LeafSlice supplier where each slice holds a set of leafs executed within + * one thread. We are caching it instead of creating it eagerly to avoid calling a protected + * method from constructor, which is a bad practice. This is {@code null} if no executor is + * provided + */ + private final CachingLeafSlicesSupplier leafSlicesSupplier; // These are only used for multi-threaded search private final Executor executor; @@ -234,7 +240,8 @@ public class IndexSearcher { this.taskExecutor = taskExecutor; this.readerContext = context; leafContexts = context.leaves(); - this.leafSlices = executor == null ? null : slices(leafContexts); + leafSlicesSupplier = + (executor == null) ? null : new CachingLeafSlicesSupplier(this::slices, leafContexts); } /** @@ -429,7 +436,7 @@ public class IndexSearcher { * @lucene.experimental */ public LeafSlice[] getSlices() { - return leafSlices; + return (executor == null) ? null : leafSlicesSupplier.get(); } /** @@ -454,16 +461,17 @@ public class IndexSearcher { final int cappedNumHits = Math.min(numHits, limit); + final LeafSlice[] leafSlices = getSlices(); final CollectorManager manager = new CollectorManager() { private final HitsThresholdChecker hitsThresholdChecker = - (executor == null || leafSlices.length <= 1) + (leafSlices == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = - (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); + (leafSlices == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); @Override public TopScoreDocCollector newCollector() throws IOException { @@ -597,17 +605,18 @@ public class IndexSearcher { } final int cappedNumHits = Math.min(numHits, limit); final Sort rewrittenSort = sort.rewrite(this); + final LeafSlice[] leafSlices = getSlices(); final CollectorManager manager = new CollectorManager<>() { private final HitsThresholdChecker hitsThresholdChecker = - (executor == null || leafSlices.length <= 1) + (leafSlices == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = - (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); + (leafSlices == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); @Override public TopFieldCollector newCollector() throws IOException { @@ -637,7 +646,7 @@ public class IndexSearcher { /** * Lower-level search API. Search all leaves using the given {@link CollectorManager}. In contrast * to {@link #search(Query, Collector)}, this method will use the searcher's {@link Executor} in - * order to parallelize execution of the collection on the configured {@link #leafSlices}. + * order to parallelize execution of the collection on the configured {@link #getSlices()}. * * @see CollectorManager * @lucene.experimental @@ -652,7 +661,8 @@ public class IndexSearcher { private T search( Weight weight, CollectorManager collectorManager, C firstCollector) throws IOException { - if (executor == null || leafSlices.length <= 1) { + final LeafSlice[] leafSlices = getSlices(); + if (leafSlices == null || leafSlices.length <= 1) { search(leafContexts, weight, firstCollector); return collectorManager.reduce(Collections.singletonList(firstCollector)); } else { @@ -1001,4 +1011,43 @@ public class IndexSearcher { return new TaskExecutor(executor); } + + /** + * Supplier for {@link LeafSlice} slices which computes and caches the value on first invocation + * and returns cached value on subsequent invocation. If the passed in provider for slice + * computation throws exception then same will be passed to the caller of this supplier on each + * invocation. If the provider returns null then {@link NullPointerException} will be thrown to + * the caller. + * + *

NOTE: To provide thread safe caching mechanism this class is implementing the (subtle) double-checked locking + * idiom + */ + private static class CachingLeafSlicesSupplier implements Supplier { + private volatile LeafSlice[] leafSlices; + + private final Function, LeafSlice[]> sliceProvider; + + private final List leaves; + + private CachingLeafSlicesSupplier( + Function, LeafSlice[]> provider, List leaves) { + this.sliceProvider = Objects.requireNonNull(provider, "leaf slice provider cannot be null"); + this.leaves = Objects.requireNonNull(leaves, "list of LeafReaderContext cannot be null"); + } + + @Override + public LeafSlice[] get() { + if (leafSlices == null) { + synchronized (this) { + if (leafSlices == null) { + leafSlices = + Objects.requireNonNull( + sliceProvider.apply(leaves), "slices computed by the provider is null"); + } + } + } + return leafSlices; + } + } }