diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9f036b0b17d..caff0fdb9d4 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -153,6 +153,9 @@ Improvements * GITHUB#12603: Simplify the TaskExecutor API by exposing a single invokeAll method that takes a collection of callables, executes them and returns their results (Luca Cavanna) +* GITHUB#12606: Create a TaskExecutor when an executor is not provided to the IndexSearcher, in + order to simplify consumer's code (Luca Cavanna) + Optimizations --------------------- * GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary) diff --git a/lucene/core/src/java/org/apache/lucene/index/TermStates.java b/lucene/core/src/java/org/apache/lucene/index/TermStates.java index 7efa8f8ee0c..75c64a907b1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermStates.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermStates.java @@ -98,37 +98,26 @@ public final class TermStates { final TermStates perReaderTermState = new TermStates(needsStats ? null : term, context); if (needsStats) { TaskExecutor taskExecutor = indexSearcher.getTaskExecutor(); - if (taskExecutor != null) { - // build the term states concurrently - List> tasks = new ArrayList<>(context.leaves().size()); - for (LeafReaderContext ctx : context.leaves()) { - tasks.add( - () -> { - TermsEnum termsEnum = loadTermsEnum(ctx, term); - return termsEnum == null - ? null - : new TermStateInfo( - termsEnum.termState(), - ctx.ord, - termsEnum.docFreq(), - termsEnum.totalTermFreq()); - }); - } - List resultInfos = taskExecutor.invokeAll(tasks); - for (TermStateInfo info : resultInfos) { - if (info != null) { - perReaderTermState.register( - info.getState(), info.getOrdinal(), info.getDocFreq(), info.getTotalTermFreq()); - } - } - } else { - // build the term states sequentially - for (final LeafReaderContext ctx : context.leaves()) { - TermsEnum termsEnum = loadTermsEnum(ctx, term); - if (termsEnum != null) { - perReaderTermState.register( - termsEnum.termState(), ctx.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); - } + // build the term states concurrently + List> tasks = new ArrayList<>(context.leaves().size()); + for (LeafReaderContext ctx : context.leaves()) { + tasks.add( + () -> { + TermsEnum termsEnum = loadTermsEnum(ctx, term); + return termsEnum == null + ? null + : new TermStateInfo( + termsEnum.termState(), + ctx.ord, + termsEnum.docFreq(), + termsEnum.totalTermFreq()); + }); + } + List resultInfos = taskExecutor.invokeAll(tasks); + for (TermStateInfo info : resultInfos) { + if (info != null) { + perReaderTermState.register( + info.getState(), info.getOrdinal(), info.getDocFreq(), info.getTotalTermFreq()); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java index 34b8e318ed1..d81ab2806fe 100644 --- a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java @@ -80,10 +80,12 @@ abstract class AbstractKnnVectorQuery extends Query { } TaskExecutor taskExecutor = indexSearcher.getTaskExecutor(); - TopDocs[] perLeafResults = - (taskExecutor == null) - ? sequentialSearch(reader.leaves(), filterWeight) - : parallelSearch(reader.leaves(), filterWeight, taskExecutor); + List leafReaderContexts = reader.leaves(); + List> tasks = new ArrayList<>(leafReaderContexts.size()); + for (LeafReaderContext context : leafReaderContexts) { + tasks.add(() -> searchLeaf(context, filterWeight)); + } + TopDocs[] perLeafResults = taskExecutor.invokeAll(tasks).toArray(TopDocs[]::new); // Merge sort the results TopDocs topK = TopDocs.merge(k, perLeafResults); @@ -93,25 +95,6 @@ abstract class AbstractKnnVectorQuery extends Query { return createRewrittenQuery(reader, topK); } - private TopDocs[] sequentialSearch( - List leafReaderContexts, Weight filterWeight) throws IOException { - TopDocs[] perLeafResults = new TopDocs[leafReaderContexts.size()]; - for (LeafReaderContext ctx : leafReaderContexts) { - perLeafResults[ctx.ord] = searchLeaf(ctx, filterWeight); - } - return perLeafResults; - } - - private TopDocs[] parallelSearch( - List leafReaderContexts, Weight filterWeight, TaskExecutor taskExecutor) - throws IOException { - List> tasks = new ArrayList<>(leafReaderContexts.size()); - for (LeafReaderContext context : leafReaderContexts) { - tasks.add(() -> searchLeaf(context, filterWeight)); - } - return taskExecutor.invokeAll(tasks).toArray(TopDocs[]::new); - } - private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight) throws IOException { TopDocs results = getLeafResults(ctx, filterWeight); if (ctx.docBase > 0) { diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 5ccedd53531..723ec037c67 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -119,10 +119,7 @@ public class IndexSearcher { * method from constructor, which is a bad practice. This is {@code null} if no executor is * provided */ - private final CachingLeafSlicesSupplier leafSlicesSupplier; - - // These are only used for multi-threaded search - private final Executor executor; + private final Supplier leafSlicesSupplier; // Used internally for load balancing threads executing for the query private final TaskExecutor taskExecutor; @@ -227,12 +224,18 @@ public class IndexSearcher { assert context.isTopLevel : "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader(); reader = context.reader(); - this.executor = executor; - this.taskExecutor = executor == null ? null : new TaskExecutor(executor); + this.taskExecutor = + executor == null ? new TaskExecutor(Runnable::run) : new TaskExecutor(executor); this.readerContext = context; leafContexts = context.leaves(); - leafSlicesSupplier = - (executor == null) ? null : new CachingLeafSlicesSupplier(this::slices, leafContexts); + Function, LeafSlice[]> slicesProvider = + executor == null + ? leaves -> + leaves.size() == 0 + ? new LeafSlice[0] + : new LeafSlice[] {new LeafSlice(new ArrayList<>(leaves))} + : this::slices; + leafSlicesSupplier = new CachingLeafSlicesSupplier(slicesProvider, leafContexts); } /** @@ -421,13 +424,12 @@ public class IndexSearcher { } /** - * Returns the leaf slices used for concurrent searching, or null if no {@code Executor} was - * passed to the constructor. + * Returns the leaf slices used for concurrent searching * * @lucene.experimental */ public LeafSlice[] getSlices() { - return (executor == null) ? null : leafSlicesSupplier.get(); + return leafSlicesSupplier.get(); } /** @@ -457,12 +459,12 @@ public class IndexSearcher { new CollectorManager() { private final HitsThresholdChecker hitsThresholdChecker = - (leafSlices == null || leafSlices.length <= 1) + leafSlices.length <= 1 ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = - (leafSlices == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); + leafSlices.length <= 1 ? null : new MaxScoreAccumulator(); @Override public TopScoreDocCollector newCollector() throws IOException { @@ -602,12 +604,12 @@ public class IndexSearcher { new CollectorManager<>() { private final HitsThresholdChecker hitsThresholdChecker = - (leafSlices == null || leafSlices.length <= 1) + leafSlices.length <= 1 ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = - (leafSlices == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); + leafSlices.length <= 1 ? null : new MaxScoreAccumulator(); @Override public TopFieldCollector newCollector() throws IOException { @@ -653,8 +655,10 @@ public class IndexSearcher { private T search( Weight weight, CollectorManager collectorManager, C firstCollector) throws IOException { final LeafSlice[] leafSlices = getSlices(); - if (leafSlices == null || leafSlices.length == 0) { - search(leafContexts, weight, firstCollector); + if (leafSlices.length == 0) { + // there are no segments, nothing to offload to the executor, but we do need to call reduce to + // create some kind of empty result + assert leafContexts.size() == 0; return collectorManager.reduce(Collections.singletonList(firstCollector)); } else { final List collectors = new ArrayList<>(leafSlices.length); @@ -893,13 +897,7 @@ public class IndexSearcher { @Override public String toString() { - return "IndexSearcher(" - + reader - + "; executor=" - + executor - + "; sliceExecutionControlPlane " - + taskExecutor - + ")"; + return "IndexSearcher(" + reader + "; taskExecutor=" + taskExecutor + ")"; } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java b/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java index b1dbc27a280..8e1f6c3fac8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java +++ b/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java @@ -110,4 +110,9 @@ public final class TaskExecutor { } } } + + @Override + public String toString() { + return "TaskExecutor(" + "executor=" + executor + ')'; + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java b/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java index eb3ea7c0013..3cd2ecbdd19 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestIndexSearcher.java @@ -42,7 +42,6 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.NamedThreadFactory; -import org.junit.Test; public class TestIndexSearcher extends LuceneTestCase { Directory dir; @@ -115,7 +114,6 @@ public class TestIndexSearcher extends LuceneTestCase { TestUtil.shutdownExecutorService(service); } - @Test public void testSearchAfterPassedMaxDoc() throws Exception { // LUCENE-5128: ensure we get a meaningful message if searchAfter exceeds maxDoc Directory dir = newDirectory(); @@ -221,30 +219,50 @@ public class TestIndexSearcher extends LuceneTestCase { assertEquals(dummyPolicy, searcher.getQueryCachingPolicy()); } - public void testGetSlices() throws Exception { - assertNull(new IndexSearcher(new MultiReader()).getSlices()); + public void testGetSlicesNoLeavesNoExecutor() throws IOException { + IndexSearcher.LeafSlice[] slices = new IndexSearcher(new MultiReader()).getSlices(); + assertEquals(0, slices.length); + } + public void testGetSlicesNoLeavesWithExecutor() throws IOException { + IndexSearcher.LeafSlice[] slices = + new IndexSearcher(new MultiReader(), Runnable::run).getSlices(); + assertEquals(0, slices.length); + } + + public void testGetSlices() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); - w.addDocument(new Document()); + for (int i = 0; i < 10; i++) { + w.addDocument(new Document()); + // manually flush, so we get to create multiple segments almost all the times, as well as + // multiple slices + w.flush(); + } IndexReader r = w.getReader(); w.close(); - ExecutorService service = - new ThreadPoolExecutor( - 4, - 4, - 0L, - TimeUnit.MILLISECONDS, - new LinkedBlockingQueue(), - new NamedThreadFactory("TestIndexSearcher")); - IndexSearcher s = new IndexSearcher(r, service); - IndexSearcher.LeafSlice[] slices = s.getSlices(); - assertNotNull(slices); - assertEquals(1, slices.length); - assertEquals(1, slices[0].leaves.length); - assertTrue(slices[0].leaves[0] == r.leaves().get(0)); - service.shutdown(); + { + // without executor + IndexSearcher.LeafSlice[] slices = new IndexSearcher(r).getSlices(); + assertEquals(1, slices.length); + assertEquals(r.leaves().size(), slices[0].leaves.length); + } + { + // force creation of multiple slices, and provide an executor + IndexSearcher searcher = + new IndexSearcher(r, Runnable::run) { + @Override + protected LeafSlice[] slices(List leaves) { + return slices(leaves, 1, 1); + } + }; + IndexSearcher.LeafSlice[] slices = searcher.getSlices(); + for (IndexSearcher.LeafSlice slice : slices) { + assertEquals(1, slice.leaves.length); + } + assertEquals(r.leaves().size(), slices.length); + } IOUtils.close(r, dir); } @@ -270,4 +288,9 @@ public class TestIndexSearcher extends LuceneTestCase { searcher.search(new MatchAllDocsQuery(), 10); assertEquals(leaves.size(), numExecutions.get()); } + + public void testNullExecutorNonNullTaskExecutor() { + IndexSearcher indexSearcher = new IndexSearcher(reader); + assertNotNull(indexSearcher.getTaskExecutor()); + } }