LUCENE-8865: Move to executor in IndexSearcher (#731)

In order to simplify testing this change moves to use the Executor
interface instead of ExecutorService. This change also simplifies
customizing execute methods for use-cases that need to add additional
logic for forking to new threads. This change also adds a test for
the optimization added in LUCENE-8865.

This change is fully backwards compatible since ExecutorService implements
Executor.
This commit is contained in:
Simon Willnauer 2019-06-20 14:26:40 +02:00 committed by GitHub
parent 0aa6b11ae2
commit c6899fc40d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 22 deletions

View File

@ -33,6 +33,12 @@ Other
======================= Lucene 8.2.0 =======================
API Changes
* LUCENE-8865: IndexSearcher now uses Executor instead of ExecutorSerivce.
This change is fully backwards compatible since ExecutorService directly
implements Executor. (Simon Willnauer)
New Features
* LUCENE-8815: Provide a DoubleValues implementation for retrieving the value of features without

View File

@ -28,8 +28,9 @@ import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
@ -118,7 +119,7 @@ public class IndexSearcher {
private final LeafSlice[] leafSlices;
// These are only used for multi-threaded search
private final ExecutorService executor;
private final Executor executor;
// the default Similarity
private static final Similarity defaultSimilarity = new BM25Similarity();
@ -178,9 +179,7 @@ public class IndexSearcher {
}
/** Runs searches for each segment separately, using the
* provided ExecutorService. IndexSearcher will not
* close/awaitTermination this ExecutorService on
* close; you must do so, eventually, on your own. NOTE:
* provided Executor. NOTE:
* if you are using {@link NIOFSDirectory}, do not use
* the shutdownNow method of ExecutorService as this uses
* Thread.interrupt under-the-hood which can silently
@ -188,18 +187,16 @@ public class IndexSearcher {
* href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
*
* @lucene.experimental */
public IndexSearcher(IndexReader r, ExecutorService executor) {
public IndexSearcher(IndexReader r, Executor executor) {
this(r.getContext(), executor);
}
/**
* Creates a searcher searching the provided top-level {@link IndexReaderContext}.
* <p>
* Given a non-<code>null</code> {@link ExecutorService} this method runs
* searches for each segment separately, using the provided ExecutorService.
* IndexSearcher will not close/awaitTermination this ExecutorService on
* close; you must do so, eventually, on your own. NOTE: if you are using
* {@link NIOFSDirectory}, do not use the shutdownNow method of
* Given a non-<code>null</code> {@link Executor} this method runs
* searches for each segment separately, using the provided Executor.
* NOTE: if you are using {@link NIOFSDirectory}, do not use the shutdownNow method of
* ExecutorService as this uses Thread.interrupt under-the-hood which can
* silently close file descriptors (see <a
* href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
@ -208,7 +205,7 @@ public class IndexSearcher {
* @see IndexReader#getContext()
* @lucene.experimental
*/
public IndexSearcher(IndexReaderContext context, ExecutorService executor) {
public IndexSearcher(IndexReaderContext context, Executor executor) {
assert context.isTopLevel: "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader();
reader = context.reader();
this.executor = executor;
@ -420,7 +417,7 @@ public class IndexSearcher {
return search(query, collectorManager);
}
/** Returns the leaf slices used for concurrent searching, or null if no {@code ExecutorService} was
/** Returns the leaf slices used for concurrent searching, or null if no {@code Executor} was
* passed to the constructor.
*
* @lucene.experimental */
@ -607,7 +604,7 @@ public class IndexSearcher {
* Lower-level search API.
* Search all leaves using the given {@link CollectorManager}. In contrast
* to {@link #search(Query, Collector)}, this method will use the searcher's
* {@link ExecutorService} in order to parallelize execution of the collection
* {@link Executor} in order to parallelize execution of the collection
* on the configured {@link #leafSlices}.
* @see CollectorManager
* @lucene.experimental
@ -639,10 +636,12 @@ public class IndexSearcher {
for (int i = 0; i < leafSlices.length - 1; ++i) {
final LeafReaderContext[] leaves = leafSlices[i].leaves;
final C collector = collectors.get(i);
topDocsFutures.add(executor.submit(() -> {
FutureTask<C> task = new FutureTask<>(() -> {
search(Arrays.asList(leaves), weight, collector);
return collector;
}));
});
executor.execute(task);
topDocsFutures.add(task);
}
final LeafReaderContext[] leaves = leafSlices[leafSlices.length - 1].leaves;
final C collector = collectors.get(leafSlices.length - 1);

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.concurrent.ExecutorService; // javadocs
import java.util.concurrent.Executor; // javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; // javadocs
@ -39,7 +39,7 @@ import org.apache.lucene.search.similarities.Similarity; // javadocs
* You can pass your own factory instead if you want custom behavior, such as:
* <ul>
* <li>Setting a custom scoring model: {@link IndexSearcher#setSimilarity(Similarity)}
* <li>Parallel per-segment search: {@link IndexSearcher#IndexSearcher(IndexReader, ExecutorService)}
* <li>Parallel per-segment search: {@link IndexSearcher#IndexSearcher(IndexReader, Executor)}
* <li>Return custom subclasses of IndexSearcher (for example that implement distributed scoring)
* <li>Run queries to warm your IndexSearcher before it is used. Note: when using near-realtime search
* you may want to also {@link IndexWriterConfig#setMergedSegmentWarmer(IndexWriter.IndexReaderWarmer)} to warm

View File

@ -18,11 +18,14 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
@ -30,6 +33,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -237,4 +241,28 @@ public class TestIndexSearcher extends LuceneTestCase {
service.shutdown();
IOUtils.close(r, dir);
}
public void testOneSegmentExecutesOnTheCallerThread() throws IOException {
List<LeafReaderContext> leaves = reader.leaves();
AtomicInteger numExecutions = new AtomicInteger(0);
IndexSearcher searcher = new IndexSearcher(reader, task -> {
numExecutions.incrementAndGet();
task.run();
}) {
@Override
protected LeafSlice[] slices(List<LeafReaderContext> leaves) {
ArrayList<LeafSlice> slices = new ArrayList<>();
for (LeafReaderContext ctx : leaves) {
slices.add(new LeafSlice(Arrays.asList(ctx)));
}
return slices.toArray(new LeafSlice[0]);
}
};
searcher.search(new MatchAllDocsQuery(), 10);
if (leaves.size() <= 1) {
assertEquals(0, numExecutions.get());
} else {
assertEquals(leaves.size() - 1, numExecutions.get());
}
}
}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executor;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
@ -30,9 +30,9 @@ import org.apache.lucene.util.Bits;
public class ScorerIndexSearcher extends IndexSearcher {
/** Creates a searcher searching the provided index. Search on individual
* segments will be run in the provided {@link ExecutorService}.
* @see IndexSearcher#IndexSearcher(IndexReader, ExecutorService) */
public ScorerIndexSearcher(IndexReader r, ExecutorService executor) {
* segments will be run in the provided {@link Executor}.
* @see IndexSearcher#IndexSearcher(IndexReader, Executor) */
public ScorerIndexSearcher(IndexReader r, Executor executor) {
super(r, executor);
}