mirror of https://github.com/apache/lucene.git
This reverts commit 10bebde269
.
Based on a recent discussion in
https://github.com/apache/lucene/pull/12183#discussion_r1235739084 we
agreed it makes more sense to parallelize knn query vector rewrite
across leaves rather than leaf slices.
This commit is contained in:
parent
cb195bd96e
commit
7f10dca1e5
|
@ -19,12 +19,12 @@ package org.apache.lucene.search;
|
|||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.FutureTask;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -81,12 +81,11 @@ abstract class AbstractKnnVectorQuery extends Query {
|
|||
filterWeight = null;
|
||||
}
|
||||
|
||||
SliceExecutor sliceExecutor = indexSearcher.getSliceExecutor();
|
||||
// in case of parallel execution, the leaf results are not ordered by leaf context's ordinal
|
||||
Executor executor = indexSearcher.getExecutor();
|
||||
TopDocs[] perLeafResults =
|
||||
(sliceExecutor == null)
|
||||
(executor == null)
|
||||
? sequentialSearch(reader.leaves(), filterWeight)
|
||||
: parallelSearch(indexSearcher.getSlices(), filterWeight, sliceExecutor);
|
||||
: parallelSearch(reader.leaves(), filterWeight, executor);
|
||||
|
||||
// Merge sort the results
|
||||
TopDocs topK = TopDocs.merge(k, perLeafResults);
|
||||
|
@ -110,40 +109,27 @@ abstract class AbstractKnnVectorQuery extends Query {
|
|||
}
|
||||
|
||||
private TopDocs[] parallelSearch(
|
||||
IndexSearcher.LeafSlice[] slices, Weight filterWeight, SliceExecutor sliceExecutor) {
|
||||
|
||||
List<FutureTask<TopDocs[]>> tasks = new ArrayList<>(slices.length);
|
||||
int segmentsCount = 0;
|
||||
for (IndexSearcher.LeafSlice slice : slices) {
|
||||
segmentsCount += slice.leaves.length;
|
||||
tasks.add(
|
||||
new FutureTask<>(
|
||||
() -> {
|
||||
TopDocs[] results = new TopDocs[slice.leaves.length];
|
||||
int i = 0;
|
||||
for (LeafReaderContext context : slice.leaves) {
|
||||
results[i++] = searchLeaf(context, filterWeight);
|
||||
}
|
||||
return results;
|
||||
}));
|
||||
}
|
||||
List<LeafReaderContext> leafReaderContexts, Weight filterWeight, Executor executor) {
|
||||
List<FutureTask<TopDocs>> tasks =
|
||||
leafReaderContexts.stream()
|
||||
.map(ctx -> new FutureTask<>(() -> searchLeaf(ctx, filterWeight)))
|
||||
.toList();
|
||||
|
||||
SliceExecutor sliceExecutor = new SliceExecutor(executor);
|
||||
sliceExecutor.invokeAll(tasks);
|
||||
|
||||
TopDocs[] topDocs = new TopDocs[segmentsCount];
|
||||
int i = 0;
|
||||
for (FutureTask<TopDocs[]> task : tasks) {
|
||||
return tasks.stream()
|
||||
.map(
|
||||
task -> {
|
||||
try {
|
||||
for (TopDocs docs : task.get()) {
|
||||
topDocs[i++] = docs;
|
||||
}
|
||||
return task.get();
|
||||
} catch (ExecutionException e) {
|
||||
throw new RuntimeException(e.getCause());
|
||||
} catch (InterruptedException e) {
|
||||
throw new ThreadInterruptedException(e);
|
||||
}
|
||||
}
|
||||
return topDocs;
|
||||
})
|
||||
.toArray(TopDocs[]::new);
|
||||
}
|
||||
|
||||
private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight) throws IOException {
|
||||
|
|
|
@ -962,10 +962,6 @@ public class IndexSearcher {
|
|||
return executor;
|
||||
}
|
||||
|
||||
SliceExecutor getSliceExecutor() {
|
||||
return sliceExecutor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Thrown when an attempt is made to add more than {@link #getMaxClauseCount()} clauses. This
|
||||
* typically happens if a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery is expanded to
|
||||
|
|
Loading…
Reference in New Issue