mirror of https://github.com/apache/lucene.git
This reverts commit 10bebde269.
Based on a recent discussion in https://github.com/apache/lucene/pull/12183#discussion_r1235739084, we agreed it makes more sense to parallelize knn query vector rewrite across leaves rather than leaf slices.
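To make the intent concrete, the sketch below is a minimal standalone illustration (not Lucene code) of the per-leaf fan-out the rewrite goes back to: one task is submitted per index leaf (segment) instead of one task per leaf slice, so the searcher's executor can keep all of its threads busy even when slices group several leaves together. Leaf and searchLeaf are hypothetical stand-ins for LeafReaderContext and the per-segment kNN search.

// Minimal standalone sketch (not Lucene code): submit one task per leaf instead of
// one task per slice. Leaf and searchLeaf are placeholders for LeafReaderContext
// and the per-segment kNN search, which returns TopDocs in the real code.
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PerLeafParallelismSketch {
  record Leaf(int ord) {}

  // Stand-in for the per-segment search.
  static String searchLeaf(Leaf leaf) {
    return "results for leaf " + leaf.ord();
  }

  public static void main(String[] args) throws ExecutionException, InterruptedException {
    List<Leaf> leaves = List.of(new Leaf(0), new Leaf(1), new Leaf(2), new Leaf(3));
    ExecutorService executor = Executors.newFixedThreadPool(4);
    try {
      // One task per leaf gives finer-grained parallelism than one task per slice.
      List<Future<String>> futures =
          leaves.stream().map(leaf -> executor.submit(() -> searchLeaf(leaf))).toList();
      for (Future<String> future : futures) {
        System.out.println(future.get()); // per-leaf results come back in leaves() order
      }
    } finally {
      executor.shutdown();
    }
  }
}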
commit 7f10dca1e5
parent cb195bd96e
AbstractKnnVectorQuery.java:

@@ -19,12 +19,12 @@ package org.apache.lucene.search;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Objects;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executor;
 import java.util.concurrent.FutureTask;
 import org.apache.lucene.codecs.KnnVectorsReader;
 import org.apache.lucene.index.FieldInfo;
@@ -81,12 +81,11 @@ abstract class AbstractKnnVectorQuery extends Query {
       filterWeight = null;
     }
 
-    SliceExecutor sliceExecutor = indexSearcher.getSliceExecutor();
-    // in case of parallel execution, the leaf results are not ordered by leaf context's ordinal
+    Executor executor = indexSearcher.getExecutor();
     TopDocs[] perLeafResults =
-        (sliceExecutor == null)
+        (executor == null)
            ? sequentialSearch(reader.leaves(), filterWeight)
-            : parallelSearch(indexSearcher.getSlices(), filterWeight, sliceExecutor);
+            : parallelSearch(reader.leaves(), filterWeight, executor);
 
     // Merge sort the results
     TopDocs topK = TopDocs.merge(k, perLeafResults);
@@ -110,40 +109,27 @@ abstract class AbstractKnnVectorQuery extends Query {
   }
 
   private TopDocs[] parallelSearch(
-      IndexSearcher.LeafSlice[] slices, Weight filterWeight, SliceExecutor sliceExecutor) {
-    List<FutureTask<TopDocs[]>> tasks = new ArrayList<>(slices.length);
-    int segmentsCount = 0;
-    for (IndexSearcher.LeafSlice slice : slices) {
-      segmentsCount += slice.leaves.length;
-      tasks.add(
-          new FutureTask<>(
-              () -> {
-                TopDocs[] results = new TopDocs[slice.leaves.length];
-                int i = 0;
-                for (LeafReaderContext context : slice.leaves) {
-                  results[i++] = searchLeaf(context, filterWeight);
-                }
-                return results;
-              }));
-    }
+      List<LeafReaderContext> leafReaderContexts, Weight filterWeight, Executor executor) {
+    List<FutureTask<TopDocs>> tasks =
+        leafReaderContexts.stream()
+            .map(ctx -> new FutureTask<>(() -> searchLeaf(ctx, filterWeight)))
+            .toList();
 
+    SliceExecutor sliceExecutor = new SliceExecutor(executor);
     sliceExecutor.invokeAll(tasks);
 
-    TopDocs[] topDocs = new TopDocs[segmentsCount];
-    int i = 0;
-    for (FutureTask<TopDocs[]> task : tasks) {
-      try {
-        for (TopDocs docs : task.get()) {
-          topDocs[i++] = docs;
-        }
-      } catch (ExecutionException e) {
-        throw new RuntimeException(e.getCause());
-      } catch (InterruptedException e) {
-        throw new ThreadInterruptedException(e);
-      }
-    }
-    return topDocs;
+    return tasks.stream()
+        .map(
+            task -> {
+              try {
+                return task.get();
+              } catch (ExecutionException e) {
+                throw new RuntimeException(e.getCause());
+              } catch (InterruptedException e) {
+                throw new ThreadInterruptedException(e);
+              }
+            })
+        .toArray(TopDocs[]::new);
   }
 
   private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight) throws IOException {

IndexSearcher.java:

@@ -962,10 +962,6 @@ public class IndexSearcher {
     return executor;
   }
 
-  SliceExecutor getSliceExecutor() {
-    return sliceExecutor;
-  }
-
   /**
    * Thrown when an attempt is made to add more than {@link #getMaxClauseCount()} clauses. This
    * typically happens if a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery is expanded to
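For reference, the result-collection step in the rewritten parallelSearch boils down to the pattern below: run the per-leaf FutureTasks, then unwrap ExecutionException to surface the real cause and translate interruption. This is a standalone simplification under stated assumptions, not the Lucene code: a plain Executor stands in for Lucene's SliceExecutor, String results stand in for TopDocs, and runTasks is an illustrative name.

// Simplified stand-in for the result-collection step in the new parallelSearch.
// Names here (runTasks, the String result type) are illustrative only.
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.FutureTask;

public class CollectResultsSketch {

  static <T> List<T> runTasks(List<FutureTask<T>> tasks, Executor executor) {
    // In Lucene the tasks are handed to a SliceExecutor; here we submit them directly.
    tasks.forEach(executor::execute);
    return tasks.stream()
        .map(
            task -> {
              try {
                return task.get();
              } catch (ExecutionException e) {
                // Surface the underlying failure rather than the wrapper exception.
                throw new RuntimeException(e.getCause());
              } catch (InterruptedException e) {
                // Lucene throws its ThreadInterruptedException here; restoring the
                // interrupt flag and rethrowing is the closest standalone equivalent.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
              }
            })
        .toList();
  }

  public static void main(String[] args) {
    List<FutureTask<String>> tasks =
        List.of(new FutureTask<>(() -> "a"), new FutureTask<>(() -> "b"));
    // Runnable::run executes each task on the calling thread, which is enough for a demo.
    System.out.println(runTasks(tasks, Runnable::run)); // prints [a, b]
  }
}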