mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
Remove CollectorOwner class (#13702)
This commit is contained in:
parent
10420c6c47
commit
c21bc5405b
@ -286,10 +286,7 @@ API Changes
|
||||
* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
|
||||
MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)
|
||||
|
||||
* GITHUB#13568: Add CollectorOwner class that wraps CollectorManager, and handles list of Collectors and results.
|
||||
Add IndexSearcher#search method that takes CollectorOwner. (Egor Potemkin)
|
||||
|
||||
* GITHUB#13568: Add DrillSideways#search method that supports any collector types for any drill-sideways dimensions
|
||||
* GITHUB#13568: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
|
||||
or drill-down. (Egor Potemkin)
|
||||
|
||||
New Features
|
||||
|
@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This class wraps {@link CollectorManager} and owns the collectors the manager creates. It is
|
||||
* convenient that clients of the class don't have to worry about keeping the list of collectors, as
|
||||
* well as about making the collector's type (C) compatible when reduce is called. Instances of this
|
||||
* class cache results of {@link CollectorManager#reduce(Collection)}.
|
||||
*
|
||||
* <p>Note that instance of this class ignores any {@link Collector} created by {@link
|
||||
* CollectorManager#newCollector()} directly, not through {@link #newCollector()}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class CollectorOwner<C extends Collector, T> {
|
||||
|
||||
private final CollectorManager<C, T> manager;
|
||||
|
||||
private T result;
|
||||
private boolean reduced;
|
||||
|
||||
// TODO: For IndexSearcher, the list doesn't have to be synchronized
|
||||
// because we create new collectors sequentially. Drill sideways creates new collectors in
|
||||
// DrillSidewaysQuery#Weight#bulkScorer which is already called concurrently.
|
||||
// I think making the list synchronized here is not a huge concern, at the same time, do we want
|
||||
// to do something about it?
|
||||
// e.g. have boolean property in constructor that makes it threads friendly when set?
|
||||
private final List<C> collectors = Collections.synchronizedList(new ArrayList<>());
|
||||
|
||||
public CollectorOwner(CollectorManager<C, T> manager) {
|
||||
this.manager = manager;
|
||||
}
|
||||
|
||||
/** Return a new {@link Collector}. This must return a different instance on each call. */
|
||||
public C newCollector() throws IOException {
|
||||
C collector = manager.newCollector();
|
||||
collectors.add(collector);
|
||||
return collector;
|
||||
}
|
||||
|
||||
public C getCollector(int i) {
|
||||
return collectors.get(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns result of {@link CollectorManager#reduce(Collection)}. The result is cached.
|
||||
*
|
||||
* <p>This method is NOT threadsafe.
|
||||
*/
|
||||
public T getResult() throws IOException {
|
||||
if (reduced == false) {
|
||||
result = manager.reduce(collectors);
|
||||
reduced = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -671,53 +671,6 @@ public class IndexSearcher {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower-level search API. Search all leaves using the given {@link CollectorOwner}, without
|
||||
* calling {@link CollectorOwner#getResult()} so that clients can reduce and read results
|
||||
* themselves.
|
||||
*
|
||||
* <p>Note that this method doesn't return anything - users can access results by calling {@link
|
||||
* CollectorOwner#getResult()}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public <C extends Collector> void search(Query query, CollectorOwner<C, ?> collectorOwner)
|
||||
throws IOException {
|
||||
final C firstCollector = collectorOwner.newCollector();
|
||||
query = rewrite(query, firstCollector.scoreMode().needsScores());
|
||||
final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
|
||||
search(weight, collectorOwner, firstCollector);
|
||||
}
|
||||
|
||||
private <C extends Collector> void search(
|
||||
Weight weight, CollectorOwner<C, ?> collectorOwner, C firstCollector) throws IOException {
|
||||
final LeafSlice[] leafSlices = getSlices();
|
||||
if (leafSlices.length == 0) {
|
||||
// there are no segments, nothing to offload to the executor
|
||||
assert leafContexts.isEmpty();
|
||||
} else {
|
||||
final ScoreMode scoreMode = firstCollector.scoreMode();
|
||||
for (int i = 1; i < leafSlices.length; ++i) {
|
||||
final C collector = collectorOwner.newCollector();
|
||||
if (scoreMode != collector.scoreMode()) {
|
||||
throw new IllegalStateException(
|
||||
"CollectorManager does not always produce collectors with the same score mode");
|
||||
}
|
||||
}
|
||||
final List<Callable<C>> listTasks = new ArrayList<>(leafSlices.length);
|
||||
for (int i = 0; i < leafSlices.length; ++i) {
|
||||
final LeafReaderContext[] leaves = leafSlices[i].leaves;
|
||||
final C collector = collectorOwner.getCollector(i);
|
||||
listTasks.add(
|
||||
() -> {
|
||||
search(Arrays.asList(leaves), weight, collector);
|
||||
return collector;
|
||||
});
|
||||
}
|
||||
taskExecutor.invokeAll(listTasks);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower-level search API.
|
||||
*
|
||||
|
@ -21,7 +21,6 @@ import static org.apache.lucene.sandbox.facet.ComparableUtils.byAggregatedValue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
@ -58,7 +57,7 @@ import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
|
||||
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
|
||||
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
|
||||
import org.apache.lucene.sandbox.facet.recorders.Reducer;
|
||||
import org.apache.lucene.search.CollectorOwner;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LongValuesSource;
|
||||
@ -564,17 +563,13 @@ public class SandboxFacetsExample {
|
||||
// FacetFieldCollectorManager anyway, and leaf cutter are not merged or anything like that.
|
||||
FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager =
|
||||
new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder);
|
||||
List<CollectorOwner<FacetFieldCollector, CountFacetRecorder>> drillSidewaysOwners =
|
||||
List.of(new CollectorOwner<>(publishDayDimensionCollectorManager));
|
||||
List<CollectorManager<FacetFieldCollector, CountFacetRecorder>> drillSidewaysManagers =
|
||||
List.of(publishDayDimensionCollectorManager);
|
||||
|
||||
//// (3) search
|
||||
// Right now we return the same Recorder we created - so we can ignore results
|
||||
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
|
||||
// We must wrap list of drill sideways owner with unmodifiableList to make generics work.
|
||||
ds.search(
|
||||
q,
|
||||
new CollectorOwner<>(drillDownCollectorManager),
|
||||
Collections.unmodifiableList(drillSidewaysOwners));
|
||||
ds.search(q, drillDownCollectorManager, drillSidewaysManagers);
|
||||
|
||||
//// (4) Get top 10 results by count for Author
|
||||
List<FacetResult> facetResults = new ArrayList<>(2);
|
||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -31,8 +30,8 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
|
||||
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.CollectorOwner;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
@ -302,25 +301,13 @@ public class DrillSideways {
|
||||
}
|
||||
}
|
||||
|
||||
private static class CallableCollector implements Callable<Void> {
|
||||
private final IndexSearcher searcher;
|
||||
private final Query query;
|
||||
private final CollectorOwner<?, ?> collectorOwner;
|
||||
|
||||
private CallableCollector(
|
||||
IndexSearcher searcher, Query query, CollectorOwner<?, ?> collectorOwner) {
|
||||
this.searcher = searcher;
|
||||
this.query = query;
|
||||
this.collectorOwner = collectorOwner;
|
||||
}
|
||||
private record CallableCollector<R>(
|
||||
IndexSearcher searcher, Query query, CollectorManager<?, R> collectorManager)
|
||||
implements Callable<R> {
|
||||
|
||||
@Override
|
||||
public Void call() throws Exception {
|
||||
searcher.search(query, collectorOwner);
|
||||
// Call getResult to trigger reduce, we don't need to return results because users can access
|
||||
// them directly from collectorOwner
|
||||
collectorOwner.getResult();
|
||||
return null;
|
||||
public R call() throws Exception {
|
||||
return searcher.search(query, collectorManager);
|
||||
}
|
||||
}
|
||||
|
||||
@ -344,31 +331,30 @@ public class DrillSideways {
|
||||
// Main query
|
||||
FacetsCollectorManager drillDownFacetsCollectorManager =
|
||||
createDrillDownFacetsCollectorManager();
|
||||
final CollectorOwner<?, ?> mainCollectorOwner;
|
||||
final CollectorManager<?, ?> mainCollectorManager;
|
||||
if (drillDownFacetsCollectorManager != null) {
|
||||
// Make sure we populate a facet collector corresponding to the base query if desired:
|
||||
mainCollectorOwner =
|
||||
new CollectorOwner<>(
|
||||
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager));
|
||||
mainCollectorManager =
|
||||
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager);
|
||||
} else {
|
||||
mainCollectorOwner = new CollectorOwner<>(hitCollectorManager);
|
||||
mainCollectorManager = hitCollectorManager;
|
||||
}
|
||||
// Drill sideways dimensions
|
||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
|
||||
final List<CollectorManager<FacetsCollector, FacetsCollector>> drillSidewaysCollectorManagers;
|
||||
if (query.getDims().isEmpty() == false) {
|
||||
drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size());
|
||||
drillSidewaysCollectorManagers = new ArrayList<>(query.getDims().size());
|
||||
for (int i = 0; i < query.getDims().size(); i++) {
|
||||
drillSidewaysCollectorOwners.add(
|
||||
new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager()));
|
||||
drillSidewaysCollectorManagers.add(createDrillSidewaysFacetsCollectorManager());
|
||||
}
|
||||
} else {
|
||||
drillSidewaysCollectorOwners = null;
|
||||
drillSidewaysCollectorManagers = null;
|
||||
}
|
||||
// Execute query
|
||||
final Result<?, FacetsCollector> result;
|
||||
if (executor != null) {
|
||||
searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners);
|
||||
result = searchConcurrently(query, mainCollectorManager, drillSidewaysCollectorManagers);
|
||||
} else {
|
||||
searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners);
|
||||
result = searchSequentially(query, mainCollectorManager, drillSidewaysCollectorManagers);
|
||||
}
|
||||
|
||||
// Collect results
|
||||
@ -377,12 +363,12 @@ public class DrillSideways {
|
||||
if (drillDownFacetsCollectorManager != null) {
|
||||
// drill down collected using MultiCollector
|
||||
// Extract the results:
|
||||
Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult();
|
||||
Object[] drillDownResult = (Object[]) result.drillDownResult;
|
||||
facetsCollectorResult = (FacetsCollector) drillDownResult[0];
|
||||
hitCollectorResult = (R) drillDownResult[1];
|
||||
} else {
|
||||
facetsCollectorResult = null;
|
||||
hitCollectorResult = (R) mainCollectorOwner.getResult();
|
||||
hitCollectorResult = (R) result.drillDownResult;
|
||||
}
|
||||
|
||||
// Getting results for drill sideways dimensions (if any)
|
||||
@ -391,12 +377,11 @@ public class DrillSideways {
|
||||
if (query.getDims().isEmpty() == false) {
|
||||
drillSidewaysDims = query.getDims().keySet().toArray(new String[0]);
|
||||
int numDims = query.getDims().size();
|
||||
assert drillSidewaysCollectorOwners != null;
|
||||
assert drillSidewaysCollectorOwners.size() == numDims;
|
||||
assert drillSidewaysCollectorManagers != null;
|
||||
assert drillSidewaysCollectorManagers.size() == numDims;
|
||||
drillSidewaysCollectors = new FacetsCollector[numDims];
|
||||
for (int dim = 0; dim < numDims; dim++) {
|
||||
drillSidewaysCollectors[dim] =
|
||||
(FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult();
|
||||
drillSidewaysCollectors[dim] = result.drillSidewaysResults.get(dim);
|
||||
}
|
||||
} else {
|
||||
drillSidewaysDims = null;
|
||||
@ -414,52 +399,51 @@ public class DrillSideways {
|
||||
|
||||
/**
|
||||
* Search using DrillDownQuery with custom collectors. This method can be used with any {@link
|
||||
* CollectorOwner}s. It doesn't return anything because it is expected that you read results from
|
||||
* provided {@link CollectorOwner}s.
|
||||
* CollectorManager}s.
|
||||
*
|
||||
* <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill
|
||||
* sideways dimensions.
|
||||
*
|
||||
* <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code
|
||||
* drillSidewaysCollectorOwners} to convince compiler that it is safe to use List here.
|
||||
*
|
||||
* <p>Use {@link MultiCollectorManager} wrapped by {@link CollectorOwner} to collect both hits and
|
||||
* facets for the entire query and/or for drill-sideways dimensions.
|
||||
*
|
||||
* <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C,
|
||||
* because we want each dimensions to be able to use their own types. Alternatively, we can use
|
||||
* typesafe heterogeneous container and provide CollectorManager type for each dimension to this
|
||||
* method? I do like CollectorOwner approach as it seems more intuitive?
|
||||
* <p>Note: Use {@link MultiCollectorManager} to collect both hits and facets for the entire query
|
||||
* and/or for drill-sideways dimensions. You can also use it to wrap different types of {@link
|
||||
* CollectorManager} for drill-sideways dimensions.
|
||||
*/
|
||||
public void search(
|
||||
final DrillDownQuery query,
|
||||
CollectorOwner<?, ?> drillDownCollectorOwner,
|
||||
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
||||
public <C extends Collector, T, K extends Collector, R> Result<T, R> search(
|
||||
DrillDownQuery query,
|
||||
CollectorManager<C, T> drillDownCollectorManager,
|
||||
List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
|
||||
throws IOException {
|
||||
if (drillDownCollectorOwner == null) {
|
||||
if (drillDownCollectorManager == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"This search method requires client to provide drill down collector manager");
|
||||
}
|
||||
if (drillSidewaysCollectorOwners == null) {
|
||||
if (drillSidewaysCollectorManagers == null) {
|
||||
if (query.getDims().isEmpty() == false) {
|
||||
throw new IllegalArgumentException(
|
||||
"The query requires not null drillSidewaysCollectorOwners");
|
||||
"The query requires not null drillSidewaysCollectorManagers");
|
||||
}
|
||||
} else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) {
|
||||
} else if (drillSidewaysCollectorManagers.size() != query.getDims().size()) {
|
||||
throw new IllegalArgumentException(
|
||||
"drillSidewaysCollectorOwners size must be equal to number of dimensions in the query.");
|
||||
"drillSidewaysCollectorManagers size must be equal to number of dimensions in the query.");
|
||||
}
|
||||
if (executor != null) {
|
||||
searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
|
||||
return searchConcurrently(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
|
||||
} else {
|
||||
searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
|
||||
return searchSequentially(query, drillDownCollectorManager, drillSidewaysCollectorManagers);
|
||||
}
|
||||
}
|
||||
|
||||
private void searchSequentially(
|
||||
/**
|
||||
* {@link #search(DrillDownQuery, CollectorManager, List)} result. It doesn't depend on {@link
|
||||
* Facets} to allow users to use any type of {@link CollectorManager} for drill-down or
|
||||
* drill-sideways dimension.
|
||||
*
|
||||
* @param drillDownResult result from drill down (main) {@link CollectorManager}
|
||||
* @param drillSidewaysResults results from drill sideways {@link CollectorManager}s
|
||||
*/
|
||||
public record Result<T, R>(T drillDownResult, List<R> drillSidewaysResults) {}
|
||||
|
||||
private <C extends Collector, T, K extends Collector, R> Result<T, R> searchSequentially(
|
||||
final DrillDownQuery query,
|
||||
final CollectorOwner<?, ?> drillDownCollectorOwner,
|
||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
||||
final CollectorManager<C, T> drillDownCollectorManager,
|
||||
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers)
|
||||
throws IOException {
|
||||
|
||||
Map<String, Integer> drillDownDims = query.getDims();
|
||||
@ -467,9 +451,7 @@ public class DrillSideways {
|
||||
if (drillDownDims.isEmpty()) {
|
||||
// There are no drill-down dims, so there is no
|
||||
// drill-sideways to compute:
|
||||
searcher.search(query, drillDownCollectorOwner);
|
||||
drillDownCollectorOwner.getResult();
|
||||
return;
|
||||
return new Result<>(searcher.search(query, drillDownCollectorManager), null);
|
||||
}
|
||||
|
||||
Query baseQuery = query.getBaseQuery();
|
||||
@ -480,59 +462,60 @@ public class DrillSideways {
|
||||
}
|
||||
Query[] drillDownQueries = query.getDrillDownQueries();
|
||||
|
||||
DrillSidewaysQuery dsq =
|
||||
new DrillSidewaysQuery(
|
||||
baseQuery,
|
||||
// drillDownCollectorOwner,
|
||||
// Don't pass drill down collector because drill down is collected by IndexSearcher
|
||||
// itself.
|
||||
// TODO: deprecate drillDown collection in DrillSidewaysQuery?
|
||||
null,
|
||||
drillSidewaysCollectorOwners,
|
||||
drillDownQueries,
|
||||
scoreSubDocsAtOnce());
|
||||
DrillSidewaysQuery<K, R> dsq =
|
||||
new DrillSidewaysQuery<>(
|
||||
baseQuery, drillSidewaysCollectorManagers, drillDownQueries, scoreSubDocsAtOnce());
|
||||
|
||||
searcher.search(dsq, drillDownCollectorOwner);
|
||||
// This method doesn't return results as each dimension might have its own result type.
|
||||
// But we call getResult to trigger results reducing, so that users don't have to worry about
|
||||
// it.
|
||||
drillDownCollectorOwner.getResult();
|
||||
if (drillSidewaysCollectorOwners != null) {
|
||||
for (CollectorOwner<?, ?> sidewaysOwner : drillSidewaysCollectorOwners) {
|
||||
sidewaysOwner.getResult();
|
||||
T collectorResult = searcher.search(dsq, drillDownCollectorManager);
|
||||
List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
|
||||
assert drillSidewaysCollectorManagers != null
|
||||
: "Case without drill sideways dimensions is handled above";
|
||||
int numSlices = dsq.managedDrillSidewaysCollectors.size();
|
||||
for (int dim = 0; dim < drillDownDims.size(); dim++) {
|
||||
List<K> collectorsForDim = new ArrayList<>(numSlices);
|
||||
for (int slice = 0; slice < numSlices; slice++) {
|
||||
collectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice).get(dim));
|
||||
}
|
||||
drillSidewaysResults.add(
|
||||
dim, drillSidewaysCollectorManagers.get(dim).reduce(collectorsForDim));
|
||||
}
|
||||
return new Result<>(collectorResult, drillSidewaysResults);
|
||||
}
|
||||
|
||||
private void searchConcurrently(
|
||||
private <C extends Collector, T, K extends Collector, R> Result<T, R> searchConcurrently(
|
||||
final DrillDownQuery query,
|
||||
final CollectorOwner<?, ?> drillDownCollectorOwner,
|
||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
|
||||
throws IOException {
|
||||
final CollectorManager<C, T> drillDownCollectorManager,
|
||||
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers) {
|
||||
|
||||
final Map<String, Integer> drillDownDims = query.getDims();
|
||||
final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);
|
||||
final CallableCollector<T> drillDownCallableCollector =
|
||||
new CallableCollector<>(searcher, query, drillDownCollectorManager);
|
||||
final List<CallableCollector<R>> drillSidewaysCallableCollectors =
|
||||
new ArrayList<>(drillDownDims.size());
|
||||
|
||||
callableCollectors.add(new CallableCollector(searcher, query, drillDownCollectorOwner));
|
||||
int i = 0;
|
||||
final Query[] filters = query.getDrillDownQueries();
|
||||
for (String dim : drillDownDims.keySet()) {
|
||||
callableCollectors.add(
|
||||
new CallableCollector(
|
||||
drillSidewaysCallableCollectors.add(
|
||||
new CallableCollector<>(
|
||||
searcher,
|
||||
getDrillDownQuery(query, filters, dim),
|
||||
drillSidewaysCollectorOwners.get(i)));
|
||||
drillSidewaysCollectorManagers.get(i)));
|
||||
i++;
|
||||
}
|
||||
|
||||
try {
|
||||
// Run the query pool
|
||||
final List<Future<Void>> futures = executor.invokeAll(callableCollectors);
|
||||
final Future<T> drillDownFuture = executor.submit(drillDownCallableCollector);
|
||||
final List<Future<R>> drillSidewaysFutures =
|
||||
executor.invokeAll(drillSidewaysCallableCollectors);
|
||||
|
||||
// Wait for results. We don't read the results as they are collected by CollectorOwners
|
||||
for (i = 0; i < futures.size(); i++) {
|
||||
futures.get(i).get();
|
||||
T collectorResult = drillDownFuture.get();
|
||||
List<R> drillSidewaysResults = new ArrayList<>(drillDownDims.size());
|
||||
|
||||
for (i = 0; i < drillSidewaysFutures.size(); i++) {
|
||||
drillSidewaysResults.add(i, drillSidewaysFutures.get(i).get());
|
||||
}
|
||||
return new Result<>(collectorResult, drillSidewaysResults);
|
||||
} catch (InterruptedException e) {
|
||||
throw new ThreadInterruptedException(e);
|
||||
} catch (ExecutionException e) {
|
||||
|
@ -17,19 +17,20 @@
|
||||
package org.apache.lucene.facet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BulkScorer;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorOwner;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
@ -41,12 +42,12 @@ import org.apache.lucene.search.Weight;
|
||||
|
||||
// TODO change the way DrillSidewaysScorer is used, this query does not work
|
||||
// with filter caching
|
||||
class DrillSidewaysQuery extends Query {
|
||||
class DrillSidewaysQuery<K extends Collector, R> extends Query {
|
||||
|
||||
final Query baseQuery;
|
||||
|
||||
final CollectorOwner<?, ?> drillDownCollectorOwner;
|
||||
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
|
||||
final List<CollectorManager<K, R>> drillSidewaysCollectorManagers;
|
||||
final List<List<K>> managedDrillSidewaysCollectors;
|
||||
|
||||
final Query[] drillDownQueries;
|
||||
|
||||
@ -58,15 +59,36 @@ class DrillSidewaysQuery extends Query {
|
||||
*/
|
||||
DrillSidewaysQuery(
|
||||
Query baseQuery,
|
||||
CollectorOwner<?, ?> drillDownCollectorOwner,
|
||||
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners,
|
||||
List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
|
||||
Query[] drillDownQueries,
|
||||
boolean scoreSubDocsAtOnce) {
|
||||
// Note that the "managed" collector lists are synchronized here since bulkScorer()
|
||||
// can be invoked concurrently and needs to remain thread-safe. We're OK with synchronizing
|
||||
// on the whole list as contention is expected to remain very low:
|
||||
this(
|
||||
baseQuery,
|
||||
drillSidewaysCollectorManagers,
|
||||
Collections.synchronizedList(new ArrayList<>()),
|
||||
drillDownQueries,
|
||||
scoreSubDocsAtOnce);
|
||||
}
|
||||
|
||||
/**
|
||||
* Needed for {@link Query#rewrite(IndexSearcher)}. Ensures the same "managed" lists get used
|
||||
* since {@link DrillSideways} accesses references to these through the original {@code
|
||||
* DrillSidewaysQuery}.
|
||||
*/
|
||||
private DrillSidewaysQuery(
|
||||
Query baseQuery,
|
||||
List<CollectorManager<K, R>> drillSidewaysCollectorManagers,
|
||||
List<List<K>> managedDrillSidewaysCollectors,
|
||||
Query[] drillDownQueries,
|
||||
boolean scoreSubDocsAtOnce) {
|
||||
this.baseQuery = Objects.requireNonNull(baseQuery);
|
||||
this.drillDownCollectorOwner = drillDownCollectorOwner;
|
||||
this.drillSidewaysCollectorOwners = drillSidewaysCollectorOwners;
|
||||
this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers;
|
||||
this.drillDownQueries = drillDownQueries;
|
||||
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
||||
this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -87,10 +109,10 @@ class DrillSidewaysQuery extends Query {
|
||||
if (newQuery == baseQuery) {
|
||||
return super.rewrite(indexSearcher);
|
||||
} else {
|
||||
return new DrillSidewaysQuery(
|
||||
return new DrillSidewaysQuery<>(
|
||||
newQuery,
|
||||
drillDownCollectorOwner,
|
||||
drillSidewaysCollectorOwners,
|
||||
drillSidewaysCollectorManagers,
|
||||
managedDrillSidewaysCollectors,
|
||||
drillDownQueries,
|
||||
scoreSubDocsAtOnce);
|
||||
}
|
||||
@ -124,14 +146,8 @@ class DrillSidewaysQuery extends Query {
|
||||
|
||||
int drillDownCount = drillDowns.length;
|
||||
|
||||
Collector drillDownCollector;
|
||||
final LeafCollector drillDownLeafCollector;
|
||||
if (drillDownCollectorOwner != null) {
|
||||
drillDownCollector = drillDownCollectorOwner.newCollector();
|
||||
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
|
||||
} else {
|
||||
drillDownLeafCollector = null;
|
||||
}
|
||||
List<K> sidewaysCollectors = new ArrayList<>(drillDownCount);
|
||||
managedDrillSidewaysCollectors.add(sidewaysCollectors);
|
||||
|
||||
DrillSidewaysScorer.DocsAndCost[] dims =
|
||||
new DrillSidewaysScorer.DocsAndCost[drillDownCount];
|
||||
@ -144,7 +160,8 @@ class DrillSidewaysQuery extends Query {
|
||||
scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
|
||||
}
|
||||
|
||||
Collector sidewaysCollector = drillSidewaysCollectorOwners.get(dim).newCollector();
|
||||
K sidewaysCollector = drillSidewaysCollectorManagers.get(dim).newCollector();
|
||||
sidewaysCollectors.add(dim, sidewaysCollector);
|
||||
|
||||
dims[dim] =
|
||||
new DrillSidewaysScorer.DocsAndCost(
|
||||
@ -155,9 +172,6 @@ class DrillSidewaysQuery extends Query {
|
||||
// a null scorer in this case, but we need to make sure #finish gets called on all facet
|
||||
// collectors since IndexSearcher won't handle this for us:
|
||||
if (baseScorerSupplier == null || nullCount > 1) {
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.finish();
|
||||
}
|
||||
for (DrillSidewaysScorer.DocsAndCost dim : dims) {
|
||||
dim.sidewaysLeafCollector.finish();
|
||||
}
|
||||
@ -177,11 +191,7 @@ class DrillSidewaysQuery extends Query {
|
||||
@Override
|
||||
public BulkScorer bulkScorer() throws IOException {
|
||||
return new DrillSidewaysScorer(
|
||||
context,
|
||||
baseScorerSupplier.get(Long.MAX_VALUE),
|
||||
drillDownLeafCollector,
|
||||
dims,
|
||||
scoreSubDocsAtOnce);
|
||||
context, baseScorerSupplier.get(Long.MAX_VALUE), dims, scoreSubDocsAtOnce);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -212,9 +222,8 @@ class DrillSidewaysQuery extends Query {
|
||||
final int prime = 31;
|
||||
int result = classHash();
|
||||
result = prime * result + Objects.hashCode(baseQuery);
|
||||
result = prime * result + Objects.hashCode(drillDownCollectorOwner);
|
||||
result = prime * result + Arrays.hashCode(drillDownQueries);
|
||||
result = prime * result + Objects.hashCode(drillSidewaysCollectorOwners);
|
||||
result = prime * result + Objects.hashCode(drillSidewaysCollectorManagers);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -223,10 +232,9 @@ class DrillSidewaysQuery extends Query {
|
||||
return sameClassAs(other) && equalsTo(getClass().cast(other));
|
||||
}
|
||||
|
||||
private boolean equalsTo(DrillSidewaysQuery other) {
|
||||
private boolean equalsTo(DrillSidewaysQuery<?, ?> other) {
|
||||
return Objects.equals(baseQuery, other.baseQuery)
|
||||
&& Objects.equals(drillDownCollectorOwner, other.drillDownCollectorOwner)
|
||||
&& Arrays.equals(drillDownQueries, other.drillDownQueries)
|
||||
&& Objects.equals(drillSidewaysCollectorOwners, other.drillSidewaysCollectorOwners);
|
||||
&& Objects.equals(drillSidewaysCollectorManagers, other.drillSidewaysCollectorManagers);
|
||||
}
|
||||
}
|
||||
|
@ -45,8 +45,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
|
||||
// private static boolean DEBUG = false;
|
||||
|
||||
private final LeafCollector drillDownLeafCollector;
|
||||
|
||||
private final DocsAndCost[] dims;
|
||||
|
||||
// DrillDown DocsEnums:
|
||||
@ -68,7 +66,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
DrillSidewaysScorer(
|
||||
LeafReaderContext context,
|
||||
Scorer baseScorer,
|
||||
LeafCollector drillDownLeafCollector,
|
||||
DocsAndCost[] dims,
|
||||
boolean scoreSubDocsAtOnce) {
|
||||
this.dims = dims;
|
||||
@ -81,7 +78,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
} else {
|
||||
this.baseApproximation = baseIterator;
|
||||
}
|
||||
this.drillDownLeafCollector = drillDownLeafCollector;
|
||||
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
|
||||
}
|
||||
|
||||
@ -709,9 +705,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
// }
|
||||
|
||||
collector.collect(collectDocID);
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.collect(collectDocID);
|
||||
}
|
||||
|
||||
// TODO: we could "fix" faceting of the sideways counts
|
||||
// to do this "union" (of the drill down hits) in the
|
||||
@ -725,9 +718,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
|
||||
private void collectHit(LeafCollector collector, DocsAndCost dim) throws IOException {
|
||||
collector.collect(collectDocID);
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.collect(collectDocID);
|
||||
}
|
||||
|
||||
// Tally sideways count:
|
||||
dim.sidewaysLeafCollector.collect(collectDocID);
|
||||
@ -735,9 +725,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
|
||||
private void collectHit(LeafCollector collector, List<DocsAndCost> dims) throws IOException {
|
||||
collector.collect(collectDocID);
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.collect(collectDocID);
|
||||
}
|
||||
|
||||
// Tally sideways counts:
|
||||
for (DocsAndCost dim : dims) {
|
||||
@ -756,9 +743,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
// Note: We _only_ call #finish on the facets collectors we're managing here, but not the
|
||||
// "main" collector. This is because IndexSearcher handles calling #finish on the main
|
||||
// collector.
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.finish();
|
||||
}
|
||||
for (DocsAndCost dim : dims) {
|
||||
dim.sidewaysLeafCollector.finish();
|
||||
}
|
||||
@ -766,9 +750,6 @@ class DrillSidewaysScorer extends BulkScorer {
|
||||
|
||||
private void setScorer(LeafCollector mainCollector, Scorable scorer) throws IOException {
|
||||
mainCollector.setScorer(scorer);
|
||||
if (drillDownLeafCollector != null) {
|
||||
drillDownLeafCollector.setScorer(scorer);
|
||||
}
|
||||
for (DocsAndCost dim : dims) {
|
||||
dim.sidewaysLeafCollector.setScorer(scorer);
|
||||
}
|
||||
|
@ -284,7 +284,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||
Weight dimWeight = searcher.createWeight(dimQ, ScoreMode.COMPLETE_NO_SCORES, 1f);
|
||||
Scorer dimScorer = dimWeight.scorer(ctx);
|
||||
|
||||
FacetsCollector baseFC = new FacetsCollector();
|
||||
FacetsCollector dimFC = new FacetsCollector();
|
||||
DrillSidewaysScorer.DocsAndCost docsAndCost =
|
||||
new DrillSidewaysScorer.DocsAndCost(dimScorer, dimFC.getLeafCollector(ctx));
|
||||
@ -311,7 +310,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||
new DrillSidewaysScorer(
|
||||
ctx,
|
||||
baseScorer,
|
||||
baseFC.getLeafCollector(ctx),
|
||||
new DrillSidewaysScorer.DocsAndCost[] {docsAndCost},
|
||||
scoreSubDocsAtOnce);
|
||||
expectThrows(CollectionTerminatedException.class, () -> scorer.score(baseCollector, null));
|
||||
@ -321,7 +319,6 @@ public class TestDrillSideways extends FacetTestCase {
|
||||
// both our base and sideways dim facets collectors. What we really want to test here is
|
||||
// that the matching docs are still correctly present and populated after an early
|
||||
// termination occurs (i.e., #finish is properly called in that scenario):
|
||||
assertEquals(1, baseFC.getMatchingDocs().size());
|
||||
assertEquals(1, dimFC.getMatchingDocs().size());
|
||||
}
|
||||
}
|
||||
|
@ -50,7 +50,6 @@ import org.apache.lucene.sandbox.facet.cutters.ranges.LongRangeFacetCutter;
|
||||
import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
|
||||
import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel;
|
||||
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
|
||||
import org.apache.lucene.search.CollectorOwner;
|
||||
import org.apache.lucene.search.DoubleValues;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
@ -538,7 +537,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||
|
||||
////// First search, no drill-downs:
|
||||
DrillDownQuery ddq = new DrillDownQuery(config);
|
||||
ds.search(ddq, new CollectorOwner<>(collectorManager), List.of());
|
||||
ds.search(ddq, collectorManager, List.of());
|
||||
|
||||
// assertEquals(100, dsr.hits.totalHits.value);
|
||||
assertEquals(
|
||||
@ -556,10 +555,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
||||
ddq = new DrillDownQuery(config);
|
||||
ddq.add("dim", "b");
|
||||
ds.search(
|
||||
ddq,
|
||||
new CollectorOwner<>(fieldCollectorManager),
|
||||
List.of(new CollectorOwner<>(dimCollectorManager)));
|
||||
ds.search(ddq, fieldCollectorManager, List.of(dimCollectorManager));
|
||||
|
||||
// assertEquals(75, dsr.hits.totalHits.value);
|
||||
assertEquals(
|
||||
@ -577,10 +573,7 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||
dimCollectorManager = new FacetFieldCollectorManager<>(dimCutter, dimCountRecorder);
|
||||
ddq = new DrillDownQuery(config);
|
||||
ddq.add("field", LongPoint.newRangeQuery("field", 0L, 10L));
|
||||
ds.search(
|
||||
ddq,
|
||||
new CollectorOwner<>(dimCollectorManager),
|
||||
List.of(new CollectorOwner<>(fieldCollectorManager)));
|
||||
ds.search(ddq, dimCollectorManager, List.of(fieldCollectorManager));
|
||||
|
||||
// assertEquals(11, dsr.hits.totalHits.value);
|
||||
assertEquals(
|
||||
@ -1629,14 +1622,12 @@ public class TestRangeFacet extends SandboxFacetTestCase {
|
||||
|
||||
countRecorder = new CountFacetRecorder();
|
||||
|
||||
CollectorOwner<DummyTotalHitCountCollector, Integer> totalHitsCollectorOwner =
|
||||
new CollectorOwner<>(DummyTotalHitCountCollector.createManager());
|
||||
CollectorOwner<FacetFieldCollector, CountFacetRecorder> drillSidewaysCollectorOwner =
|
||||
new CollectorOwner<>(
|
||||
new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder));
|
||||
ds.search(ddq, totalHitsCollectorOwner, List.of(drillSidewaysCollectorOwner));
|
||||
assertEquals(1, totalHitsCollectorOwner.getResult().intValue());
|
||||
drillSidewaysCollectorOwner.getResult();
|
||||
DrillSideways.Result<Integer, CountFacetRecorder> result =
|
||||
ds.search(
|
||||
ddq,
|
||||
DummyTotalHitCountCollector.createManager(),
|
||||
List.of(new FacetFieldCollectorManager<>(doubleRangeFacetCutter, countRecorder)));
|
||||
assertEquals(1, result.drillDownResult().intValue());
|
||||
assertEquals(
|
||||
"dim=field path=[] value=-2147483648 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
|
||||
getAllSortByOrd(getRangeOrdinals(ranges), countRecorder, "field", ordToLabel).toString());
|
||||
|
Loading…
x
Reference in New Issue
Block a user