mirror of https://github.com/apache/lucene.git
LUCENE-10585: Scrub copy/paste code in the facets module and attempt to simplify a bit (#915)
This commit is contained in:
parent
3a80968ddf
commit
8db1e41fc0
|
@ -75,7 +75,9 @@ New Features
|
|||
|
||||
Improvements
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* LUCENE-10585: Facet module code cleanup (copy/paste scrubbing, simplification and some very minor
|
||||
optimization tweaks). (Greg Miller)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
|
|
@ -51,9 +51,13 @@ public abstract class Facets {
|
|||
|
||||
/**
|
||||
* Returns labels for topN dimensions and their topNChildren sorted by the number of
|
||||
* hits/aggregated values that dimension matched; Results should be the same as calling getAllDims
|
||||
* and then only using the first topNDims; Sub-classes may want to override this implementation
|
||||
* with a more efficient one if they are able.
|
||||
* hits/aggregated values that dimension matched. Results should be the same as calling getAllDims
|
||||
* and then only using the first topNDims. Note that dims should be configured as requiring dim
|
||||
* counts if using this functionality to ensure accurate counts are available (see: {@link
|
||||
* FacetsConfig#setRequireDimCount(String, boolean)}).
|
||||
*
|
||||
* <p>Sub-classes may want to override this implementation with a more efficient one if they are
|
||||
* able.
|
||||
*/
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
List<FacetResult> allResults = getAllDims(topNChildren);
|
||||
|
|
|
@ -0,0 +1,349 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.facet.sortedset;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PrimitiveIterator;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.FacetsConfig.DimConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.DimTree;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Base class for SSDV faceting implementations. */
|
||||
abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||
|
||||
private static final Comparator<FacetResult> FACET_RESULT_COMPARATOR =
|
||||
new Comparator<>() {
|
||||
@Override
|
||||
public int compare(FacetResult a, FacetResult b) {
|
||||
if (a.value.intValue() > b.value.intValue()) {
|
||||
return -1;
|
||||
} else if (b.value.intValue() > a.value.intValue()) {
|
||||
return 1;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
final SortedSetDocValuesReaderState state;
|
||||
final FacetsConfig stateConfig;
|
||||
final SortedSetDocValues dv;
|
||||
final String field;
|
||||
|
||||
/**
 * Captures the reader state along with the field name, facets config and doc values it exposes.
 *
 * @param state reader state the counts are computed against
 * @throws IOException declared because obtaining the doc values from the state may throw it
 */
AbstractSortedSetDocValueFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
  this.state = state;
  field = state.getField();
  stateConfig = state.getFacetsConfig();
  dv = state.getDocValues();
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim, path);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
if (path.length != 1) {
|
||||
throw new IllegalArgumentException("path must be length=1");
|
||||
}
|
||||
int ord = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return getCount(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (String dim : state.getDims()) {
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(topN, dim);
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
if (facetResult != null) {
|
||||
results.add(facetResult);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by highest count:
|
||||
results.sort(FACET_RESULT_COMPARATOR);
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
validateTopN(topNDims);
|
||||
validateTopN(topNChildren);
|
||||
|
||||
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
for (String dim : state.getDims()) {
|
||||
DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
int dimCount;
|
||||
if (dimConfig.hierarchical) {
|
||||
// For hierarchical dims, we directly index each level of the ancestry path (i.e., we
|
||||
// "rollup" at indexing time), meaning we can directly access accurate dim counts without
|
||||
// needing to rollup the descendant paths:
|
||||
int dimOrd = state.getDimTree(dim).dimStartOrd;
|
||||
dimCount = getCount(dimOrd);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If a dim is configured as multi-valued and requires dim count, we index dim counts
|
||||
// directly, so we can access accurate counts without needing to rollup children:
|
||||
dimCount = getCount(dimOrd);
|
||||
} else {
|
||||
// If a dim is configured as multi-valued but _not_ requiring dim count, we have no
|
||||
// way to get accurate counts. We use -1 to indicate this:
|
||||
dimCount = -1;
|
||||
}
|
||||
} else {
|
||||
// If a dim is single-valued, we must aggregate child counts to get accurate dim counts.
|
||||
// We don't index the dim counts directly:
|
||||
// TODO: If getTopDims becomes a common use-case, we could consider always indexing dim
|
||||
// counts to optimize this path.
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
computeTopChildren(childIt, topNChildren, dimConfig, dimOrd);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimCount = topChildrenForPath.pathCount;
|
||||
}
|
||||
}
|
||||
|
||||
if (dimCount != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimCount));
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimCount;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int resultSize = pq.size();
|
||||
FacetResult[] results = new FacetResult[resultSize];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dimValue.dim);
|
||||
}
|
||||
if (topChildrenForPath == null) {
|
||||
topChildrenForPath = getTopChildrenForPath(topNChildren, dimValue.dim);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dimValue.dim);
|
||||
// should not be null since only dims with non-zero values were considered earlier
|
||||
assert facetResult != null;
|
||||
resultSize--;
|
||||
results[resultSize] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
 * Retrieve the count for a specified ordinal.
 *
 * @param ord ordinal to look up
 * @return the count recorded for {@code ord}
 */
|
||||
abstract int getCount(int ord);
|
||||
|
||||
/**
|
||||
* Compute the top-n children for the given path and iterator of all immediate children of the
|
||||
* path. This returns an intermediate result that does the minimal required work, avoiding the
|
||||
* cost of looking up string labels, etc.
|
||||
*/
|
||||
TopChildrenForPath computeTopChildren(
|
||||
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
|
||||
TopOrdAndIntQueue q = null;
|
||||
int bottomCount = 0;
|
||||
int pathCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
int count = getCount(ord);
|
||||
if (count > 0) {
|
||||
pathCount += count;
|
||||
childCount++;
|
||||
if (count > bottomCount) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = count;
|
||||
if (q == null) {
|
||||
// Lazy init, so we don't create this for the
|
||||
// sparse case unnecessarily
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
pathCount = getCount(pathOrd);
|
||||
} else {
|
||||
// see if pathCount is actually reliable or needs to be reset
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
pathCount = getCount(pathOrd);
|
||||
} else {
|
||||
pathCount = -1; // pathCount is inaccurate at this point, so set it to -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new TopChildrenForPath(pathCount, childCount, q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
TopChildrenForPath getTopChildrenForPath(int topN, String dim, String... path)
|
||||
throws IOException {
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
// Determine the path ord and resolve an iterator to its immediate children. The logic for this
|
||||
// depends on whether-or-not the dimension is configured as hierarchical:
|
||||
final int pathOrd;
|
||||
final PrimitiveIterator.OfInt childIterator;
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
if (path.length > 0) {
|
||||
pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
} else {
|
||||
// If there's no path, this is a little more efficient to just look up the dim:
|
||||
pathOrd = dimTree.dimStartOrd;
|
||||
}
|
||||
if (pathOrd < 0) {
|
||||
// path was never indexed
|
||||
return null;
|
||||
}
|
||||
childIterator = dimTree.iterator(pathOrd);
|
||||
} else {
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field is not configured as hierarchical, path should be 0 length");
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
// means dimension was never indexed
|
||||
return null;
|
||||
}
|
||||
pathOrd = ordRange.start;
|
||||
childIterator = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the actual results:
|
||||
return computeTopChildren(childIterator, topN, dimConfig, pathOrd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
||||
throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = topChildrenForPath.q;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathCount, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
/**
 * Intermediate result to store top children for a given path before resolving labels, etc.
 *
 * @param pathCount count for the path itself; -1 when it cannot be determined accurately
 *     (multi-valued dims that do not require dim counts)
 * @param childCount number of immediate children with a positive count
 * @param q queue holding the top children by count; null when no child had a positive count
 */
|
||||
record TopChildrenForPath(int pathCount, int childCount, TopOrdAndIntQueue q) {}
|
||||
|
||||
static final class DimValue {
|
||||
String dim;
|
||||
int value;
|
||||
|
||||
DimValue(String dim, int value) {
|
||||
this.dim = dim;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -19,25 +19,15 @@ package org.apache.lucene.facet.sortedset;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.PrimitiveIterator;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.atomic.AtomicIntegerArray;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetUtils;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -52,27 +42,19 @@ import org.apache.lucene.search.ConjunctionUtils;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Like {@link SortedSetDocValuesFacetCounts}, but aggregates counts concurrently across segments.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
||||
public class ConcurrentSortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
|
||||
|
||||
final ExecutorService exec;
|
||||
final SortedSetDocValuesReaderState state;
|
||||
final FacetsConfig stateConfig;
|
||||
final SortedSetDocValues dv;
|
||||
final String field;
|
||||
final AtomicIntegerArray counts;
|
||||
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
|
||||
public ConcurrentSortedSetDocValuesFacetCounts(
|
||||
SortedSetDocValuesReaderState state, ExecutorService exec)
|
||||
|
@ -84,11 +66,8 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
public ConcurrentSortedSetDocValuesFacetCounts(
|
||||
SortedSetDocValuesReaderState state, FacetsCollector hits, ExecutorService exec)
|
||||
throws IOException, InterruptedException {
|
||||
this.state = state;
|
||||
this.field = state.getField();
|
||||
this.stateConfig = state.getFacetsConfig();
|
||||
super(state);
|
||||
this.exec = exec;
|
||||
dv = state.getDocValues();
|
||||
counts = new AtomicIntegerArray(state.getSize());
|
||||
if (hits == null) {
|
||||
// browse only
|
||||
|
@ -99,181 +78,8 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (pathOrd < 0) {
|
||||
// path was never indexed
|
||||
return null;
|
||||
}
|
||||
SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
|
||||
return getPathResult(dimConfig, dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
|
||||
} else {
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field is not configured as hierarchical, path should be 0 length");
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
// means dimension was never indexed
|
||||
return null;
|
||||
}
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
return getPathResult(dimConfig, dim, null, dimOrd, childIt, topN);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getPathResult to be called without passing in the dimToChildOrdsResult
|
||||
* parameter
|
||||
*/
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
String[] path,
|
||||
int pathOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN)
|
||||
throws IOException {
|
||||
return getPathResult(dimConfig, dim, path, pathOrd, childOrds, topN, null);
|
||||
}
|
||||
|
||||
/** Returns path results for a dimension */
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
String[] path,
|
||||
int pathOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
ChildOrdsResult dimToChildOrdsResult)
|
||||
throws IOException {
|
||||
|
||||
ChildOrdsResult childOrdsResult;
|
||||
|
||||
// if getTopDims is called, get results from previously stored dimToChildOrdsResult, otherwise
|
||||
// call getChildOrdsResult to get dimCount, childCount and the queue for the dimension's top
|
||||
// children
|
||||
if (dimToChildOrdsResult != null) {
|
||||
childOrdsResult = dimToChildOrdsResult;
|
||||
} else {
|
||||
childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, pathOrd);
|
||||
}
|
||||
|
||||
if (childOrdsResult.q == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = getLabelValuesFromTopOrdAndIntQueue(childOrdsResult.q);
|
||||
|
||||
if (dimConfig.hierarchical == true) {
|
||||
return new FacetResult(
|
||||
dim, path, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
} else {
|
||||
return new FacetResult(
|
||||
dim, emptyPath, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns ChildOrdsResult that contains results of dimCount, childCount, and the queue for the
|
||||
* dimension's top children to populate FacetResult in getPathResult.
|
||||
*/
|
||||
private ChildOrdsResult getChildOrdsResult(
|
||||
PrimitiveIterator.OfInt childOrds, int topN, FacetsConfig.DimConfig dimConfig, int pathOrd) {
|
||||
|
||||
TopOrdAndIntQueue q = null;
|
||||
int bottomCount = 0;
|
||||
int dimCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
if (counts.get(ord) > 0) {
|
||||
dimCount += counts.get(ord);
|
||||
childCount++;
|
||||
if (counts.get(ord) > bottomCount) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = counts.get(ord);
|
||||
if (q == null) {
|
||||
// Lazy init, so we don't create this for the
|
||||
// sparse case unnecessarily
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dimConfig.hierarchical == true) {
|
||||
dimCount = counts.get(pathOrd);
|
||||
} else {
|
||||
// see if dimCount is actually reliable or needs to be reset
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
dimCount = counts.get(pathOrd);
|
||||
} else {
|
||||
dimCount = -1; // dimCount is in accurate at this point, so set it to -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ChildOrdsResult(dimCount, childCount, q);
|
||||
}
|
||||
|
||||
/** Returns label values for dims. */
|
||||
private LabelAndValue[] getLabelValuesFromTopOrdAndIntQueue(TopOrdAndIntQueue q)
|
||||
throws IOException {
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
return labelValues;
|
||||
}
|
||||
|
||||
/** Returns value/count of a dimension. */
|
||||
private int getDimValue(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
int dimOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult) {
|
||||
|
||||
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
|
||||
// indexing time, return dimCount directly
|
||||
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
|
||||
return counts.get(dimOrd);
|
||||
}
|
||||
|
||||
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, dimOrd);
|
||||
|
||||
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
|
||||
// getChildOrdsResult again in getPathResult
|
||||
dimToChildOrdsResult.put(dim, childOrdsResult);
|
||||
return childOrdsResult.dimCount;
|
||||
int getCount(int ord) {
|
||||
return counts.get(ord);
|
||||
}
|
||||
|
||||
private class CountOneSegment implements Callable<Void> {
|
||||
|
@ -535,195 +341,4 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
if (path.length != 1) {
|
||||
throw new IllegalArgumentException("path must be length=1");
|
||||
}
|
||||
int ord = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return counts.get(ord);
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getFacetResultForDim to be called without passing in the
|
||||
* dimToChildOrdsResult parameter
|
||||
*/
|
||||
private FacetResult getFacetResultForDim(String dim, int topNChildren) throws IOException {
|
||||
return getFacetResultForDim(dim, topNChildren, null);
|
||||
}
|
||||
|
||||
/** Returns FacetResult for a dimension. */
|
||||
private FacetResult getFacetResultForDim(
|
||||
String dim, int topNChildren, ChildOrdsResult dimToChildOrdsResult) throws IOException {
|
||||
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
return getPathResult(
|
||||
dimConfig,
|
||||
dim,
|
||||
emptyPath,
|
||||
dimOrd,
|
||||
dimTree.iterator(),
|
||||
topNChildren,
|
||||
dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
return getPathResult(
|
||||
dimConfig, dim, emptyPath, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (String dim : state.getDims()) {
|
||||
FacetResult factResult = getFacetResultForDim(dim, topN);
|
||||
if (factResult != null) {
|
||||
results.add(factResult);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by highest count:
|
||||
Collections.sort(
|
||||
results,
|
||||
new Comparator<FacetResult>() {
|
||||
@Override
|
||||
public int compare(FacetResult a, FacetResult b) {
|
||||
if (a.value.intValue() > b.value.intValue()) {
|
||||
return -1;
|
||||
} else if (b.value.intValue() > a.value.intValue()) {
|
||||
return 1;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
if (topNDims <= 0 || topNChildren <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0");
|
||||
}
|
||||
|
||||
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValueResult> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValueResult a, DimValueResult b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
|
||||
int dimCount;
|
||||
|
||||
for (String dim : state.getDims()) {
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
if (dimConfig.hierarchical) {
|
||||
SortedSetDocValuesReaderState.DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
// get dim value
|
||||
dimCount =
|
||||
getDimValue(
|
||||
dimConfig, dim, dimOrd, dimTree.iterator(), topNChildren, dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
dimCount = getDimValue(dimConfig, dim, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
|
||||
if (dimCount != 0) {
|
||||
// use priority queue to store DimValueResult for topNDims
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValueResult(dim, dimCount));
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValueResult bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimCount;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get FacetResult for topNDims
|
||||
int resultSize = pq.size();
|
||||
FacetResult[] results = new FacetResult[resultSize];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValueResult dimValueResult = pq.pop();
|
||||
FacetResult facetResult =
|
||||
getFacetResultForDim(
|
||||
dimValueResult.dim, topNChildren, dimToChildOrdsResult.get(dimValueResult.dim));
|
||||
resultSize--;
|
||||
results[resultSize] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates ChildOrdsResult to store dimCount, childCount, and the queue for the dimension's top
|
||||
* children
|
||||
*/
|
||||
private static class ChildOrdsResult {
|
||||
final int dimCount;
|
||||
final int childCount;
|
||||
final TopOrdAndIntQueue q;
|
||||
|
||||
ChildOrdsResult(int dimCount, int childCount, TopOrdAndIntQueue q) {
|
||||
this.dimCount = dimCount;
|
||||
this.childCount = childCount;
|
||||
this.q = q;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates DimValueResult to store the label and value of dim in order to sort by these two
|
||||
* fields.
|
||||
*/
|
||||
private static class DimValueResult {
|
||||
String dim;
|
||||
int value;
|
||||
|
||||
DimValueResult(String dim, int value) {
|
||||
this.dim = dim;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,23 +17,12 @@
|
|||
package org.apache.lucene.facet.sortedset;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.PrimitiveIterator;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.FacetUtils;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.LabelAndValue;
|
||||
import org.apache.lucene.facet.TopOrdAndIntQueue;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.DimTree;
|
||||
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
|
@ -48,9 +37,7 @@ import org.apache.lucene.search.ConjunctionUtils;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Compute facets counts from previously indexed {@link SortedSetDocValuesFacetField}, without
|
||||
|
@ -70,16 +57,9 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SortedSetDocValuesFacetCounts extends Facets {
|
||||
|
||||
final SortedSetDocValuesReaderState state;
|
||||
final FacetsConfig stateConfig;
|
||||
final SortedSetDocValues dv;
|
||||
final String field;
|
||||
public class SortedSetDocValuesFacetCounts extends AbstractSortedSetDocValueFacetCounts {
|
||||
final int[] counts;
|
||||
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Returns all facet counts, same result as searching on {@link MatchAllDocsQuery} but faster. */
|
||||
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state) throws IOException {
|
||||
this(state, null);
|
||||
|
@ -88,10 +68,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
/** Counts all facet dimensions across the provided hits. */
|
||||
public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
|
||||
throws IOException {
|
||||
this.state = state;
|
||||
this.field = state.getField();
|
||||
this.stateConfig = state.getFacetsConfig();
|
||||
this.dv = state.getDocValues();
|
||||
super(state);
|
||||
this.counts = new int[state.getSize()];
|
||||
if (hits == null) {
|
||||
// browse only
|
||||
|
@ -102,180 +79,8 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
validateTopN(topN);
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
int pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (pathOrd < 0) {
|
||||
// path was never indexed
|
||||
return null;
|
||||
}
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
return getPathResult(dimConfig, dim, path, pathOrd, dimTree.iterator(pathOrd), topN);
|
||||
} else {
|
||||
if (path.length > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"Field is not configured as hierarchical, path should be 0 length");
|
||||
}
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
if (ordRange == null) {
|
||||
// means dimension was never indexed
|
||||
return null;
|
||||
}
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
return getPathResult(dimConfig, dim, null, dimOrd, childIt, topN);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getPathResult be called without passing in the dimToChildOrdsResult
|
||||
* parameter
|
||||
*/
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
String[] path,
|
||||
int pathOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN)
|
||||
throws IOException {
|
||||
return getPathResult(dimConfig, dim, path, pathOrd, childOrds, topN, null);
|
||||
}
|
||||
|
||||
/** Returns path results for a dimension */
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
String[] path,
|
||||
int pathOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
ChildOrdsResult dimToChildOrdsResult)
|
||||
throws IOException {
|
||||
|
||||
ChildOrdsResult childOrdsResult;
|
||||
|
||||
// if getTopDims is called, get results from previously stored dimToChildOrdsResult, otherwise
|
||||
// call getChildOrdsResult to get dimCount, childCount and the queue for the dimension's top
|
||||
// children
|
||||
if (dimToChildOrdsResult != null) {
|
||||
childOrdsResult = dimToChildOrdsResult;
|
||||
} else {
|
||||
childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, pathOrd);
|
||||
}
|
||||
|
||||
if (childOrdsResult.q == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = getLabelValuesFromTopOrdAndIntQueue(childOrdsResult.q);
|
||||
|
||||
if (dimConfig.hierarchical == true) {
|
||||
return new FacetResult(
|
||||
dim, path, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
} else {
|
||||
return new FacetResult(
|
||||
dim, emptyPath, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns SortedSetDocValuesChildOrdsResult that contains results of dimCount, childCount, and
|
||||
* the queue for the dimension's top children to populate FacetResult in getPathResult.
|
||||
*/
|
||||
private ChildOrdsResult getChildOrdsResult(
|
||||
PrimitiveIterator.OfInt childOrds, int topN, FacetsConfig.DimConfig dimConfig, int pathOrd) {
|
||||
TopOrdAndIntQueue q = null;
|
||||
int bottomCount = 0;
|
||||
int dimCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while (childOrds.hasNext()) {
|
||||
int ord = childOrds.next();
|
||||
if (counts[ord] > 0) {
|
||||
dimCount += counts[ord];
|
||||
childCount++;
|
||||
if (counts[ord] > bottomCount) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = counts[ord];
|
||||
if (q == null) {
|
||||
// Lazy init, so we don't create this for the
|
||||
// sparse case unnecessarily
|
||||
q = new TopOrdAndIntQueue(topN);
|
||||
}
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dimConfig.hierarchical == true) {
|
||||
dimCount = counts[pathOrd];
|
||||
} else {
|
||||
// see if dimCount is actually reliable or needs to be reset
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
dimCount = counts[pathOrd];
|
||||
} else {
|
||||
dimCount = -1; // dimCount is in accurate at this point, so set it to -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ChildOrdsResult(dimCount, childCount, q);
|
||||
}
|
||||
|
||||
/** Returns label values for dims. */
|
||||
private LabelAndValue[] getLabelValuesFromTopOrdAndIntQueue(TopOrdAndIntQueue q)
|
||||
throws IOException {
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
return labelValues;
|
||||
}
|
||||
|
||||
/** Returns value/count of a dimension. */
|
||||
private int getDimValue(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
int dimOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult) {
|
||||
|
||||
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
|
||||
// indexing time, return dimCount directly
|
||||
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
|
||||
return counts[dimOrd];
|
||||
}
|
||||
|
||||
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, dimOrd);
|
||||
|
||||
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
|
||||
// getChildOrdsResult again in getPathResult
|
||||
dimToChildOrdsResult.put(dim, childOrdsResult);
|
||||
return childOrdsResult.dimCount;
|
||||
int getCount(int ord) {
|
||||
return counts[ord];
|
||||
}
|
||||
|
||||
// Variant of countOneSegment, that has No Hits or Live Docs
|
||||
|
@ -507,193 +312,4 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
if (path.length != 1) {
|
||||
throw new IllegalArgumentException("path must be length=1");
|
||||
}
|
||||
int ord = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return counts[ord];
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getFacetResultForDim be called without passing in the
|
||||
* dimToChildOrdsResult parameter
|
||||
*/
|
||||
private FacetResult getFacetResultForDim(String dim, int topNChildren) throws IOException {
|
||||
return getFacetResultForDim(dim, topNChildren, null);
|
||||
}
|
||||
|
||||
/** Returns FacetResult for a dimension. */
|
||||
private FacetResult getFacetResultForDim(
|
||||
String dim, int topNChildren, ChildOrdsResult dimToChildOrdsResult) throws IOException {
|
||||
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
return getPathResult(
|
||||
dimConfig,
|
||||
dim,
|
||||
emptyPath,
|
||||
dimOrd,
|
||||
dimTree.iterator(),
|
||||
topNChildren,
|
||||
dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
return getPathResult(
|
||||
dimConfig, dim, emptyPath, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
validateTopN(topN);
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (String dim : state.getDims()) {
|
||||
FacetResult factResult = getFacetResultForDim(dim, topN);
|
||||
if (factResult != null) {
|
||||
results.add(factResult);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by highest count:
|
||||
Collections.sort(
|
||||
results,
|
||||
new Comparator<FacetResult>() {
|
||||
@Override
|
||||
public int compare(FacetResult a, FacetResult b) {
|
||||
if (a.value.intValue() > b.value.intValue()) {
|
||||
return -1;
|
||||
} else if (b.value.intValue() > a.value.intValue()) {
|
||||
return 1;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim);
|
||||
}
|
||||
}
|
||||
});
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
validateTopN(topNDims);
|
||||
validateTopN(topNChildren);
|
||||
|
||||
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValueResult> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValueResult a, DimValueResult b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
|
||||
int dimCount;
|
||||
|
||||
for (String dim : state.getDims()) {
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
// get dim value
|
||||
dimCount =
|
||||
getDimValue(
|
||||
dimConfig, dim, dimOrd, dimTree.iterator(), topNChildren, dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
dimCount = getDimValue(dimConfig, dim, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
|
||||
if (dimCount != 0) {
|
||||
// use priority queue to store DimValueResult for topNDims
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValueResult(dim, dimCount));
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValueResult bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimCount;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get FacetResult for topNDims
|
||||
int resultSize = pq.size();
|
||||
FacetResult[] results = new FacetResult[resultSize];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValueResult dimValueResult = pq.pop();
|
||||
FacetResult facetResult =
|
||||
getFacetResultForDim(
|
||||
dimValueResult.dim, topNChildren, dimToChildOrdsResult.get(dimValueResult.dim));
|
||||
resultSize--;
|
||||
results[resultSize] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates ChildOrdsResult to store dimCount, childCount, and the queue for the dimension's top
|
||||
* children
|
||||
*/
|
||||
private static class ChildOrdsResult {
|
||||
final int dimCount;
|
||||
final int childCount;
|
||||
final TopOrdAndIntQueue q;
|
||||
|
||||
ChildOrdsResult(int dimCount, int childCount, TopOrdAndIntQueue q) {
|
||||
this.dimCount = dimCount;
|
||||
this.childCount = childCount;
|
||||
this.q = q;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates DimValueResult to store the label and value of dim in order to sort by these two
|
||||
* fields.
|
||||
*/
|
||||
private static class DimValueResult {
|
||||
String dim;
|
||||
int value;
|
||||
|
||||
DimValueResult(String dim, int value) {
|
||||
this.dim = dim;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,9 +39,6 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
/** Per-ordinal value. */
|
||||
final float[] values;
|
||||
|
||||
/** Pass in emptyPath for getTopDims and getAllDims. */
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Sole constructor. */
|
||||
FloatTaxonomyFacets(
|
||||
String indexFieldName,
|
||||
|
@ -114,21 +111,15 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, topN);
|
||||
if (childOrdsResult.aggregatedValue == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = getLabelValues(childOrdsResult.q, cp.length);
|
||||
return new FacetResult(
|
||||
dim, path, childOrdsResult.aggregatedValue, labelValues, childOrdsResult.childCount);
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ChildOrdsResult that contains results of aggregatedValue, childCount, and the queue for
|
||||
* the dimension's top children to populate FacetResult in getPathResult.
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
private ChildOrdsResult getChildOrdsResult(DimConfig dimConfig, int dimOrd, int topN)
|
||||
private TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int pathOrd, int topN)
|
||||
throws IOException {
|
||||
|
||||
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
|
@ -137,7 +128,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
|
||||
int ord = children[dimOrd];
|
||||
int ord = children[pathOrd];
|
||||
float aggregatedValue = 0;
|
||||
int childCount = 0;
|
||||
|
||||
|
@ -164,61 +155,50 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = values[dimOrd];
|
||||
aggregatedValue = values[pathOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
}
|
||||
return new ChildOrdsResult(aggregatedValue, childCount, q);
|
||||
return new TopChildrenForPath(aggregatedValue, childCount, q);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return label and values for top dimensions and children
|
||||
*
|
||||
* @param q the queue for the dimension's top children
|
||||
* @param pathLength the length of a dimension's children paths
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
private LabelAndValue[] getLabelValues(TopOrdAndFloatQueue q, int pathLength) throws IOException {
|
||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
||||
throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndFloatQueue q = topChildrenForPath.childQueue;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
float[] values = new float[labelValues.length];
|
||||
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
values[i] = ordAndValue.value;
|
||||
}
|
||||
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
// The path component we're interested in is the one immediately after the provided path. We
|
||||
// add 1 here to also account for the dim:
|
||||
int childComponentIdx = path.length + 1;
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[pathLength], values[i]);
|
||||
}
|
||||
return labelValues;
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
|
||||
}
|
||||
|
||||
/** Return value of a dimension. */
|
||||
private float getDimValue(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
int dimOrd,
|
||||
int topN,
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult)
|
||||
throws IOException {
|
||||
|
||||
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
|
||||
// indexing time, return dimCount directly
|
||||
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
|
||||
return values[dimOrd];
|
||||
}
|
||||
|
||||
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, topN);
|
||||
|
||||
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
|
||||
// getChildOrdsResult again in getTopDims
|
||||
dimToChildOrdsResult.put(dim, childOrdsResult);
|
||||
return childOrdsResult.aggregatedValue;
|
||||
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -232,10 +212,10 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValueResult> pq =
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValueResult a, DimValueResult b) {
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
|
@ -246,8 +226,8 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
};
|
||||
|
||||
// create hashMap to store the ChildOrdsResult to avoid calling getChildOrdsResult for all dims
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
|
@ -255,22 +235,42 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
if (dimConfig.indexFieldName.equals(indexFieldName)) {
|
||||
FacetLabel cp = new FacetLabel(dim, emptyPath);
|
||||
FacetLabel cp = new FacetLabel(dim);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
float dimCount = 0;
|
||||
// if dimOrd = -1, we skip this dim, else call getDimValue
|
||||
if (dimOrd != -1) {
|
||||
dimCount = getDimValue(dimConfig, dim, dimOrd, topNChildren, dimToChildOrdsResult);
|
||||
if (dimCount != 0) {
|
||||
// use priority queue to store DimValueResult for topNDims
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValueResult(dim, dimOrd, dimCount));
|
||||
float dimValue;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If the dim is configured as multi-valued and requires dim counts, we can access
|
||||
// an accurate count for the dim computed at indexing time:
|
||||
dimValue = values[dimOrd];
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValueResult bottomDim = pq.top();
|
||||
// If the dim is configured as multi-valued but not requiring dim counts, we cannot
|
||||
// compute an accurate dim count, and use -1 as a place-holder:
|
||||
dimValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Single-valued dims require aggregating descendant paths to get accurate dim counts
|
||||
// since we don't directly access ancestry paths:
|
||||
// TODO: We could consider indexing dim counts directly if getTopDims is a common
|
||||
// use-case.
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
getTopChildrenForPath(dimConfig, dimOrd, topNChildren);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimValue = topChildrenForPath.pathValue;
|
||||
}
|
||||
if (dimValue != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimOrd, dimValue));
|
||||
} else {
|
||||
if (dimValue > pq.top().value
|
||||
|| (dimValue == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimCount;
|
||||
bottomDim.value = dimValue;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
|
@ -280,63 +280,40 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
|||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
// use fixed-size array to reduce space usage
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValueResult dimValueResult = pq.pop();
|
||||
String dim = dimValueResult.dim;
|
||||
ChildOrdsResult childOrdsResult;
|
||||
// if the childOrdsResult was stored in the map, avoid calling getChildOrdsResult again
|
||||
if (dimToChildOrdsResult.containsKey(dim)) {
|
||||
childOrdsResult = dimToChildOrdsResult.get(dim);
|
||||
} else {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
childOrdsResult = getChildOrdsResult(dimConfig, dimValueResult.dimOrd, topNChildren);
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
String dim = dimValue.dim;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dim);
|
||||
}
|
||||
// FacetResult requires String[] path, and path is always empty for getTopDims.
|
||||
// pathLength is always equal to 1 when FacetLabel is constructed with
|
||||
// FacetLabel(dim, emptyPath), and therefore, 1 is passed in when calling getLabelValues
|
||||
FacetResult facetResult =
|
||||
new FacetResult(
|
||||
dimValueResult.dim,
|
||||
emptyPath,
|
||||
dimValueResult.value,
|
||||
getLabelValues(childOrdsResult.q, 1),
|
||||
childOrdsResult.childCount);
|
||||
if (topChildrenForPath == null) {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
topChildrenForPath = getTopChildrenForPath(dimConfig, dimValue.dimOrd, topNChildren);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
assert facetResult != null;
|
||||
results[pq.size()] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create DimValueResult to store the label, dim ordinal and dim count of a dim in priority queue
|
||||
*/
|
||||
private static class DimValueResult {
|
||||
private static class DimValue {
|
||||
String dim;
|
||||
int dimOrd;
|
||||
float value;
|
||||
|
||||
DimValueResult(String dim, int dimOrd, float value) {
|
||||
DimValue(String dim, int dimOrd, float value) {
|
||||
this.dim = dim;
|
||||
this.dimOrd = dimOrd;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create ChildOrdsResult to store dimCount, childCount, and the queue for the dimension's top
|
||||
* children
|
||||
*/
|
||||
private static class ChildOrdsResult {
|
||||
final float aggregatedValue;
|
||||
final int childCount;
|
||||
final TopOrdAndFloatQueue q;
|
||||
|
||||
ChildOrdsResult(float aggregatedValue, int childCount, TopOrdAndFloatQueue q) {
|
||||
this.aggregatedValue = aggregatedValue;
|
||||
this.childCount = childCount;
|
||||
this.q = q;
|
||||
}
|
||||
}
|
||||
/** Intermediate result to store top children for a given path before resolving labels, etc. */
|
||||
private record TopChildrenForPath(
|
||||
float pathValue, int childCount, TopOrdAndFloatQueue childQueue) {}
|
||||
}
|
||||
|
|
|
@ -44,9 +44,6 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
/** Sparse ordinal values. */
|
||||
final IntIntHashMap sparseValues;
|
||||
|
||||
/** Pass in emptyPath for getTopDims and getAllDims. */
|
||||
private static final String[] emptyPath = new String[0];
|
||||
|
||||
/** Sole constructor. */
|
||||
IntTaxonomyFacets(
|
||||
String indexFieldName,
|
||||
|
@ -176,46 +173,15 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
return null;
|
||||
}
|
||||
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, topN);
|
||||
|
||||
if (childOrdsResult.q == null || childOrdsResult.aggregatedValue == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = getLabelValues(childOrdsResult.q, cp.length);
|
||||
return new FacetResult(
|
||||
dim, path, childOrdsResult.aggregatedValue, labelValues, childOrdsResult.childCount);
|
||||
TopChildrenForPath topChildrenForPath = getTopChildrenForPath(dimConfig, dimOrd, topN);
|
||||
return createFacetResult(topChildrenForPath, dim, path);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return label and values for top dimensions and children
|
||||
*
|
||||
* @param q the queue for the dimension's top children
|
||||
* @param pathLength the length of a dimension's children paths
|
||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||
* form.
|
||||
*/
|
||||
private LabelAndValue[] getLabelValues(TopOrdAndIntQueue q, int pathLength) throws IOException {
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
int[] values = new int[labelValues.length];
|
||||
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
values[i] = ordAndValue.value;
|
||||
}
|
||||
|
||||
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[pathLength], values[i]);
|
||||
}
|
||||
return labelValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return ChildOrdsResult that contains results of dimCount, childCount, and the queue for the
|
||||
* dimension's top children to populate FacetResult in getPathResult.
|
||||
*/
|
||||
private ChildOrdsResult getChildOrdsResult(DimConfig dimConfig, int dimOrd, int topN)
|
||||
private TopChildrenForPath getTopChildrenForPath(DimConfig dimConfig, int pathOrd, int topN)
|
||||
throws IOException {
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
int bottomValue = 0;
|
||||
|
@ -230,7 +196,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
for (IntIntCursor c : sparseValues) {
|
||||
int value = c.value;
|
||||
int ord = c.key;
|
||||
if (parents[ord] == dimOrd && value > 0) {
|
||||
if (parents[ord] == pathOrd && value > 0) {
|
||||
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
|
||||
childCount++;
|
||||
if (value > bottomValue) {
|
||||
|
@ -249,7 +215,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
} else {
|
||||
int[] children = getChildren();
|
||||
int[] siblings = getSiblings();
|
||||
int ord = children[dimOrd];
|
||||
int ord = children[pathOrd];
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int value = values[ord];
|
||||
if (value > 0) {
|
||||
|
@ -273,38 +239,14 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
aggregatedValue = getValue(dimOrd);
|
||||
aggregatedValue = getValue(pathOrd);
|
||||
} else {
|
||||
// Our sum'd value is not correct, in general:
|
||||
aggregatedValue = -1;
|
||||
}
|
||||
}
|
||||
|
||||
return new ChildOrdsResult(aggregatedValue, childCount, q);
|
||||
}
|
||||
|
||||
/** Return value/count of a dimension. */
|
||||
private int getDimValue(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
int dimOrd,
|
||||
int topN,
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult)
|
||||
throws IOException {
|
||||
|
||||
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
|
||||
// indexing time, return dimCount directly
|
||||
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
|
||||
return getValue(dimOrd);
|
||||
}
|
||||
|
||||
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(dimConfig, dimOrd, topN);
|
||||
|
||||
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
|
||||
// getChildOrdsResult again in getTopDims
|
||||
dimToChildOrdsResult.put(dim, childOrdsResult);
|
||||
return childOrdsResult.aggregatedValue;
|
||||
return new TopChildrenForPath(aggregatedValue, childCount, q);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -319,10 +261,10 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
|
||||
// Create priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValueResult> pq =
|
||||
PriorityQueue<DimValue> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValueResult a, DimValueResult b) {
|
||||
protected boolean lessThan(DimValue a, DimValue b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
|
@ -333,8 +275,8 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
}
|
||||
};
|
||||
|
||||
// create hashMap to store the ChildOrdsResult to avoid calling getChildOrdsResult for all dims
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
|
||||
// Keep track of intermediate results, if we compute them, so we can reuse them later:
|
||||
Map<String, TopChildrenForPath> intermediateResults = null;
|
||||
|
||||
// iterate over children and siblings ordinals for all dims
|
||||
int ord = children[TaxonomyReader.ROOT_ORDINAL];
|
||||
|
@ -342,21 +284,42 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
String dim = taxoReader.getPath(ord).components[0];
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
if (dimConfig.indexFieldName.equals(indexFieldName)) {
|
||||
FacetLabel cp = new FacetLabel(dim, emptyPath);
|
||||
FacetLabel cp = new FacetLabel(dim);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
// if dimOrd = -1, we skip this dim, else call getDimValue
|
||||
if (dimOrd != -1) {
|
||||
int dimCount = getDimValue(dimConfig, dim, dimOrd, topNChildren, dimToChildOrdsResult);
|
||||
if (dimCount != 0) {
|
||||
// use priority queue to store DimValueResult for topNDims
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValueResult(dim, dimOrd, dimCount));
|
||||
int dimValue;
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
// If the dim is configured as multi-valued and requires dim counts, we can access
|
||||
// an accurate count for the dim computed at indexing time:
|
||||
dimValue = getValue(dimOrd);
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValueResult bottomDim = pq.top();
|
||||
// If the dim is configured as multi-valued but not requiring dim counts, we cannot
|
||||
// compute an accurate dim count, and use -1 as a place-holder:
|
||||
dimValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Single-valued dims require aggregating descendant paths to get accurate dim counts
|
||||
// since we don't directly access ancestry paths:
|
||||
// TODO: We could consider indexing dim counts directly if getTopDims is a common
|
||||
// use-case.
|
||||
TopChildrenForPath topChildrenForPath =
|
||||
getTopChildrenForPath(dimConfig, dimOrd, topNChildren);
|
||||
if (intermediateResults == null) {
|
||||
intermediateResults = new HashMap<>();
|
||||
}
|
||||
intermediateResults.put(dim, topChildrenForPath);
|
||||
dimValue = topChildrenForPath.pathValue;
|
||||
}
|
||||
if (dimValue != 0) {
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValue(dim, dimOrd, dimValue));
|
||||
} else {
|
||||
if (dimValue > pq.top().value
|
||||
|| (dimValue == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValue bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
// The reused bottom entry must also take over the ordinal of the new dim. The ordinal is read
// back after the queue is drained to compute the top children whenever no intermediate result
// was cached for the dim; leaving the evicted dim's ordinal in place would report the wrong
// dim's children under this dim's label.
bottomDim.dimOrd = dimOrd;
bottomDim.value = dimCount;
|
||||
bottomDim.value = dimValue;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
|
@ -366,63 +329,76 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
|||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
// use fixed-size array to reduce space usage
|
||||
FacetResult[] results = new FacetResult[pq.size()];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValueResult dimValueResult = pq.pop();
|
||||
String dim = dimValueResult.dim;
|
||||
ChildOrdsResult childOrdsResult;
|
||||
// if the childOrdsResult was stored in the map, avoid calling getChildOrdsResult again
|
||||
if (dimToChildOrdsResult.containsKey(dim)) {
|
||||
childOrdsResult = dimToChildOrdsResult.get(dim);
|
||||
} else {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
childOrdsResult = getChildOrdsResult(dimConfig, dimValueResult.dimOrd, topNChildren);
|
||||
DimValue dimValue = pq.pop();
|
||||
assert dimValue != null;
|
||||
String dim = dimValue.dim;
|
||||
TopChildrenForPath topChildrenForPath = null;
|
||||
if (intermediateResults != null) {
|
||||
topChildrenForPath = intermediateResults.get(dim);
|
||||
}
|
||||
// FacetResult requires String[] path, and path is always empty for getTopDims.
|
||||
// pathLength is always equal to 1 when FacetLabel is constructed with
|
||||
// FacetLabel(dim, emptyPath), and therefore, 1 is passed in when calling getLabelValues
|
||||
FacetResult facetResult =
|
||||
new FacetResult(
|
||||
dimValueResult.dim,
|
||||
emptyPath,
|
||||
dimValueResult.value,
|
||||
getLabelValues(childOrdsResult.q, 1),
|
||||
childOrdsResult.childCount);
|
||||
if (topChildrenForPath == null) {
|
||||
FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
|
||||
topChildrenForPath = getTopChildrenForPath(dimConfig, dimValue.dimOrd, topNChildren);
|
||||
}
|
||||
FacetResult facetResult = createFacetResult(topChildrenForPath, dim);
|
||||
assert facetResult != null;
|
||||
results[pq.size()] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create DimValueResult to store the label, dim ordinal and dim count of a dim in priority queue
|
||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||
*/
|
||||
private static class DimValueResult {
|
||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
||||
throws IOException {
|
||||
// If the intermediate result is null or there are no children, we return null:
|
||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = topChildrenForPath.childQueue;
|
||||
assert q != null;
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
int[] ordinals = new int[labelValues.length];
|
||||
|
||||
int[] values = new int[labelValues.length];
|
||||
|
||||
// Drain the queue, filling the arrays from the last slot to the first so the result order is
// the reverse of the pop order. Note that this empties the queue held by topChildrenForPath.
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
ordinals[i] = ordAndValue.ord;
|
||||
|
||||
values[i] = ordAndValue.value;
|
||||
}
|
||||
|
||||
// Resolve all child ordinals to their paths with a single bulk lookup:
FacetLabel[] bulkPath = taxoReader.getBulkPath(ordinals);
|
||||
// The path component we're interested in is the one immediately after the provided path. We
|
||||
// add 1 here to also account for the dim:
|
||||
int childComponentIdx = path.length + 1;
|
||||
for (int i = 0; i < labelValues.length; i++) {
|
||||
labelValues[i] = new LabelAndValue(bulkPath[i].components[childComponentIdx], values[i]);
|
||||
}
|
||||
|
||||
// The path value and child count are taken as-is from the intermediate result:
return new FacetResult(
|
||||
dim, path, topChildrenForPath.pathValue, labelValues, topChildrenForPath.childCount);
|
||||
}
|
||||
|
||||
/**
 * Mutable holder for one dimension while ranking dims in the priority queue: the dim label, the
 * dim's ordinal and the value the dim is ranked by. Fields are non-final because the queue's
 * bottom entry is overwritten in place when a better dim is found.
 */
private static class DimValue {
|
||||
// Dimension label; also used as the tie-break when two values are equal.
String dim;
|
||||
// Ordinal of the dimension; read back later to compute the dim's top children.
int dimOrd;
|
||||
// Value the dimension is ranked by.
int value;
|
||||
|
||||
DimValueResult(String dim, int dimOrd, int value) {
|
||||
DimValue(String dim, int dimOrd, int value) {
|
||||
this.dim = dim;
|
||||
this.dimOrd = dimOrd;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create ChildOrdsResult to store dimCount, childCount, and the queue for the dimension's top
|
||||
* children
|
||||
*/
|
||||
private static class ChildOrdsResult {
|
||||
// Aggregated value for the dimension; -1 is used as a place-holder when the dim is
// multi-valued and dim counts were not required.
final int aggregatedValue;
|
||||
// Number of children counted for the dimension.
final int childCount;
|
||||
// Queue holding the dimension's top children (ordinal + value), not yet resolved to labels.
final TopOrdAndIntQueue q;
|
||||
|
||||
ChildOrdsResult(int aggregatedValue, int childCount, TopOrdAndIntQueue q) {
|
||||
this.aggregatedValue = aggregatedValue;
|
||||
this.childCount = childCount;
|
||||
this.q = q;
|
||||
}
|
||||
}
|
||||
/**
 * Intermediate result to store top children for a given path before resolving labels, etc.
 *
 * @param pathValue value reported for the path itself; -1 is used as a place-holder when the dim
 *     is multi-valued and dim counts were not required
 * @param childCount number of children counted for the path; a count of 0 means no result is
 *     produced for the path
 * @param childQueue queue of the top children (ordinal + value) that still need their ordinals
 *     resolved to labels
 */
|
||||
private record TopChildrenForPath(int pathValue, int childCount, TopOrdAndIntQueue childQueue) {}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.facet.taxonomy;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -156,7 +155,7 @@ abstract class TaxonomyFacets extends Facets {
|
|||
}
|
||||
|
||||
// Sort by highest value, tie break by dim:
|
||||
Collections.sort(results, BY_VALUE_THEN_DIM);
|
||||
results.sort(BY_VALUE_THEN_DIM);
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue