mirror of https://github.com/apache/lucene.git
Some refactoring/cleanup of AbstractSortedSetDocValueFacetCounts (#983)
This commit is contained in:
parent
e078bc1cd9
commit
5c3d92d69d
|
@ -75,48 +75,16 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||||
@Override
|
@Override
|
||||||
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
public FacetResult getAllChildren(String dim, String... path) throws IOException {
|
||||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||||
|
ChildIterationCursor iterationCursor = prepareChildIteration(dim, dimConfig, path);
|
||||||
|
if (iterationCursor == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine the path ord and resolve an iterator to its immediate children. The logic for this
|
|
||||||
// depends on whether-or-not the dimension is configured as hierarchical:
|
|
||||||
final int pathOrd;
|
|
||||||
final PrimitiveIterator.OfInt childIterator;
|
|
||||||
if (dimConfig.hierarchical) {
|
|
||||||
DimTree dimTree = state.getDimTree(dim);
|
|
||||||
if (path.length > 0) {
|
|
||||||
pathOrd = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
|
|
||||||
} else {
|
|
||||||
// If there's no path, this is a little more efficient to just look up the dim:
|
|
||||||
pathOrd = dimTree.dimStartOrd;
|
|
||||||
}
|
|
||||||
if (pathOrd < 0) {
|
|
||||||
// path was never indexed
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
childIterator = dimTree.iterator(pathOrd);
|
|
||||||
} else {
|
|
||||||
if (path.length > 0) {
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"Field is not configured as hierarchical, path should be 0 length");
|
|
||||||
}
|
|
||||||
OrdRange ordRange = state.getOrdRange(dim);
|
|
||||||
if (ordRange == null) {
|
|
||||||
// means dimension was never indexed
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
pathOrd = ordRange.start;
|
|
||||||
childIterator = ordRange.iterator();
|
|
||||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
|
||||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
|
||||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
|
||||||
// child:
|
|
||||||
childIterator.next();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Compute the actual results:
|
// Compute the actual results:
|
||||||
int pathCount = 0;
|
int pathCount = 0;
|
||||||
List<LabelAndValue> labelValues = new ArrayList<>();
|
List<LabelAndValue> labelValues = new ArrayList<>();
|
||||||
while (childIterator.hasNext()) {
|
while (iterationCursor.childIterator.hasNext()) {
|
||||||
int ord = childIterator.next();
|
int ord = iterationCursor.childIterator.next();
|
||||||
int count = getCount(ord);
|
int count = getCount(ord);
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
pathCount += count;
|
pathCount += count;
|
||||||
|
@ -126,18 +94,8 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dimConfig.hierarchical) {
|
pathCount = adjustPathCountIfNecessary(dimConfig, iterationCursor.pathOrd, pathCount);
|
||||||
pathCount = getCount(pathOrd);
|
|
||||||
} else {
|
|
||||||
// see if pathCount is actually reliable or needs to be reset
|
|
||||||
if (dimConfig.multiValued) {
|
|
||||||
if (dimConfig.requireDimCount) {
|
|
||||||
pathCount = getCount(pathOrd);
|
|
||||||
} else {
|
|
||||||
pathCount = -1; // pathCount is inaccurate at this point, so set it to -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new FacetResult(
|
return new FacetResult(
|
||||||
dim, path, pathCount, labelValues.toArray(new LabelAndValue[0]), labelValues.size());
|
dim, path, pathCount, labelValues.toArray(new LabelAndValue[0]), labelValues.size());
|
||||||
}
|
}
|
||||||
|
@ -276,69 +234,11 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||||
abstract int getCount(int ord);
|
abstract int getCount(int ord);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the top-n children for the given path and iterator of all immediate children of the
|
* Determine the path ord and resolve an iterator to its immediate children. The logic for this
|
||||||
* path. This returns an intermediate result that does the minimal required work, avoiding the
|
* depends on whether-or-not the dimension is configured as hierarchical.
|
||||||
* cost of looking up string labels, etc.
|
|
||||||
*/
|
*/
|
||||||
TopChildrenForPath computeTopChildren(
|
private ChildIterationCursor prepareChildIteration(
|
||||||
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
|
String dim, DimConfig dimConfig, String... path) throws IOException {
|
||||||
TopOrdAndIntQueue q = null;
|
|
||||||
int bottomCount = 0;
|
|
||||||
int pathCount = 0;
|
|
||||||
int childCount = 0;
|
|
||||||
|
|
||||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
|
||||||
while (childOrds.hasNext()) {
|
|
||||||
int ord = childOrds.next();
|
|
||||||
int count = getCount(ord);
|
|
||||||
if (count > 0) {
|
|
||||||
pathCount += count;
|
|
||||||
childCount++;
|
|
||||||
if (count > bottomCount) {
|
|
||||||
if (reuse == null) {
|
|
||||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
|
||||||
}
|
|
||||||
reuse.ord = ord;
|
|
||||||
reuse.value = count;
|
|
||||||
if (q == null) {
|
|
||||||
// Lazy init, so we don't create this for the
|
|
||||||
// sparse case unnecessarily
|
|
||||||
q = new TopOrdAndIntQueue(topN);
|
|
||||||
}
|
|
||||||
reuse = q.insertWithOverflow(reuse);
|
|
||||||
if (q.size() == topN) {
|
|
||||||
bottomCount = q.top().value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dimConfig.hierarchical) {
|
|
||||||
pathCount = getCount(pathOrd);
|
|
||||||
} else {
|
|
||||||
// see if pathCount is actually reliable or needs to be reset
|
|
||||||
if (dimConfig.multiValued) {
|
|
||||||
if (dimConfig.requireDimCount) {
|
|
||||||
pathCount = getCount(pathOrd);
|
|
||||||
} else {
|
|
||||||
pathCount = -1; // pathCount is inaccurate at this point, so set it to -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new TopChildrenForPath(pathCount, childCount, q);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
|
||||||
* form.
|
|
||||||
*/
|
|
||||||
TopChildrenForPath getTopChildrenForPath(int topN, String dim, String... path)
|
|
||||||
throws IOException {
|
|
||||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
|
||||||
|
|
||||||
// Determine the path ord and resolve an iterator to its immediate children. The logic for this
|
|
||||||
// depends on whether-or-not the dimension is configured as hierarchical:
|
|
||||||
final int pathOrd;
|
final int pathOrd;
|
||||||
final PrimitiveIterator.OfInt childIterator;
|
final PrimitiveIterator.OfInt childIterator;
|
||||||
if (dimConfig.hierarchical) {
|
if (dimConfig.hierarchical) {
|
||||||
|
@ -374,16 +274,96 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return new ChildIterationCursor(pathOrd, childIterator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine the top-n children for a specified dimension + path. Results are in an intermediate
|
||||||
|
* form.
|
||||||
|
*/
|
||||||
|
private TopChildrenForPath getTopChildrenForPath(int topN, String dim, String... path)
|
||||||
|
throws IOException {
|
||||||
|
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||||
|
ChildIterationCursor iterationCursor = prepareChildIteration(dim, dimConfig, path);
|
||||||
|
if (iterationCursor == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// Compute the actual results:
|
// Compute the actual results:
|
||||||
return computeTopChildren(childIterator, topN, dimConfig, pathOrd);
|
return computeTopChildren(
|
||||||
|
iterationCursor.childIterator, topN, dimConfig, iterationCursor.pathOrd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the top-n children for the given path and iterator of all immediate children of the
|
||||||
|
* path. This returns an intermediate result that does the minimal required work, avoiding the
|
||||||
|
* cost of looking up string labels, etc.
|
||||||
|
*/
|
||||||
|
private TopChildrenForPath computeTopChildren(
|
||||||
|
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
|
||||||
|
TopOrdAndIntQueue q = null;
|
||||||
|
int bottomCount = 0;
|
||||||
|
int pathCount = 0;
|
||||||
|
int childCount = 0;
|
||||||
|
|
||||||
|
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||||
|
while (childOrds.hasNext()) {
|
||||||
|
int ord = childOrds.next();
|
||||||
|
int count = getCount(ord);
|
||||||
|
if (count > 0) {
|
||||||
|
pathCount += count;
|
||||||
|
childCount++;
|
||||||
|
if (count > bottomCount) {
|
||||||
|
if (reuse == null) {
|
||||||
|
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||||
|
}
|
||||||
|
reuse.ord = ord;
|
||||||
|
reuse.value = count;
|
||||||
|
if (q == null) {
|
||||||
|
// Lazy init, so we don't create this for the
|
||||||
|
// sparse case unnecessarily
|
||||||
|
q = new TopOrdAndIntQueue(topN);
|
||||||
|
}
|
||||||
|
reuse = q.insertWithOverflow(reuse);
|
||||||
|
if (q.size() == topN) {
|
||||||
|
bottomCount = q.top().value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pathCount = adjustPathCountIfNecessary(dimConfig, pathOrd, pathCount);
|
||||||
|
|
||||||
|
return new TopChildrenForPath(pathCount, childCount, q);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int adjustPathCountIfNecessary(DimConfig dimConfig, int pathOrd, int computedCount) {
|
||||||
|
if (dimConfig.hierarchical) {
|
||||||
|
// hierarchical dims index all path counts directly:
|
||||||
|
return getCount(pathOrd);
|
||||||
|
} else {
|
||||||
|
if (dimConfig.multiValued) {
|
||||||
|
if (dimConfig.requireDimCount) {
|
||||||
|
// multi-value dims configured to "require dim counts" also index path counts directly:
|
||||||
|
return getCount(pathOrd);
|
||||||
|
} else {
|
||||||
|
// we're unable to produce accurate counts for multi-value dims that are _not_ configured
|
||||||
|
// to "require dim counts":
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// aggregated counts are correct for single-valued, non-hierarchical dims:
|
||||||
|
return computedCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
* Create a FacetResult for the provided dim + path and intermediate results. Does the extra work
|
||||||
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
* of resolving ordinals -> labels, etc. Will return null if there are no children.
|
||||||
*/
|
*/
|
||||||
FacetResult createFacetResult(TopChildrenForPath topChildrenForPath, String dim, String... path)
|
private FacetResult createFacetResult(
|
||||||
throws IOException {
|
TopChildrenForPath topChildrenForPath, String dim, String... path) throws IOException {
|
||||||
// If the intermediate result is null or there are no children, we return null:
|
// If the intermediate result is null or there are no children, we return null:
|
||||||
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
if (topChildrenForPath == null || topChildrenForPath.childCount == 0) {
|
||||||
return null;
|
return null;
|
||||||
|
@ -417,4 +397,14 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
|
||||||
this.value = value;
|
this.value = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final class ChildIterationCursor {
|
||||||
|
final int pathOrd;
|
||||||
|
final PrimitiveIterator.OfInt childIterator;
|
||||||
|
|
||||||
|
ChildIterationCursor(int pathOrd, PrimitiveIterator.OfInt childIterator) {
|
||||||
|
this.pathOrd = pathOrd;
|
||||||
|
this.childIterator = childIterator;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue