mirror of https://github.com/apache/lucene.git
LUCENE-10325: Add getTopDims functionality to Facets (#747)
This commit is contained in:
parent
0f93130d7b
commit
7c33f04d37
|
@ -48,4 +48,15 @@ public abstract class Facets {
|
|||
* indexed, for example depending on the type of document.
|
||||
*/
|
||||
public abstract List<FacetResult> getAllDims(int topN) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns labels for topN dimensions and their topNChildren sorted by the number of
|
||||
* hits/aggregated values that dimension matched; Results should be the same as calling getAllDims
|
||||
* and then only using the first topNDims; Sub-classes may want to override this implementation
|
||||
* with a more efficient one if they are able.
|
||||
*/
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
List<FacetResult> allResults = getAllDims(topNChildren);
|
||||
return allResults.subList(0, Math.min(topNDims, allResults.size()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.PrimitiveIterator;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
|
@ -49,6 +50,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* Compute facets counts from previously indexed {@link SortedSetDocValuesFacetField}, without
|
||||
|
@ -137,6 +139,10 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getPathResult be called without passing in the dimToChildOrdsResult
|
||||
* parameter
|
||||
*/
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
|
@ -145,11 +151,55 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN)
|
||||
throws IOException {
|
||||
return getPathResult(dimConfig, dim, path, pathOrd, childOrds, topN, null);
|
||||
}
|
||||
|
||||
/** Returns path results for a dimension */
|
||||
private FacetResult getPathResult(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
String[] path,
|
||||
int pathOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
ChildOrdsResult dimToChildOrdsResult)
|
||||
throws IOException {
|
||||
|
||||
ChildOrdsResult childOrdsResult;
|
||||
|
||||
// if getTopDims is called, get results from previously stored dimToChildOrdsResult, otherwise
|
||||
// call getChildOrdsResult to get dimCount, childCount and the queue for the dimension's top
|
||||
// children
|
||||
if (dimToChildOrdsResult != null) {
|
||||
childOrdsResult = dimToChildOrdsResult;
|
||||
} else {
|
||||
childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, pathOrd);
|
||||
}
|
||||
|
||||
if (childOrdsResult.q == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = getLabelValuesFromTopOrdAndIntQueue(childOrdsResult.q);
|
||||
|
||||
if (dimConfig.hierarchical == true) {
|
||||
return new FacetResult(
|
||||
dim, path, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
} else {
|
||||
return new FacetResult(
|
||||
dim, emptyPath, childOrdsResult.dimCount, labelValues, childOrdsResult.childCount);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns SortedSetDocValuesChildOrdsResult that contains results of dimCount, childCount, and
|
||||
* the queue for the dimension's top children to populate FacetResult in getPathResult.
|
||||
*/
|
||||
private ChildOrdsResult getChildOrdsResult(
|
||||
PrimitiveIterator.OfInt childOrds, int topN, FacetsConfig.DimConfig dimConfig, int pathOrd) {
|
||||
|
||||
TopOrdAndIntQueue q = null;
|
||||
|
||||
int bottomCount = 0;
|
||||
|
||||
int dimCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
|
@ -178,20 +228,9 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
|
||||
if (q == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
|
||||
if (dimConfig.hierarchical == false) {
|
||||
if (dimConfig.hierarchical == true) {
|
||||
dimCount = counts[pathOrd];
|
||||
} else {
|
||||
// see if dimCount is actually reliable or needs to be reset
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
|
@ -200,10 +239,47 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
dimCount = -1; // dimCount is in accurate at this point, so set it to -1
|
||||
}
|
||||
}
|
||||
return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
|
||||
} else {
|
||||
return new FacetResult(dim, path, counts[pathOrd], labelValues, childCount);
|
||||
}
|
||||
|
||||
return new ChildOrdsResult(dimCount, childCount, q);
|
||||
}
|
||||
|
||||
/** Returns label values for dims. */
|
||||
private LabelAndValue[] getLabelValuesFromTopOrdAndIntQueue(TopOrdAndIntQueue q)
|
||||
throws IOException {
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for (int i = labelValues.length - 1; i >= 0; i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
assert ordAndValue != null;
|
||||
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
|
||||
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
|
||||
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
|
||||
}
|
||||
return labelValues;
|
||||
}
|
||||
|
||||
/** Returns value/count of a dimension. */
|
||||
private int getDimValue(
|
||||
FacetsConfig.DimConfig dimConfig,
|
||||
String dim,
|
||||
int dimOrd,
|
||||
PrimitiveIterator.OfInt childOrds,
|
||||
int topN,
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult) {
|
||||
|
||||
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
|
||||
// indexing time, return dimCount directly
|
||||
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
|
||||
return counts[dimOrd];
|
||||
}
|
||||
|
||||
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
|
||||
ChildOrdsResult childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, dimOrd);
|
||||
|
||||
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
|
||||
// getChildOrdsResult again in getPathResult
|
||||
dimToChildOrdsResult.put(dim, childOrdsResult);
|
||||
return childOrdsResult.dimCount;
|
||||
}
|
||||
|
||||
private void countOneSegment(
|
||||
|
@ -366,33 +442,53 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
return counts[ord];
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloaded method to allow getFacetResultForDim be called without passing in the
|
||||
* dimToChildOrdsResult parameter
|
||||
*/
|
||||
private FacetResult getFacetResultForDim(String dim, int topNChildren) throws IOException {
|
||||
return getFacetResultForDim(dim, topNChildren, null);
|
||||
}
|
||||
|
||||
/** Returns FacetResult for a dimension. */
|
||||
private FacetResult getFacetResultForDim(
|
||||
String dim, int topNChildren, ChildOrdsResult dimToChildOrdsResult) throws IOException {
|
||||
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
return getPathResult(
|
||||
dimConfig,
|
||||
dim,
|
||||
emptyPath,
|
||||
dimOrd,
|
||||
dimTree.iterator(),
|
||||
topNChildren,
|
||||
dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
return getPathResult(
|
||||
dimConfig, dim, emptyPath, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getAllDims(int topN) throws IOException {
|
||||
|
||||
List<FacetResult> results = new ArrayList<>();
|
||||
for (String dim : state.getDims()) {
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
FacetResult fr = getPathResult(dimConfig, dim, emptyPath, dimOrd, dimTree.iterator(), topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
FacetResult fr = getPathResult(dimConfig, dim, emptyPath, dimOrd, childIt, topN);
|
||||
if (fr != null) {
|
||||
results.add(fr);
|
||||
}
|
||||
FacetResult factResult = getFacetResultForDim(dim, topN);
|
||||
if (factResult != null) {
|
||||
results.add(factResult);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -411,7 +507,114 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
}
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
|
||||
if (topNDims <= 0 || topNChildren <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0");
|
||||
}
|
||||
|
||||
// Creates priority queue to store top dimensions and sort by their aggregated values/hits and
|
||||
// string values.
|
||||
PriorityQueue<DimValueResult> pq =
|
||||
new PriorityQueue<>(topNDims) {
|
||||
@Override
|
||||
protected boolean lessThan(DimValueResult a, DimValueResult b) {
|
||||
if (a.value > b.value) {
|
||||
return false;
|
||||
} else if (a.value < b.value) {
|
||||
return true;
|
||||
} else {
|
||||
return a.dim.compareTo(b.dim) > 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
HashMap<String, ChildOrdsResult> dimToChildOrdsResult = new HashMap<>();
|
||||
int dimCount;
|
||||
|
||||
for (String dim : state.getDims()) {
|
||||
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
|
||||
if (dimConfig.hierarchical) {
|
||||
DimTree dimTree = state.getDimTree(dim);
|
||||
int dimOrd = dimTree.dimStartOrd;
|
||||
// get dim value
|
||||
dimCount =
|
||||
getDimValue(
|
||||
dimConfig, dim, dimOrd, dimTree.iterator(), topNChildren, dimToChildOrdsResult);
|
||||
} else {
|
||||
OrdRange ordRange = state.getOrdRange(dim);
|
||||
int dimOrd = ordRange.start;
|
||||
PrimitiveIterator.OfInt childIt = ordRange.iterator();
|
||||
if (dimConfig.multiValued && dimConfig.requireDimCount) {
|
||||
// If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
|
||||
// the dimension and we need to skip past it so the iterator is positioned on the first
|
||||
// child:
|
||||
childIt.next();
|
||||
}
|
||||
dimCount = getDimValue(dimConfig, dim, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
|
||||
}
|
||||
|
||||
if (dimCount != 0) {
|
||||
// use priority queue to store DimValueResult for topNDims
|
||||
if (pq.size() < topNDims) {
|
||||
pq.add(new DimValueResult(dim, dimCount));
|
||||
} else {
|
||||
if (dimCount > pq.top().value
|
||||
|| (dimCount == pq.top().value && dim.compareTo(pq.top().dim) < 0)) {
|
||||
DimValueResult bottomDim = pq.top();
|
||||
bottomDim.dim = dim;
|
||||
bottomDim.value = dimCount;
|
||||
pq.updateTop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get FacetResult for topNDims
|
||||
int resultSize = pq.size();
|
||||
FacetResult[] results = new FacetResult[resultSize];
|
||||
|
||||
while (pq.size() > 0) {
|
||||
DimValueResult dimValueResult = pq.pop();
|
||||
FacetResult facetResult =
|
||||
getFacetResultForDim(
|
||||
dimValueResult.dim, topNChildren, dimToChildOrdsResult.get(dimValueResult.dim));
|
||||
resultSize--;
|
||||
results[resultSize] = facetResult;
|
||||
}
|
||||
return Arrays.asList(results);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates ChildOrdsResult to store dimCount, childCount, and the queue for the dimension's top
|
||||
* children
|
||||
*/
|
||||
private static class ChildOrdsResult {
|
||||
final int dimCount;
|
||||
final int childCount;
|
||||
final TopOrdAndIntQueue q;
|
||||
|
||||
ChildOrdsResult(int dimCount, int childCount, TopOrdAndIntQueue q) {
|
||||
this.dimCount = dimCount;
|
||||
this.childCount = childCount;
|
||||
this.q = q;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates DimValueResult to store the label and value of dim in order to sort by these two
|
||||
* fields.
|
||||
*/
|
||||
private static class DimValueResult {
|
||||
String dim;
|
||||
int value;
|
||||
|
||||
DimValueResult(String dim, int value) {
|
||||
this.dim = dim;
|
||||
this.value = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -358,6 +358,19 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n",
|
||||
allResults.get(1).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = r.facets.getTopDims(2, 1);
|
||||
assertEquals(2, topNDimsResult.size());
|
||||
assertEquals(
|
||||
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n", topNDimsResult.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n",
|
||||
topNDimsResult.get(1).toString());
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = r.facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
// More interesting case: drill-down on two fields
|
||||
ddq = new DrillDownQuery(config);
|
||||
ddq.add("Author", "Lisa");
|
||||
|
@ -581,6 +594,17 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n",
|
||||
allResults.get(1).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
|
||||
assertEquals(1, topNDimsResult.size());
|
||||
assertEquals(
|
||||
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
|
||||
topNDimsResult.get(0).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allDimsResults = r.facets.getTopDims(10, 10);
|
||||
assertEquals(allResults, allDimsResults);
|
||||
|
||||
// More interesting case: drill-down on two fields
|
||||
ddq = new DrillDownQuery(config);
|
||||
ddq.add("Author", "Lisa");
|
||||
|
@ -1843,6 +1867,13 @@ public class TestDrillSideways extends FacetTestCase {
|
|||
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n",
|
||||
allResults.get(0).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
|
||||
assertEquals(1, topNDimsResult.size());
|
||||
assertEquals(
|
||||
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
|
||||
topNDimsResult.get(0).toString());
|
||||
|
||||
// More interesting case: drill-down on two fields
|
||||
ddq = new DrillDownQuery(config);
|
||||
ddq.add("Author", "Lisa");
|
||||
|
|
|
@ -155,6 +155,21 @@ public class TestLongValueFacetCounts extends LuceneTestCase {
|
|||
"dim=field path=[] value=101 childCount=6\n 0 (20)\n 1 (20)\n 2 (20)\n "
|
||||
+ "3 (20)\n 4 (20)\n 9223372036854775807 (1)\n",
|
||||
result.get(0).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> getTopDimResult = facets.getTopDims(1, 1);
|
||||
assertEquals(1, getTopDimResult.size());
|
||||
assertEquals(
|
||||
"dim=field path=[] value=101 childCount=6\n 0 (20)\n", getTopDimResult.get(0).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
|
||||
assertEquals(result, allDimsResults);
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
|
|
@ -387,6 +387,11 @@ public class TestStringValueFacetCounts extends FacetTestCase {
|
|||
assertEquals(1, allDims.size());
|
||||
assertEquals(facetResult, allDims.get(0));
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(2, topN);
|
||||
assertEquals(1, topNDimsResult.size());
|
||||
assertEquals(facetResult, topNDimsResult.get(0));
|
||||
|
||||
// This is a little strange, but we request all labels at this point so that when we
|
||||
// secondarily sort by label value in order to compare to the expected results, we have
|
||||
// all the values. See LUCENE-9991:
|
||||
|
|
|
@ -243,6 +243,21 @@ public class TestRangeFacetCounts extends FacetTestCase {
|
|||
"dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n",
|
||||
result.get(0).toString());
|
||||
|
||||
// test getAllDims(1)
|
||||
List<FacetResult> test1Child = facets.getAllDims(1);
|
||||
assertEquals(1, test1Child.size());
|
||||
assertEquals(
|
||||
"dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n",
|
||||
test1Child.get(0).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(1, 1);
|
||||
assertEquals(test1Child, topNDimsResult);
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
|
|
|
@ -104,6 +104,41 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
"dim=b path=[] value=2 childCount=2\n buzz (2)\n baz (1)\n",
|
||||
facets.getTopChildren(10, "b").toString());
|
||||
|
||||
// test getAllDims
|
||||
List<FacetResult> results = facets.getAllDims(10);
|
||||
assertEquals(2, results.size());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n buzz (2)\n baz (1)\n",
|
||||
results.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=-1 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
|
||||
results.get(1).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
|
||||
assertEquals(results, allDimsResults);
|
||||
|
||||
// test getTopDims(2, 1)
|
||||
List<FacetResult> topDimsResults = facets.getTopDims(2, 1);
|
||||
assertEquals(2, topDimsResults.size());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n buzz (2)\n", topDimsResults.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=-1 childCount=3\n foo (2)\n", topDimsResults.get(1).toString());
|
||||
|
||||
// test getAllDims
|
||||
List<FacetResult> results2 = facets.getAllDims(1);
|
||||
assertEquals(2, results2.size());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n buzz (2)\n", results2.get(0).toString());
|
||||
|
||||
// test getTopDims(1, 1)
|
||||
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
|
||||
assertEquals(1, topDimsResults1.size());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n buzz (2)\n",
|
||||
topDimsResults1.get(0).toString());
|
||||
|
||||
// DrillDown:
|
||||
DrillDownQuery q = new DrillDownQuery(config);
|
||||
q.add("a", "foo");
|
||||
|
@ -117,6 +152,149 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// test tricky combinations of the three config: MultiValued, Hierarchical, and RequireDimCount of
|
||||
// a dim
|
||||
public void testCombinationsOfConfig() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
|
||||
// case 1: dimension "a" is hierarchical and non-multiValued
|
||||
// expect returns counts[pathOrd]
|
||||
config.setMultiValued("a", false);
|
||||
config.setHierarchical("a", true);
|
||||
|
||||
// case 2: dimension "b" is hierarchical and multiValued and setRequireDimCount = true
|
||||
// expect returns counts[pathOrd]
|
||||
config.setMultiValued("b", true);
|
||||
config.setHierarchical("b", true);
|
||||
config.setRequireDimCount("b", true);
|
||||
|
||||
// case 3: dimension "c" is hierarchical and multiValued and setRequireDimCount != true
|
||||
// expect always returns counts[pathOrd] for Hierarchical = true
|
||||
config.setMultiValued("c", true);
|
||||
config.setHierarchical("c", true);
|
||||
|
||||
// case 4: dimension "d" is non-hierarchical but multiValued and setRequireDimCount = true
|
||||
// expect returns counts[pathOrd]
|
||||
config.setMultiValued("d", true);
|
||||
config.setHierarchical("d", false);
|
||||
config.setRequireDimCount("d", true);
|
||||
|
||||
// case 4: dimension "e" that is non-hierarchical and multiValued and setRequireDimCount = false
|
||||
// expect returns -1, this is the only case that we reset dimCount to -1
|
||||
config.setMultiValued("e", true);
|
||||
config.setHierarchical("e", false);
|
||||
config.setRequireDimCount("e", false);
|
||||
|
||||
// case 5: dimension "f" that it is non-hierarchical and non-multiValued and expect returns
|
||||
// counts[pathOrd]
|
||||
config.setMultiValued("f", false);
|
||||
config.setHierarchical("f", false);
|
||||
|
||||
// case 6: expect returns counts[pathOrd] for dims with setHierarchical = true
|
||||
config.setHierarchical("g", true);
|
||||
|
||||
// case 7: expect returns counts[pathOrd] for dims with setHierarchical = true
|
||||
config.setHierarchical("g-2", false);
|
||||
|
||||
// case 8: expect returns counts[pathOrd] for dims with setHierarchical = true
|
||||
config.setRequireDimCount("h", true);
|
||||
config.setMultiValued("h", true);
|
||||
|
||||
try (Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "bar"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "zoo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "baz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("e", "buzz"));
|
||||
doc.add(new SortedSetDocValuesFacetField("f", "buzze"));
|
||||
doc.add(new SortedSetDocValuesFacetField("g", "buzzel"));
|
||||
doc.add(new SortedSetDocValuesFacetField("g-2", "buzzell"));
|
||||
doc.add(new SortedSetDocValuesFacetField("h", "buzzele"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
try (IndexReader r = writer.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
|
||||
// Per-top-reader state:
|
||||
SortedSetDocValuesReaderState state =
|
||||
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
|
||||
|
||||
ExecutorService exec = randomExecutorServiceOrNull();
|
||||
try {
|
||||
Facets facets = getAllFacets(searcher, state, exec);
|
||||
assertEquals(
|
||||
"dim=a path=[] value=1 childCount=1\n foo (1)\n",
|
||||
facets.getTopChildren(10, "a").toString());
|
||||
// value for dim b should be 1 since it's multivalued but _does_ require dim counts:
|
||||
assertEquals(
|
||||
"dim=b path=[] value=1 childCount=1\n bar (1)\n",
|
||||
facets.getTopChildren(10, "b").toString());
|
||||
assertEquals(
|
||||
"dim=c path=[] value=1 childCount=1\n zoo (1)\n",
|
||||
facets.getTopChildren(10, "c").toString());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=1 childCount=1\n baz (1)\n",
|
||||
facets.getTopChildren(10, "d").toString());
|
||||
// value for dim e should be -1 since it's multivalued but doesn't require dim counts:
|
||||
assertEquals(
|
||||
"dim=e path=[] value=-1 childCount=1\n buzz (1)\n",
|
||||
facets.getTopChildren(10, "e").toString());
|
||||
assertEquals(
|
||||
"dim=f path=[] value=1 childCount=1\n buzze (1)\n",
|
||||
facets.getTopChildren(10, "f").toString());
|
||||
assertEquals(
|
||||
"dim=g path=[] value=1 childCount=1\n buzzel (1)\n",
|
||||
facets.getTopChildren(10, "g").toString());
|
||||
assertEquals(
|
||||
"dim=g-2 path=[] value=1 childCount=1\n buzzell (1)\n",
|
||||
facets.getTopChildren(10, "g-2").toString());
|
||||
assertEquals(
|
||||
"dim=h path=[] value=1 childCount=1\n buzzele (1)\n",
|
||||
facets.getTopChildren(10, "h").toString());
|
||||
|
||||
// test getAllDims
|
||||
List<FacetResult> results = facets.getAllDims(10);
|
||||
assertEquals(9, results.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=1 childCount=1\n foo (1)\n", results.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=1 childCount=1\n bar (1)\n", results.get(1).toString());
|
||||
assertEquals(
|
||||
"dim=c path=[] value=1 childCount=1\n zoo (1)\n", results.get(2).toString());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=1 childCount=1\n baz (1)\n", results.get(3).toString());
|
||||
assertEquals(
|
||||
"dim=f path=[] value=1 childCount=1\n buzze (1)\n", results.get(4).toString());
|
||||
assertEquals(
|
||||
"dim=g path=[] value=1 childCount=1\n buzzel (1)\n", results.get(5).toString());
|
||||
assertEquals(
|
||||
"dim=g-2 path=[] value=1 childCount=1\n buzzell (1)\n", results.get(6).toString());
|
||||
assertEquals(
|
||||
"dim=h path=[] value=1 childCount=1\n buzzele (1)\n", results.get(7).toString());
|
||||
assertEquals(
|
||||
"dim=e path=[] value=-1 childCount=1\n buzz (1)\n", results.get(8).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allTopDimsResults = facets.getTopDims(10, 10);
|
||||
assertEquals(results, allTopDimsResults);
|
||||
|
||||
// test getTopDims(n, 10)
|
||||
if (allTopDimsResults.size() > 0) {
|
||||
for (int i = 1; i < results.size(); i++) {
|
||||
assertEquals(results.subList(0, i), facets.getTopDims(i, 10));
|
||||
}
|
||||
}
|
||||
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicHierarchical() throws Exception {
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
config.setMultiValued("a", true);
|
||||
|
@ -702,6 +880,11 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
doc.add(new SortedSetDocValuesFacetField("a", "foo3"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "bar2"));
|
||||
doc.add(new SortedSetDocValuesFacetField("c", "baz1"));
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "biz1"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("d", "biz2"));
|
||||
writer.addDocument(config.build(doc));
|
||||
|
||||
// NRT open
|
||||
|
@ -719,7 +902,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
// Ask for top 10 labels for any dims that have counts:
|
||||
List<FacetResult> results = facets.getAllDims(10);
|
||||
|
||||
assertEquals(3, results.size());
|
||||
assertEquals(4, results.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
|
||||
results.get(0).toString());
|
||||
|
@ -727,7 +910,42 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
|
||||
results.get(1).toString());
|
||||
assertEquals(
|
||||
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString());
|
||||
"dim=d path=[] value=2 childCount=2\n biz1 (1)\n biz2 (1)\n",
|
||||
results.get(2).toString());
|
||||
assertEquals(
|
||||
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(3).toString());
|
||||
|
||||
// test getAllDims with topN = 1, sort by dim names when values are equal
|
||||
List<FacetResult> top1results = facets.getAllDims(1);
|
||||
assertEquals(4, results.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n", top1results.get(1).toString());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=2 childCount=2\n biz1 (1)\n", top1results.get(2).toString());
|
||||
assertEquals(
|
||||
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(3).toString());
|
||||
|
||||
// test getTopDims(1, 1)
|
||||
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
|
||||
assertEquals(1, topDimsResults1.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n",
|
||||
topDimsResults1.get(0).toString());
|
||||
|
||||
// test top 2 dims that have the same counts, expect to sort by dim names
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(3, 2);
|
||||
assertEquals(3, topDimsResults2.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n",
|
||||
topDimsResults2.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
|
||||
topDimsResults2.get(1).toString());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=2 childCount=2\n biz1 (1)\n biz2 (1)\n",
|
||||
topDimsResults2.get(2).toString());
|
||||
|
||||
Collection<Accountable> resources = state.getChildResources();
|
||||
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
|
||||
|
@ -795,6 +1013,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
assertEquals(
|
||||
"dim=e path=[] value=1 childCount=1\n biz (1)\n", results.get(1).toString());
|
||||
|
||||
// test getTopDims(1, 1)
|
||||
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
|
||||
assertEquals(1, topDimsResults1.size());
|
||||
assertEquals(
|
||||
"dim=d path=[] value=2 childCount=1\n foo (2)\n", results.get(0).toString());
|
||||
|
||||
Collection<Accountable> resources = state.getChildResources();
|
||||
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
|
||||
if (searcher.getIndexReader().leaves().size() > 1) {
|
||||
|
@ -1012,6 +1236,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
// sortTies(actual);
|
||||
|
||||
assertEquals(expected, actual);
|
||||
|
||||
// test getTopDims(1, 10)
|
||||
if (actual.size() > 0) {
|
||||
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 10);
|
||||
assertEquals(actual.get(0), topDimsResults1.get(0));
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (exec != null) exec.shutdownNow();
|
||||
|
@ -1221,6 +1451,13 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
|
||||
assertEquals(expectedAllDims, actualAllDims);
|
||||
|
||||
// test getTopDims(n, 10)
|
||||
if (actualAllDims.size() > 0) {
|
||||
for (int i = 1; i < actualAllDims.size(); i++) {
|
||||
assertEquals(actualAllDims.subList(0, i), facets.getTopDims(i, 10));
|
||||
}
|
||||
}
|
||||
|
||||
// Dfs through top children
|
||||
for (FacetResult dimResult : actualAllDims) {
|
||||
if (config.getDimConfig(dimResult.dim).hierarchical) {
|
||||
|
|
|
@ -163,6 +163,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
|||
|
||||
Document doc = new Document();
|
||||
doc.add(new FacetField("a", "foo1"));
|
||||
doc.add(new FacetField("b", "aar1"));
|
||||
writer.addDocument(config.build(taxoWriter, doc));
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -201,9 +202,41 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
|||
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
|
||||
results.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results.get(1).toString());
|
||||
"dim=b path=[] value=3 childCount=3\n aar1 (1)\n bar1 (1)\n bar2 (1)\n",
|
||||
results.get(1).toString());
|
||||
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString());
|
||||
|
||||
// test getAllDims with topN = 1, sort by dim names when values are equal; NOTE(review): the size assertion below checks results, not top1results - confirm intent
|
||||
List<FacetResult> top1results = facets.getAllDims(1);
|
||||
|
||||
assertEquals(3, results.size());
|
||||
assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString());
|
||||
assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString());
|
||||
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
|
||||
assertEquals(2, topNDimsResult.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
|
||||
assertEquals(results, allDimsResults);
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
// test getTopDims(1, 0) with topNChildren = 0
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
facets.getTopDims(1, 0);
|
||||
});
|
||||
|
||||
writer.close();
|
||||
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
|
||||
}
|
||||
|
@ -590,10 +623,28 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
|||
Facets facets =
|
||||
getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
|
||||
|
||||
for (FacetResult result : facets.getAllDims(10)) {
|
||||
List<FacetResult> allDimsResult = facets.getAllDims(10);
|
||||
for (FacetResult result : allDimsResult) {
|
||||
assertEquals(r.numDocs(), result.value.intValue());
|
||||
}
|
||||
|
||||
// test default implementation of getTopDims
|
||||
if (allDimsResult.size() > 0) {
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(1, 10);
|
||||
assertEquals(allDimsResult.get(0), topNDimsResult.get(0));
|
||||
}
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
// test getTopDims(1, 0) with topNChildren = 0
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
facets.getTopDims(1, 0);
|
||||
});
|
||||
|
||||
iw.close();
|
||||
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
|
||||
}
|
||||
|
@ -623,6 +674,12 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
|||
assertEquals(
|
||||
"calling getFacetResults twice should return the .equals()=true result", res1, res2);
|
||||
|
||||
// test default implementation of getTopDims
|
||||
if (res1.size() > 0) {
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(1, 10);
|
||||
assertEquals(res1.get(0), topNDimsResult.get(0));
|
||||
}
|
||||
|
||||
iw.close();
|
||||
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
|
||||
}
|
||||
|
@ -809,6 +866,12 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
|
|||
|
||||
assertEquals(expected, actual);
|
||||
|
||||
// test default implementation of getTopDims
|
||||
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(actual.size(), 10);
|
||||
sortTies(topNDimsResult);
|
||||
assertEquals(actual, topNDimsResult);
|
||||
|
||||
// Test facet labels for each matching test doc
|
||||
List<List<FacetLabel>> actualLabels = getAllTaxonomyFacetLabels(null, tr, fc);
|
||||
assertEquals(expectedLabels.size(), actualLabels.size());
|
||||
|
|
|
@ -195,6 +195,29 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
|
|||
assertEquals(
|
||||
"dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results.get(2).toString());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
|
||||
assertEquals(2, topNDimsResult.size());
|
||||
assertEquals(
|
||||
"dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n", topNDimsResult.get(0).toString());
|
||||
assertEquals(
|
||||
"dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n", topNDimsResult.get(1).toString());
|
||||
|
||||
// test getTopDims(10, 10) and expect same results from getAllDims(10)
|
||||
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
|
||||
assertEquals(results, allDimsResults);
|
||||
|
||||
// test getTopDims(0, 1)
|
||||
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
|
||||
assertEquals(0, topDimsResults2.size());
|
||||
|
||||
// test getTopDims(1, 0) with topNChildren = 0
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
facets.getTopDims(1, 0);
|
||||
});
|
||||
|
||||
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
|
||||
}
|
||||
|
||||
|
@ -237,6 +260,10 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
|
|||
List<FacetResult> results = facets.getAllDims(10);
|
||||
assertTrue(results.isEmpty());
|
||||
|
||||
// test default implementation of getTopDims
|
||||
List<FacetResult> topDimsResults = facets.getTopDims(10, 10);
|
||||
assertTrue(topDimsResults.isEmpty());
|
||||
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> {
|
||||
|
@ -510,6 +537,12 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
|
|||
|
||||
List<FacetResult> actual = facets.getAllDims(10);
|
||||
|
||||
// test default implementation of getTopDims
|
||||
if (actual.size() > 0) {
|
||||
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 10);
|
||||
assertEquals(actual.get(0), topDimsResults1.get(0));
|
||||
}
|
||||
|
||||
// Messy: fixup ties
|
||||
sortTies(actual);
|
||||
|
||||
|
|
Loading…
Reference in New Issue