LUCENE-10325: Add getTopDims functionality to Facets (#747)

This commit is contained in:
Yuting Gan 2022-03-28 15:54:07 -07:00 committed by GitHub
parent 0f93130d7b
commit 7c33f04d37
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 660 additions and 47 deletions

View File

@ -48,4 +48,15 @@ public abstract class Facets {
* indexed, for example depending on the type of document. * indexed, for example depending on the type of document.
*/ */
public abstract List<FacetResult> getAllDims(int topN) throws IOException; public abstract List<FacetResult> getAllDims(int topN) throws IOException;
/**
 * Returns at most {@code topNDims} dimensions, each with up to {@code topNChildren} labels.
 *
 * <p>This default implementation asks {@link #getAllDims(int)} for every dimension and keeps only
 * the leading entries, so the result matches calling getAllDims and using the first topNDims
 * results. The ordering (by the number of hits/aggregated values each dimension matched) is
 * therefore whatever getAllDims produces. The returned list is a {@code subList} view of the
 * list returned by getAllDims. Sub-classes may want to override this implementation with a more
 * efficient one if they are able.
 *
 * @param topNDims maximum number of dimensions to return
 * @param topNChildren number of children requested per dimension; passed through to getAllDims
 * @return the leading dimensions of the getAllDims result, possibly fewer than topNDims
 * @throws IOException if getAllDims throws it
 */
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
  final List<FacetResult> rankedDims = getAllDims(topNChildren);
  // Never ask for more entries than getAllDims actually produced.
  final int limit = Math.min(topNDims, rankedDims.size());
  return rankedDims.subList(0, limit);
}
} }

View File

@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.PrimitiveIterator; import java.util.PrimitiveIterator;
import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.FacetResult;
@ -49,6 +50,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
/** /**
* Compute facets counts from previously indexed {@link SortedSetDocValuesFacetField}, without * Compute facets counts from previously indexed {@link SortedSetDocValuesFacetField}, without
@ -137,6 +139,10 @@ public class SortedSetDocValuesFacetCounts extends Facets {
} }
} }
/**
 * Convenience overload that allows getPathResult to be called without passing the
 * dimToChildOrdsResult parameter.
 */
private FacetResult getPathResult( private FacetResult getPathResult(
FacetsConfig.DimConfig dimConfig, FacetsConfig.DimConfig dimConfig,
String dim, String dim,
@ -145,11 +151,55 @@ public class SortedSetDocValuesFacetCounts extends Facets {
PrimitiveIterator.OfInt childOrds, PrimitiveIterator.OfInt childOrds,
int topN) int topN)
throws IOException { throws IOException {
return getPathResult(dimConfig, dim, path, pathOrd, childOrds, topN, null);
}
/**
 * Builds the FacetResult for one dimension (or one path inside a hierarchical dimension).
 *
 * @param dimConfig configuration of the dimension; its {@code hierarchical} flag decides which
 *     path is reported in the result
 * @param dim dimension name
 * @param path path to report for hierarchical dimensions
 * @param pathOrd ordinal handed to getChildOrdsResult when the child data has to be computed
 * @param childOrds iterator over the child ordinals; only consumed when no precomputed result is
 *     supplied
 * @param topN number of children requested
 * @param dimToChildOrdsResult child data already computed by getTopDims, or {@code null} to
 *     compute it here
 * @return the FacetResult, or {@code null} when the child queue is {@code null}
 */
private FacetResult getPathResult(
    FacetsConfig.DimConfig dimConfig,
    String dim,
    String[] path,
    int pathOrd,
    PrimitiveIterator.OfInt childOrds,
    int topN,
    ChildOrdsResult dimToChildOrdsResult)
    throws IOException {
  // Reuse the result stored by getTopDims when there is one; otherwise compute dimCount,
  // childCount and the top-children queue now.
  final ChildOrdsResult resolved =
      dimToChildOrdsResult == null
          ? getChildOrdsResult(childOrds, topN, dimConfig, pathOrd)
          : dimToChildOrdsResult;

  if (resolved.q == null) {
    return null;
  }

  final LabelAndValue[] labels = getLabelValuesFromTopOrdAndIntQueue(resolved.q);
  // Non-hierarchical dimensions always report the empty path.
  final String[] reportedPath = dimConfig.hierarchical ? path : emptyPath;
  return new FacetResult(dim, reportedPath, resolved.dimCount, labels, resolved.childCount);
}
/**
 * Returns a ChildOrdsResult that contains the dimCount, the childCount, and the queue of the
 * dimension's top children, which getPathResult uses to populate the FacetResult.
 */
private ChildOrdsResult getChildOrdsResult(
PrimitiveIterator.OfInt childOrds, int topN, FacetsConfig.DimConfig dimConfig, int pathOrd) {
TopOrdAndIntQueue q = null; TopOrdAndIntQueue q = null;
int bottomCount = 0; int bottomCount = 0;
int dimCount = 0; int dimCount = 0;
int childCount = 0; int childCount = 0;
@ -178,20 +228,9 @@ public class SortedSetDocValuesFacetCounts extends Facets {
} }
} }
if (q == null) { if (dimConfig.hierarchical == true) {
return null; dimCount = counts[pathOrd];
} } else {
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for (int i = labelValues.length - 1; i >= 0; i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
assert ordAndValue != null;
final BytesRef term = dv.lookupOrd(ordAndValue.ord);
String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
labelValues[i] = new LabelAndValue(parts[parts.length - 1], ordAndValue.value);
}
if (dimConfig.hierarchical == false) {
// see if dimCount is actually reliable or needs to be reset // see if dimCount is actually reliable or needs to be reset
if (dimConfig.multiValued) { if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) { if (dimConfig.requireDimCount) {
@ -200,10 +239,47 @@ public class SortedSetDocValuesFacetCounts extends Facets {
dimCount = -1; // dimCount is in accurate at this point, so set it to -1 dimCount = -1; // dimCount is in accurate at this point, so set it to -1
} }
} }
return new FacetResult(dim, emptyPath, dimCount, labelValues, childCount);
} else {
return new FacetResult(dim, path, counts[pathOrd], labelValues, childCount);
} }
return new ChildOrdsResult(dimCount, childCount, q);
}
/**
 * Drains the given queue into an array of label/value pairs.
 *
 * <p>Entries are popped one by one and written from the last array slot to the first. Each label
 * is the final path component of the term looked up for the entry's ordinal. The queue is empty
 * when this method returns.
 *
 * @param q queue of ordinals and values to drain; must not be {@code null}
 * @return one LabelAndValue per queue entry, in reverse pop order
 */
private LabelAndValue[] getLabelValuesFromTopOrdAndIntQueue(TopOrdAndIntQueue q)
    throws IOException {
  final int size = q.size();
  final LabelAndValue[] labelValues = new LabelAndValue[size];
  int slot = size;
  while (slot > 0) {
    // NOTE(review): presumably pop() yields the lowest-ranked entry first, which is why the array
    // is filled back-to-front -- confirm against TopOrdAndIntQueue.
    final TopOrdAndIntQueue.OrdAndValue entry = q.pop();
    assert entry != null;
    final String fullPath = dv.lookupOrd(entry.ord).utf8ToString();
    final String[] components = FacetsConfig.stringToPath(fullPath);
    final String leafLabel = components[components.length - 1];
    labelValues[--slot] = new LabelAndValue(leafLabel, entry.value);
  }
  return labelValues;
}
/** Returns value/count of a dimension. */
private int getDimValue(
FacetsConfig.DimConfig dimConfig,
String dim,
int dimOrd,
PrimitiveIterator.OfInt childOrds,
int topN,
HashMap<String, ChildOrdsResult> dimToChildOrdsResult) {
// if dimConfig.hierarchical == true || dim is multiValued and dim count has been aggregated at
// indexing time, return dimCount directly
if (dimConfig.hierarchical == true || (dimConfig.multiValued && dimConfig.requireDimCount)) {
return counts[dimOrd];
}
// if dimCount was not aggregated at indexing time, iterate over childOrds to get dimCount
ChildOrdsResult childOrdsResult = getChildOrdsResult(childOrds, topN, dimConfig, dimOrd);
// if no early termination, store dim and childOrdsResult into a hashmap to avoid calling
// getChildOrdsResult again in getPathResult
dimToChildOrdsResult.put(dim, childOrdsResult);
return childOrdsResult.dimCount;
} }
private void countOneSegment( private void countOneSegment(
@ -366,19 +442,31 @@ public class SortedSetDocValuesFacetCounts extends Facets {
return counts[ord]; return counts[ord];
} }
@Override /**
public List<FacetResult> getAllDims(int topN) throws IOException { * Overloaded method to allow getFacetResultForDim be called without passing in the
* dimToChildOrdsResult parameter
*/
private FacetResult getFacetResultForDim(String dim, int topNChildren) throws IOException {
return getFacetResultForDim(dim, topNChildren, null);
}
/** Returns FacetResult for a dimension. */
private FacetResult getFacetResultForDim(
String dim, int topNChildren, ChildOrdsResult dimToChildOrdsResult) throws IOException {
List<FacetResult> results = new ArrayList<>();
for (String dim : state.getDims()) {
FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim); FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);
if (dimConfig.hierarchical) { if (dimConfig.hierarchical) {
DimTree dimTree = state.getDimTree(dim); DimTree dimTree = state.getDimTree(dim);
int dimOrd = dimTree.dimStartOrd; int dimOrd = dimTree.dimStartOrd;
FacetResult fr = getPathResult(dimConfig, dim, emptyPath, dimOrd, dimTree.iterator(), topN); return getPathResult(
if (fr != null) { dimConfig,
results.add(fr); dim,
} emptyPath,
dimOrd,
dimTree.iterator(),
topNChildren,
dimToChildOrdsResult);
} else { } else {
OrdRange ordRange = state.getOrdRange(dim); OrdRange ordRange = state.getOrdRange(dim);
int dimOrd = ordRange.start; int dimOrd = ordRange.start;
@ -389,11 +477,19 @@ public class SortedSetDocValuesFacetCounts extends Facets {
// child: // child:
childIt.next(); childIt.next();
} }
FacetResult fr = getPathResult(dimConfig, dim, emptyPath, dimOrd, childIt, topN); return getPathResult(
if (fr != null) { dimConfig, dim, emptyPath, dimOrd, childIt, topNChildren, dimToChildOrdsResult);
results.add(fr);
} }
} }
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
List<FacetResult> results = new ArrayList<>();
for (String dim : state.getDims()) {
FacetResult factResult = getFacetResultForDim(dim, topN);
if (factResult != null) {
results.add(factResult);
}
} }
// Sort by highest count: // Sort by highest count:
@ -411,7 +507,114 @@ public class SortedSetDocValuesFacetCounts extends Facets {
} }
} }
}); });
return results; return results;
} }
/**
 * Returns FacetResults for at most {@code topNDims} dimensions, each with up to {@code
 * topNChildren} children.
 *
 * <p>Every dimension's value is obtained through getDimValue and dimensions whose value is 0 are
 * ignored. The remaining dimensions compete in a bounded priority queue: a higher value wins, and
 * on equal values the dimension whose name compares lower wins. Child data that getDimValue had
 * to compute along the way is kept in a map and handed to getFacetResultForDim, so it is not
 * computed a second time for the winning dimensions.
 *
 * @param topNDims maximum number of dimensions to return
 * @param topNChildren number of children requested per dimension
 * @return the selected dimensions, filled in reverse order of removal from the queue
 * @throws IllegalArgumentException if {@code topNDims} or {@code topNChildren} is not positive
 */
@Override
public List<FacetResult> getTopDims(int topNDims, int topNChildren) throws IOException {
  if (topNDims <= 0 || topNChildren <= 0) {
    throw new IllegalArgumentException("topN must be > 0");
  }

  // Orders candidates by value first; on a tie the name that compares greater is the lesser one.
  PriorityQueue<DimValueResult> topDimsQueue =
      new PriorityQueue<>(topNDims) {
        @Override
        protected boolean lessThan(DimValueResult a, DimValueResult b) {
          if (a.value != b.value) {
            return a.value < b.value;
          }
          return a.dim.compareTo(b.dim) > 0;
        }
      };

  // Child data computed while determining dim values, keyed by dimension name.
  HashMap<String, ChildOrdsResult> cachedChildOrds = new HashMap<>();

  for (String dim : state.getDims()) {
    FacetsConfig.DimConfig dimConfig = stateConfig.getDimConfig(dim);

    final int dimOrd;
    final PrimitiveIterator.OfInt childIt;
    if (dimConfig.hierarchical) {
      DimTree dimTree = state.getDimTree(dim);
      dimOrd = dimTree.dimStartOrd;
      childIt = dimTree.iterator();
    } else {
      OrdRange ordRange = state.getOrdRange(dim);
      dimOrd = ordRange.start;
      childIt = ordRange.iterator();
      if (dimConfig.multiValued && dimConfig.requireDimCount) {
        // If the dim is multi-valued and requires dim counts, we know we've explicitly indexed
        // the dimension and we need to skip past it so the iterator is positioned on the first
        // child:
        childIt.next();
      }
    }

    int dimValue = getDimValue(dimConfig, dim, dimOrd, childIt, topNChildren, cachedChildOrds);
    if (dimValue == 0) {
      continue;
    }

    if (topDimsQueue.size() < topNDims) {
      topDimsQueue.add(new DimValueResult(dim, dimValue));
      continue;
    }

    // Queue is full: the candidate only gets in by beating the entry currently at the top,
    // which is then overwritten in place instead of allocating a new holder.
    DimValueResult weakest = topDimsQueue.top();
    boolean beatsWeakest =
        dimValue > weakest.value
            || (dimValue == weakest.value && dim.compareTo(weakest.dim) < 0);
    if (beatsWeakest) {
      weakest.dim = dim;
      weakest.value = dimValue;
      topDimsQueue.updateTop();
    }
  }

  // Pop the survivors and fill the array from the back.
  FacetResult[] results = new FacetResult[topDimsQueue.size()];
  for (int slot = results.length - 1; topDimsQueue.size() > 0; slot--) {
    String dim = topDimsQueue.pop().dim;
    // The map has no entry for dims whose value came straight from counts[]; null makes
    // getPathResult compute the child data itself.
    results[slot] = getFacetResultForDim(dim, topNChildren, cachedChildOrds.get(dim));
  }
  return Arrays.asList(results);
}
/**
 * Immutable holder for what getChildOrdsResult computes for one dimension: the dimension count,
 * the child count, and the queue holding the dimension's top children.
 */
private static class ChildOrdsResult {
  /** Value of the dimension; getChildOrdsResult sets it to -1 when the count is not reliable. */
  final int dimCount;

  /** Child count reported in the FacetResult built from this holder. */
  final int childCount;

  /** Queue of the top children; getPathResult returns no result when this is {@code null}. */
  final TopOrdAndIntQueue q;

  ChildOrdsResult(int dimensionCount, int children, TopOrdAndIntQueue topChildren) {
    this.q = topChildren;
    this.childCount = children;
    this.dimCount = dimensionCount;
  }
}
/**
 * Pairs a dimension name with its value so that getTopDims can rank dimensions by these two
 * fields. Both fields are mutable because getTopDims overwrites the entry at the top of its
 * priority queue in place when a better dimension is found.
 */
private static class DimValueResult {
  /** Dimension name; used as the tie-breaker when two values are equal. */
  String dim;

  /** Value (count) of the dimension. */
  int value;

  DimValueResult(String dimName, int dimValue) {
    this.value = dimValue;
    this.dim = dimName;
  }
}
} }

View File

@ -358,6 +358,19 @@ public class TestDrillSideways extends FacetTestCase {
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", "dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n",
allResults.get(1).toString()); allResults.get(1).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = r.facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n", topNDimsResult.get(0).toString());
assertEquals(
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n",
topNDimsResult.get(1).toString());
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = r.facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
// More interesting case: drill-down on two fields // More interesting case: drill-down on two fields
ddq = new DrillDownQuery(config); ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa"); ddq.add("Author", "Lisa");
@ -581,6 +594,17 @@ public class TestDrillSideways extends FacetTestCase {
"dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n", "dim=Publish Date path=[] value=3 childCount=2\n 2010 (2)\n 2012 (1)\n",
allResults.get(1).toString()); allResults.get(1).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
topNDimsResult.get(0).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = r.facets.getTopDims(10, 10);
assertEquals(allResults, allDimsResults);
// More interesting case: drill-down on two fields // More interesting case: drill-down on two fields
ddq = new DrillDownQuery(config); ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa"); ddq.add("Author", "Lisa");
@ -1843,6 +1867,13 @@ public class TestDrillSideways extends FacetTestCase {
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n",
allResults.get(0).toString()); allResults.get(0).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
"dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
topNDimsResult.get(0).toString());
// More interesting case: drill-down on two fields // More interesting case: drill-down on two fields
ddq = new DrillDownQuery(config); ddq = new DrillDownQuery(config);
ddq.add("Author", "Lisa"); ddq.add("Author", "Lisa");

View File

@ -155,6 +155,21 @@ public class TestLongValueFacetCounts extends LuceneTestCase {
"dim=field path=[] value=101 childCount=6\n 0 (20)\n 1 (20)\n 2 (20)\n " "dim=field path=[] value=101 childCount=6\n 0 (20)\n 1 (20)\n 2 (20)\n "
+ "3 (20)\n 4 (20)\n 9223372036854775807 (1)\n", + "3 (20)\n 4 (20)\n 9223372036854775807 (1)\n",
result.get(0).toString()); result.get(0).toString());
// test default implementation of getTopDims
List<FacetResult> getTopDimResult = facets.getTopDims(1, 1);
assertEquals(1, getTopDimResult.size());
assertEquals(
"dim=field path=[] value=101 childCount=6\n 0 (20)\n", getTopDimResult.get(0).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
assertEquals(result, allDimsResults);
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
r.close(); r.close();
d.close(); d.close();
} }

View File

@ -387,6 +387,11 @@ public class TestStringValueFacetCounts extends FacetTestCase {
assertEquals(1, allDims.size()); assertEquals(1, allDims.size());
assertEquals(facetResult, allDims.get(0)); assertEquals(facetResult, allDims.get(0));
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, topN);
assertEquals(1, topNDimsResult.size());
assertEquals(facetResult, topNDimsResult.get(0));
// This is a little strange, but we request all labels at this point so that when we // This is a little strange, but we request all labels at this point so that when we
// secondarily sort by label value in order to compare to the expected results, we have // secondarily sort by label value in order to compare to the expected results, we have
// all the values. See LUCENE-9991: // all the values. See LUCENE-9991:

View File

@ -243,6 +243,21 @@ public class TestRangeFacetCounts extends FacetTestCase {
"dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n", "dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n",
result.get(0).toString()); result.get(0).toString());
// test getAllDims(1)
List<FacetResult> test1Child = facets.getAllDims(1);
assertEquals(1, test1Child.size());
assertEquals(
"dim=field path=[] value=22 childCount=5\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (1)\n",
test1Child.get(0).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(1, 1);
assertEquals(test1Child, topNDimsResult);
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
r.close(); r.close();
d.close(); d.close();
} }

View File

@ -104,6 +104,41 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
"dim=b path=[] value=2 childCount=2\n buzz (2)\n baz (1)\n", "dim=b path=[] value=2 childCount=2\n buzz (2)\n baz (1)\n",
facets.getTopChildren(10, "b").toString()); facets.getTopChildren(10, "b").toString());
// test getAllDims
List<FacetResult> results = facets.getAllDims(10);
assertEquals(2, results.size());
assertEquals(
"dim=b path=[] value=2 childCount=2\n buzz (2)\n baz (1)\n",
results.get(0).toString());
assertEquals(
"dim=a path=[] value=-1 childCount=3\n foo (2)\n bar (1)\n zoo (1)\n",
results.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
assertEquals(results, allDimsResults);
// test getTopDims(2, 1)
List<FacetResult> topDimsResults = facets.getTopDims(2, 1);
assertEquals(2, topDimsResults.size());
assertEquals(
"dim=b path=[] value=2 childCount=2\n buzz (2)\n", topDimsResults.get(0).toString());
assertEquals(
"dim=a path=[] value=-1 childCount=3\n foo (2)\n", topDimsResults.get(1).toString());
// test getAllDims
List<FacetResult> results2 = facets.getAllDims(1);
assertEquals(2, results2.size());
assertEquals(
"dim=b path=[] value=2 childCount=2\n buzz (2)\n", results2.get(0).toString());
// test getTopDims(1, 1)
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
assertEquals(1, topDimsResults1.size());
assertEquals(
"dim=b path=[] value=2 childCount=2\n buzz (2)\n",
topDimsResults1.get(0).toString());
// DrillDown: // DrillDown:
DrillDownQuery q = new DrillDownQuery(config); DrillDownQuery q = new DrillDownQuery(config);
q.add("a", "foo"); q.add("a", "foo");
@ -117,6 +152,149 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
} }
} }
// test tricky combinations of the three config: MultiValued, Hierarchical, and RequireDimCount of
// a dim
public void testCombinationsOfConfig() throws Exception {
FacetsConfig config = new FacetsConfig();
// case 1: dimension "a" is hierarchical and non-multiValued
// expect returns counts[pathOrd]
config.setMultiValued("a", false);
config.setHierarchical("a", true);
// case 2: dimension "b" is hierarchical and multiValued and setRequireDimCount = true
// expect returns counts[pathOrd]
config.setMultiValued("b", true);
config.setHierarchical("b", true);
config.setRequireDimCount("b", true);
// case 3: dimension "c" is hierarchical and multiValued and setRequireDimCount != true
// expect always returns counts[pathOrd] for Hierarchical = true
config.setMultiValued("c", true);
config.setHierarchical("c", true);
// case 4: dimension "d" is non-hierarchical but multiValued and setRequireDimCount = true
// expect returns counts[pathOrd]
config.setMultiValued("d", true);
config.setHierarchical("d", false);
config.setRequireDimCount("d", true);
// case 4: dimension "e" that is non-hierarchical and multiValued and setRequireDimCount = false
// expect returns -1, this is the only case that we reset dimCount to -1
config.setMultiValued("e", true);
config.setHierarchical("e", false);
config.setRequireDimCount("e", false);
// case 5: dimension "f" that it is non-hierarchical and non-multiValued and expect returns
// counts[pathOrd]
config.setMultiValued("f", false);
config.setHierarchical("f", false);
// case 6: expect returns counts[pathOrd] for dims with setHierarchical = true
config.setHierarchical("g", true);
// case 7: dimension "g-2" is explicitly non-hierarchical (other settings left at their
// defaults); expect value=1
config.setHierarchical("g-2", false);
// case 8: dimension "h" is multiValued with setRequireDimCount = true (hierarchical left at its
// default); expect value=1
config.setRequireDimCount("h", true);
config.setMultiValued("h", true);
try (Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
doc.add(new SortedSetDocValuesFacetField("b", "bar"));
doc.add(new SortedSetDocValuesFacetField("c", "zoo"));
doc.add(new SortedSetDocValuesFacetField("d", "baz"));
doc.add(new SortedSetDocValuesFacetField("e", "buzz"));
doc.add(new SortedSetDocValuesFacetField("f", "buzze"));
doc.add(new SortedSetDocValuesFacetField("g", "buzzel"));
doc.add(new SortedSetDocValuesFacetField("g-2", "buzzell"));
doc.add(new SortedSetDocValuesFacetField("h", "buzzele"));
writer.addDocument(config.build(doc));
// NRT open
try (IndexReader r = writer.getReader()) {
IndexSearcher searcher = newSearcher(r);
// Per-top-reader state:
SortedSetDocValuesReaderState state =
new DefaultSortedSetDocValuesReaderState(searcher.getIndexReader(), config);
ExecutorService exec = randomExecutorServiceOrNull();
try {
Facets facets = getAllFacets(searcher, state, exec);
assertEquals(
"dim=a path=[] value=1 childCount=1\n foo (1)\n",
facets.getTopChildren(10, "a").toString());
// value for dim b should be 1 since it's multivalued but _does_ require dim counts:
assertEquals(
"dim=b path=[] value=1 childCount=1\n bar (1)\n",
facets.getTopChildren(10, "b").toString());
assertEquals(
"dim=c path=[] value=1 childCount=1\n zoo (1)\n",
facets.getTopChildren(10, "c").toString());
assertEquals(
"dim=d path=[] value=1 childCount=1\n baz (1)\n",
facets.getTopChildren(10, "d").toString());
// value for dim e should be -1 since it's multivalued but doesn't require dim counts:
assertEquals(
"dim=e path=[] value=-1 childCount=1\n buzz (1)\n",
facets.getTopChildren(10, "e").toString());
assertEquals(
"dim=f path=[] value=1 childCount=1\n buzze (1)\n",
facets.getTopChildren(10, "f").toString());
assertEquals(
"dim=g path=[] value=1 childCount=1\n buzzel (1)\n",
facets.getTopChildren(10, "g").toString());
assertEquals(
"dim=g-2 path=[] value=1 childCount=1\n buzzell (1)\n",
facets.getTopChildren(10, "g-2").toString());
assertEquals(
"dim=h path=[] value=1 childCount=1\n buzzele (1)\n",
facets.getTopChildren(10, "h").toString());
// test getAllDims
List<FacetResult> results = facets.getAllDims(10);
assertEquals(9, results.size());
assertEquals(
"dim=a path=[] value=1 childCount=1\n foo (1)\n", results.get(0).toString());
assertEquals(
"dim=b path=[] value=1 childCount=1\n bar (1)\n", results.get(1).toString());
assertEquals(
"dim=c path=[] value=1 childCount=1\n zoo (1)\n", results.get(2).toString());
assertEquals(
"dim=d path=[] value=1 childCount=1\n baz (1)\n", results.get(3).toString());
assertEquals(
"dim=f path=[] value=1 childCount=1\n buzze (1)\n", results.get(4).toString());
assertEquals(
"dim=g path=[] value=1 childCount=1\n buzzel (1)\n", results.get(5).toString());
assertEquals(
"dim=g-2 path=[] value=1 childCount=1\n buzzell (1)\n", results.get(6).toString());
assertEquals(
"dim=h path=[] value=1 childCount=1\n buzzele (1)\n", results.get(7).toString());
assertEquals(
"dim=e path=[] value=-1 childCount=1\n buzz (1)\n", results.get(8).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allTopDimsResults = facets.getTopDims(10, 10);
assertEquals(results, allTopDimsResults);
// test getTopDims(n, 10)
if (allTopDimsResults.size() > 0) {
for (int i = 1; i < results.size(); i++) {
assertEquals(results.subList(0, i), facets.getTopDims(i, 10));
}
}
} finally {
if (exec != null) exec.shutdownNow();
}
}
}
}
public void testBasicHierarchical() throws Exception { public void testBasicHierarchical() throws Exception {
FacetsConfig config = new FacetsConfig(); FacetsConfig config = new FacetsConfig();
config.setMultiValued("a", true); config.setMultiValued("a", true);
@ -702,6 +880,11 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
doc.add(new SortedSetDocValuesFacetField("a", "foo3")); doc.add(new SortedSetDocValuesFacetField("a", "foo3"));
doc.add(new SortedSetDocValuesFacetField("b", "bar2")); doc.add(new SortedSetDocValuesFacetField("b", "bar2"));
doc.add(new SortedSetDocValuesFacetField("c", "baz1")); doc.add(new SortedSetDocValuesFacetField("c", "baz1"));
doc.add(new SortedSetDocValuesFacetField("d", "biz1"));
writer.addDocument(config.build(doc));
doc = new Document();
doc.add(new SortedSetDocValuesFacetField("d", "biz2"));
writer.addDocument(config.build(doc)); writer.addDocument(config.build(doc));
// NRT open // NRT open
@ -719,7 +902,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
// Ask for top 10 labels for any dims that have counts: // Ask for top 10 labels for any dims that have counts:
List<FacetResult> results = facets.getAllDims(10); List<FacetResult> results = facets.getAllDims(10);
assertEquals(3, results.size()); assertEquals(4, results.size());
assertEquals( assertEquals(
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
results.get(0).toString()); results.get(0).toString());
@ -727,7 +910,42 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", "dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
results.get(1).toString()); results.get(1).toString());
assertEquals( assertEquals(
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString()); "dim=d path=[] value=2 childCount=2\n biz1 (1)\n biz2 (1)\n",
results.get(2).toString());
assertEquals(
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(3).toString());
// test getAllDims with topN = 1, sort by dim names when values are equal
List<FacetResult> top1results = facets.getAllDims(1);
assertEquals(4, results.size());
assertEquals(
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString());
assertEquals(
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n", top1results.get(1).toString());
assertEquals(
"dim=d path=[] value=2 childCount=2\n biz1 (1)\n", top1results.get(2).toString());
assertEquals(
"dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(3).toString());
// test getTopDims(1, 1)
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
assertEquals(1, topDimsResults1.size());
assertEquals(
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n",
topDimsResults1.get(0).toString());
// test top 2 dims that have the same counts, expect to sort by dim names
List<FacetResult> topDimsResults2 = facets.getTopDims(3, 2);
assertEquals(3, topDimsResults2.size());
assertEquals(
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n",
topDimsResults2.get(0).toString());
assertEquals(
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n",
topDimsResults2.get(1).toString());
assertEquals(
"dim=d path=[] value=2 childCount=2\n biz1 (1)\n biz2 (1)\n",
topDimsResults2.get(2).toString());
Collection<Accountable> resources = state.getChildResources(); Collection<Accountable> resources = state.getChildResources();
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
@ -795,6 +1013,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
assertEquals( assertEquals(
"dim=e path=[] value=1 childCount=1\n biz (1)\n", results.get(1).toString()); "dim=e path=[] value=1 childCount=1\n biz (1)\n", results.get(1).toString());
// test getTopDims(1, 1)
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 1);
assertEquals(1, topDimsResults1.size());
assertEquals(
"dim=d path=[] value=2 childCount=1\n foo (2)\n", results.get(0).toString());
Collection<Accountable> resources = state.getChildResources(); Collection<Accountable> resources = state.getChildResources();
assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); assertTrue(state.toString().contains(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
if (searcher.getIndexReader().leaves().size() > 1) { if (searcher.getIndexReader().leaves().size() > 1) {
@ -1012,6 +1236,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
// sortTies(actual); // sortTies(actual);
assertEquals(expected, actual); assertEquals(expected, actual);
// test getTopDims(1, 10)
if (actual.size() > 0) {
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 10);
assertEquals(actual.get(0), topDimsResults1.get(0));
}
} }
} finally { } finally {
if (exec != null) exec.shutdownNow(); if (exec != null) exec.shutdownNow();
@ -1221,6 +1451,13 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
assertEquals(expectedAllDims, actualAllDims); assertEquals(expectedAllDims, actualAllDims);
// test getTopDims(n, 10)
if (actualAllDims.size() > 0) {
for (int i = 1; i < actualAllDims.size(); i++) {
assertEquals(actualAllDims.subList(0, i), facets.getTopDims(i, 10));
}
}
// Dfs through top children // Dfs through top children
for (FacetResult dimResult : actualAllDims) { for (FacetResult dimResult : actualAllDims) {
if (config.getDimConfig(dimResult.dim).hierarchical) { if (config.getDimConfig(dimResult.dim).hierarchical) {

View File

@ -163,6 +163,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Document doc = new Document(); Document doc = new Document();
doc.add(new FacetField("a", "foo1")); doc.add(new FacetField("a", "foo1"));
doc.add(new FacetField("b", "aar1"));
writer.addDocument(config.build(taxoWriter, doc)); writer.addDocument(config.build(taxoWriter, doc));
if (random().nextBoolean()) { if (random().nextBoolean()) {
@ -201,9 +202,41 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
"dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n", "dim=a path=[] value=3 childCount=3\n foo1 (1)\n foo2 (1)\n foo3 (1)\n",
results.get(0).toString()); results.get(0).toString());
assertEquals( assertEquals(
"dim=b path=[] value=2 childCount=2\n bar1 (1)\n bar2 (1)\n", results.get(1).toString()); "dim=b path=[] value=3 childCount=3\n aar1 (1)\n bar1 (1)\n bar2 (1)\n",
results.get(1).toString());
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString()); assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", results.get(2).toString());
// test getAllDims with topN = 1, sort by dim names when values are equal
List<FacetResult> top1results = facets.getAllDims(1);
assertEquals(3, results.size());
assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString());
assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString());
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
"dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString());
assertEquals(
"dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
assertEquals(results, allDimsResults);
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
// test getTopDims(1, 0) with topNChildren = 0
expectThrows(
IllegalArgumentException.class,
() -> {
facets.getTopDims(1, 0);
});
writer.close(); writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir); IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
} }
@ -590,10 +623,28 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Facets facets = Facets facets =
getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config); getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
for (FacetResult result : facets.getAllDims(10)) { List<FacetResult> allDimsResult = facets.getAllDims(10);
for (FacetResult result : allDimsResult) {
assertEquals(r.numDocs(), result.value.intValue()); assertEquals(r.numDocs(), result.value.intValue());
} }
// test default implementation of getTopDims
if (allDimsResult.size() > 0) {
List<FacetResult> topNDimsResult = facets.getTopDims(1, 10);
assertEquals(allDimsResult.get(0), topNDimsResult.get(0));
}
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
// test getTopDims(1, 0) with topNChildren = 0
expectThrows(
IllegalArgumentException.class,
() -> {
facets.getTopDims(1, 0);
});
iw.close(); iw.close();
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir); IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
} }
@ -623,6 +674,12 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertEquals( assertEquals(
"calling getFacetResults twice should return the .equals()=true result", res1, res2); "calling getFacetResults twice should return the .equals()=true result", res1, res2);
// test default implementation of getTopDims
if (res1.size() > 0) {
List<FacetResult> topNDimsResult = facets.getTopDims(1, 10);
assertEquals(res1.get(0), topNDimsResult.get(0));
}
iw.close(); iw.close();
IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir); IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
} }
@ -809,6 +866,12 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertEquals(expected, actual); assertEquals(expected, actual);
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(actual.size(), 10);
sortTies(topNDimsResult);
assertEquals(actual, topNDimsResult);
// Test facet labels for each matching test doc // Test facet labels for each matching test doc
List<List<FacetLabel>> actualLabels = getAllTaxonomyFacetLabels(null, tr, fc); List<List<FacetLabel>> actualLabels = getAllTaxonomyFacetLabels(null, tr, fc);
assertEquals(expectedLabels.size(), actualLabels.size()); assertEquals(expectedLabels.size(), actualLabels.size());

View File

@ -195,6 +195,29 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
assertEquals( assertEquals(
"dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results.get(2).toString()); "dim=c path=[] value=30.0 childCount=1\n baz1 (30.0)\n", results.get(2).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
"dim=a path=[] value=60.0 childCount=3\n foo3 (30.0)\n", topNDimsResult.get(0).toString());
assertEquals(
"dim=b path=[] value=50.0 childCount=2\n bar2 (30.0)\n", topNDimsResult.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);
assertEquals(results, allDimsResults);
// test getTopDims(0, 1)
List<FacetResult> topDimsResults2 = facets.getTopDims(0, 1);
assertEquals(0, topDimsResults2.size());
// test getTopDims(1, 0) with topNChildren = 0
expectThrows(
IllegalArgumentException.class,
() -> {
facets.getTopDims(1, 0);
});
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir); IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
} }
@ -237,6 +260,10 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
List<FacetResult> results = facets.getAllDims(10); List<FacetResult> results = facets.getAllDims(10);
assertTrue(results.isEmpty()); assertTrue(results.isEmpty());
// test default implementation of getTopDims
List<FacetResult> topDimsResults = facets.getTopDims(10, 10);
assertTrue(topDimsResults.isEmpty());
expectThrows( expectThrows(
IllegalArgumentException.class, IllegalArgumentException.class,
() -> { () -> {
@ -510,6 +537,12 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
List<FacetResult> actual = facets.getAllDims(10); List<FacetResult> actual = facets.getAllDims(10);
// test default implementation of getTopDims
if (actual.size() > 0) {
List<FacetResult> topDimsResults1 = facets.getTopDims(1, 10);
assertEquals(actual.get(0), topDimsResults1.get(0));
}
// Messy: fixup ties // Messy: fixup ties
sortTies(actual); sortTies(actual);