SOLR-6328: return missing count for facet.missing=true even if limit=0

* facet.missing is independent of facet.limit. So, even for limit=0,
  missing counts should be return if facet.missing=true
This commit is contained in:
Munendra S N 2019-08-19 20:46:04 +05:30
parent 6c94f659d2
commit 0654c2496d
9 changed files with 168 additions and 37 deletions

View File

@ -157,6 +157,8 @@ Bug Fixes
* SOLR-13701: Fixed JWTAuthPlugin to update metrics prior to continuing w/other filters or returning error (hossman)
* SOLR-6328: facet.missing=true should return missing count even when facet.limit=0 (hossman, Munendra S N)
Other Changes
----------------------

View File

@ -168,6 +168,10 @@ public class DocValuesFacets {
// IDEA: we could also maintain a count of "other"... everything that fell outside
// of the top 'N'
if (limit == 0) {
return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;

View File

@ -238,6 +238,11 @@ final class NumericFacets {
}
}
final NamedList<Integer> result = new NamedList<>();
if (limit == 0) {
return finalize(result, missingCount, missing);
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
@ -275,7 +280,6 @@ final class NumericFacets {
// 4. build the NamedList
final ValueSource vs = ft.getValueSource(sf, null);
final NamedList<Integer> result = new NamedList<>();
// This stuff is complicated because if facet.mincount=0, the counts needs
// to be merged with terms from the terms dict
@ -399,10 +403,7 @@ final class NumericFacets {
}
}
if (missing) {
result.add(null, missingCount);
}
return result;
return finalize(result, missingCount, missing);
}
private static NamedList<Integer> getCountsMultiValued(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
@ -449,6 +450,11 @@ final class NumericFacets {
}
}
if (limit == 0) {
NamedList<Integer> result = new NamedList<>();
return finalize(result, missingCount, missing);
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
@ -498,6 +504,10 @@ final class NumericFacets {
// Once facet.mincount=0 is supported we'll need to add logic similar to the SingleValue case, but obtaining values
// with count 0 from DocValues
return finalize(result, missingCount, missing);
}
private static NamedList<Integer> finalize(NamedList<Integer> result, int missingCount, boolean missing) {
if (missing) {
result.add(null, missingCount);
}

View File

@ -174,6 +174,11 @@ class PerSegmentSingleValuedFaceting {
}
}
if (limit == 0) {
NamedList<Integer> res = new NamedList<>();
return finalize(res, missingCount, hasMissingCount);
}
FacetCollector collector;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
collector = new CountSortedFacetCollector(offset, limit, mincount);
@ -237,6 +242,11 @@ class PerSegmentSingleValuedFaceting {
res.setName(i, ft.indexedToReadable(res.getName(i)));
}
return finalize(res, missingCount, hasMissingCount);
}
private NamedList<Integer> finalize(NamedList<Integer> res, int missingCount, boolean hasMissingCount)
throws IOException {
if (missing) {
if (!hasMissingCount) {
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,fieldName);

View File

@ -441,14 +441,18 @@ public class SimpleFacets {
final int threads = parsed.threads;
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
if (limit == 0) return new NamedList<>();
boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// when limit=0 and missing=false then return empty list
if (limit == 0 && !missing) return new NamedList<>();
if (mincount==null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
mincount = (zeros!=null && !zeros) ? 1 : 0;
// current default is to include zeros.
}
boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// default to sorting if there is a limit.
String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
@ -949,6 +953,11 @@ public class SimpleFacets {
* don't enum if we get our max from them
*/
final NamedList<Integer> res = new NamedList<>();
if (limit == 0) {
return finalize(res, searcher, docs, field, missing);
}
// Minimum term docFreq in order to use the filterCache for that term.
int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);
@ -966,7 +975,6 @@ public class SimpleFacets {
boolean sortByCount = sort.equals("count") || sort.equals("true");
final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;
final BoundedTreeSet<CountPair<BytesRef,Integer>> queue = sortByCount ? new BoundedTreeSet<CountPair<BytesRef,Integer>>(maxsize) : null;
final NamedList<Integer> res = new NamedList<>();
int min=mincount-1; // the smallest value in the top 'N' values
int off=offset;
@ -1108,6 +1116,11 @@ public class SimpleFacets {
}
}
return finalize(res, searcher, docs, field, missing);
}
private static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, DocSet docs,
String field, boolean missing) throws IOException {
if (missing) {
res.add(null, getFieldMissingCount(searcher,docs,field));
}

View File

@ -261,25 +261,27 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
}
// if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation
if (!(params.getInt("facet.limit", 100) == 0 &&
!params.getBool("facet.missing", false))) {
// it bypasses all processing, and we can go to empty validation
if (exists && params.getInt("facet.mincount", 0)>1) {
assertQEx("no mincount on facet.exists",
rand.nextBoolean() ? "facet.exists":"facet.mincount",
req(params), ErrorCode.BAD_REQUEST);
continue;
}
// facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS
// facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS
final boolean notEnum = method != null && !method.equals("enum");
final boolean trieField = trieFields.matcher(ftype.fname).matches();
if ((notEnum || trieField) && exists) {
assertQEx("facet.exists only when enum or ommitted",
assertQEx("facet.exists only when enum or ommitted",
"facet.exists", req(params), ErrorCode.BAD_REQUEST);
continue;
}
if (exists && sf.getType().isPointField()) {
// PointFields don't yet support "enum" method or the "facet.exists" parameter
assertQEx("Expecting failure, since ",
"facet.exists=true is requested, but facet.method=enum can't be used with " + sf.getName(),
assertQEx("Expecting failure, since ",
"facet.exists=true is requested, but facet.method=enum can't be used with " + sf.getName(),
req(params), ErrorCode.BAD_REQUEST);
continue;
}
@ -370,10 +372,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
int fromIndex = offset > stratified.size() ? stratified.size() : offset;
stratified = stratified.subList(fromIndex,
end > stratified.size() ? stratified.size() : end);
if (params.getInt("facet.limit", 100)>0) { /// limit=0 omits even miss count
stratified.addAll(stratas.get(null));
}
stratified.addAll(stratas.get(null));
facetSortedByIndex.clear();
facetSortedByIndex.addAll(stratified);
});

View File

@ -91,25 +91,23 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
}
// sanity check limit=0 w/ mincount=0 & missing=true
//
// SOLR-6328: doesn't work for single node, so can't work for distrib either (yet)
//
// PivotFacetField's init of needRefinementAtThisLevel as needing potential change
//
// rsp = query( "q", "*:*",
// "rows", "0",
// "facet","true",
// "f.company_t.facet.limit", "10",
// "facet.pivot","special_s,bogus_s,company_t",
// "facet.missing", "true",
// FacetParams.FACET_LIMIT, "0",
// FacetParams.FACET_PIVOT_MINCOUNT,"0");
// pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
// assertEquals(1, pivots.size()); // only the missing
// assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
// assertEquals(pivots.toString(), 1, pivots.get(0).getPivot());
// assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0));
// // TODO: some asserts on company results
rsp = query( "q", "*:*",
"rows", "0",
"facet","true",
"f.company_t.facet.limit", "10",
"facet.pivot","special_s,bogus_s,company_t",
"facet.missing", "true",
FacetParams.FACET_LIMIT, "0",
FacetParams.FACET_PIVOT_MINCOUNT,"0");
pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
assertEquals(1, pivots.size()); // only the missing
assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
assertEquals(pivots.toString(), 1, pivots.get(0).getPivot().size());
assertPivot("bogus_s", null, docNumber - 5 , pivots.get(0).getPivot().get(0)); // 5 docs w/special_s
PivotField bogus = pivots.get(0).getPivot().get(0);
assertEquals(bogus.toString(), 11, bogus.getPivot().size());
// last value would always be missing docs
assertPivot("company_t", null, 2, bogus.getPivot().get(10)); // 2 docs w/company_t
// basic check w/ default sort, limit, & mincount==0
rsp = query( "q", "*:*",

View File

@ -376,6 +376,18 @@ public class FacetPivotSmallTest extends SolrTestCaseJ4 {
facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", // wrong missing company count
facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots
);
SolrParams missingC = SolrParams.wrapDefaults(missingA,
params(FacetParams.FACET_LIMIT, "0", "facet.sort", "index"));
assertQ(req(missingC), facetPivotPrefix + "/arr[@name='pivot'][count(.) > 0]", // not enough values for pivot
facetPivotPrefix + "[1]/null[@name='value'][.='']", // not the missing place value
facetPivotPrefix + "[1]/int[@name='count'][.=2]", // wrong missing place count
facetPivotPrefix + "[1]/arr[@name='pivot'][count(.) > 0]", // not enough sub-pivots for missing place
facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1]/null[@name='value'][.='']", // not the missing company value
facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1]/int[@name='count'][.=1]", // wrong missing company count
facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots
);
}
public void testPivotFacetIndexSortMincountAndLimit() throws Exception {

View File

@ -597,6 +597,75 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
}
}
@Test
public void testFacetMissing() {
SolrParams commonParams = params("q", "foo_s:A", "rows", "0", "facet", "true", "facet.missing", "true");
// with facet.limit!=0 and facet.missing=true
assertQ(
req(commonParams, "facet.field", "trait_s", "facet.limit", "1"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']",
"*[count(//lst[@name='trait_s']/int)=2]",
"//lst[@name='trait_s']/int[@name='Obnoxious'][.='2']",
"//lst[@name='trait_s']/int[.='1']"
);
// with facet.limit=0 and facet.missing=true
assertQ(
req(commonParams, "facet.field", "trait_s", "facet.limit", "0"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']",
"*[count(//lst[@name='trait_s']/int)=1]",
"//lst[@name='trait_s']/int[.='1']"
);
// facet.method=enum
assertQ(
req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.method", "enum"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']",
"*[count(//lst[@name='trait_s']/int)=1]",
"//lst[@name='trait_s']/int[.='1']"
);
assertQ(
req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.mincount", "1",
"facet.method", "uif"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']",
"*[count(//lst[@name='trait_s']/int)=1]",
"//lst[@name='trait_s']/int[.='1']"
);
// facet.method=fcs
assertQ(
req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.method", "fcs"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']",
"*[count(//lst[@name='trait_s']/int)=1]",
"//lst[@name='trait_s']/int[.='1']"
);
// facet.missing=true on numeric field
assertQ(
req(commonParams, "facet.field", "range_facet_f", "facet.limit", "1", "facet.mincount", "1"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='range_facet_f']",
"*[count(//lst[@name='range_facet_f']/int)=2]",
"//lst[@name='range_facet_f']/int[.='0']"
);
// facet.limit=0
assertQ(
req(commonParams, "facet.field", "range_facet_f", "facet.limit", "0", "facet.mincount", "1"),
"//lst[@name='facet_counts']/lst[@name='facet_fields']",
"//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='range_facet_f']",
"*[count(//lst[@name='range_facet_f']/int)=1]",
"//lst[@name='range_facet_f']/int[.='0']"
);
}
@Test
public void testSimpleFacetCounts() {
@ -2274,11 +2343,12 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"group","true"
,"group.field",g
,"group.facet","true"
,"facet.missing","true"
);
assertQ("test facet.exclude for grouped facets",
groupReq
,"*[count(//lst[@name='facet_fields']/lst/int)=10]"
,"*[count(//lst[@name='facet_fields']/lst/int)=11]"
,pre+"/int[1][@name='CCC'][.='3']"
,pre+"/int[2][@name='CCC"+termSuffix+"'][.='3']"
,pre+"/int[3][@name='BBB'][.='2']"
@ -2289,6 +2359,17 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,pre+"/int[8][@name='BB"+termSuffix+"'][.='1']"
,pre+"/int[9][@name='CC'][.='1']"
,pre+"/int[10][@name='CC"+termSuffix+"'][.='1']"
,pre+"/int[11][.='1']"
);
ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams(groupReq.getParams());
modifiableSolrParams.set("facet.limit", "0");
groupReq.setParams(modifiableSolrParams);
assertQ("test facet.exclude for grouped facets with facet.limit=0, facet.missing=true",
groupReq
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
,pre+"/int[.='1']"
);
}