From 0654c2496d3d624f0ddfbe6e31e1755e157b2266 Mon Sep 17 00:00:00 2001 From: Munendra S N Date: Mon, 19 Aug 2019 20:46:04 +0530 Subject: [PATCH] SOLR-6328: return missing count for facet.missing=true even if limit=0 * facet.missing is independent of facet.limit. So, even for limit=0, missing counts should be return if facet.missing=true --- solr/CHANGES.txt | 2 + .../apache/solr/request/DocValuesFacets.java | 4 + .../apache/solr/request/NumericFacets.java | 20 +++-- .../PerSegmentSingleValuedFaceting.java | 10 +++ .../org/apache/solr/request/SimpleFacets.java | 19 ++++- .../org/apache/solr/TestRandomFaceting.java | 19 +++-- .../DistributedFacetPivotLargeTest.java | 36 ++++---- .../component/FacetPivotSmallTest.java | 12 +++ .../apache/solr/request/SimpleFacetsTest.java | 83 ++++++++++++++++++- 9 files changed, 168 insertions(+), 37 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a7492b62fe4..a17b9e725cf 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -157,6 +157,8 @@ Bug Fixes * SOLR-13701: Fixed JWTAuthPlugin to update metrics prior to continuing w/other filters or returning error (hossman) +* SOLR-6328: facet.missing=true should return missing count even when facet.limit=0 (hossman, Munendra S N) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index d77c73de48d..7a002294a06 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -168,6 +168,10 @@ public class DocValuesFacets { // IDEA: we could also maintain a count of "other"... everything that fell outside // of the top 'N' + if (limit == 0) { + return finalize(res, searcher, schemaField, docs, missingCount, missing); + } + int off=offset; int lim=limit>=0 ? limit : Integer.MAX_VALUE; diff --git a/solr/core/src/java/org/apache/solr/request/NumericFacets.java b/solr/core/src/java/org/apache/solr/request/NumericFacets.java index 4ef9f26b08b..c185cb5a0e5 100644 --- a/solr/core/src/java/org/apache/solr/request/NumericFacets.java +++ b/solr/core/src/java/org/apache/solr/request/NumericFacets.java @@ -238,6 +238,11 @@ final class NumericFacets { } } + final NamedList result = new NamedList<>(); + if (limit == 0) { + return finalize(result, missingCount, missing); + } + // 2. select top-k facet values final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size); final PriorityQueue pq; @@ -275,7 +280,6 @@ final class NumericFacets { // 4. build the NamedList final ValueSource vs = ft.getValueSource(sf, null); - final NamedList result = new NamedList<>(); // This stuff is complicated because if facet.mincount=0, the counts needs // to be merged with terms from the terms dict @@ -399,10 +403,7 @@ final class NumericFacets { } } - if (missing) { - result.add(null, missingCount); - } - return result; + return finalize(result, missingCount, missing); } private static NamedList getCountsMultiValued(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException { @@ -449,6 +450,11 @@ final class NumericFacets { } } + if (limit == 0) { + NamedList result = new NamedList<>(); + return finalize(result, missingCount, missing); + } + // 2. select top-k facet values final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size); final PriorityQueue pq; @@ -498,6 +504,10 @@ final class NumericFacets { // Once facet.mincount=0 is supported we'll need to add logic similar to the SingleValue case, but obtaining values // with count 0 from DocValues + return finalize(result, missingCount, missing); + } + + private static NamedList finalize(NamedList result, int missingCount, boolean missing) { if (missing) { result.add(null, missingCount); } diff --git a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java index 48837e0c6ab..4ff769f96dd 100644 --- a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java +++ b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java @@ -174,6 +174,11 @@ class PerSegmentSingleValuedFaceting { } } + if (limit == 0) { + NamedList res = new NamedList<>(); + return finalize(res, missingCount, hasMissingCount); + } + FacetCollector collector; if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { collector = new CountSortedFacetCollector(offset, limit, mincount); @@ -237,6 +242,11 @@ class PerSegmentSingleValuedFaceting { res.setName(i, ft.indexedToReadable(res.getName(i))); } + return finalize(res, missingCount, hasMissingCount); + } + + private NamedList finalize(NamedList res, int missingCount, boolean hasMissingCount) + throws IOException { if (missing) { if (!hasMissingCount) { missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,fieldName); diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index 561df4f4c7d..b1acea03a85 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -441,14 +441,18 @@ public class SimpleFacets { final int threads = parsed.threads; int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0); int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100); - if (limit == 0) return new NamedList<>(); + boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false); + + // when limit=0 and missing=false then return empty list + if (limit == 0 && !missing) return new NamedList<>(); + if (mincount==null) { Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS); // mincount = (zeros!=null && zeros) ? 0 : 1; mincount = (zeros!=null && !zeros) ? 1 : 0; // current default is to include zeros. } - boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false); + // default to sorting if there is a limit. String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX); String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX); @@ -949,6 +953,11 @@ public class SimpleFacets { * don't enum if we get our max from them */ + final NamedList res = new NamedList<>(); + if (limit == 0) { + return finalize(res, searcher, docs, field, missing); + } + // Minimum term docFreq in order to use the filterCache for that term. int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0); @@ -966,7 +975,6 @@ public class SimpleFacets { boolean sortByCount = sort.equals("count") || sort.equals("true"); final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1; final BoundedTreeSet> queue = sortByCount ? new BoundedTreeSet>(maxsize) : null; - final NamedList res = new NamedList<>(); int min=mincount-1; // the smallest value in the top 'N' values int off=offset; @@ -1108,6 +1116,11 @@ public class SimpleFacets { } } + return finalize(res, searcher, docs, field, missing); + } + + private static NamedList finalize(NamedList res, SolrIndexSearcher searcher, DocSet docs, + String field, boolean missing) throws IOException { if (missing) { res.add(null, getFieldMissingCount(searcher,docs,field)); } diff --git a/solr/core/src/test/org/apache/solr/TestRandomFaceting.java b/solr/core/src/test/org/apache/solr/TestRandomFaceting.java index 443f79cb34c..d7beee06f88 100644 --- a/solr/core/src/test/org/apache/solr/TestRandomFaceting.java +++ b/solr/core/src/test/org/apache/solr/TestRandomFaceting.java @@ -261,25 +261,27 @@ public class TestRandomFaceting extends SolrTestCaseJ4 { } // if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails - if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation + if (!(params.getInt("facet.limit", 100) == 0 && + !params.getBool("facet.missing", false))) { + // it bypasses all processing, and we can go to empty validation if (exists && params.getInt("facet.mincount", 0)>1) { assertQEx("no mincount on facet.exists", rand.nextBoolean() ? "facet.exists":"facet.mincount", req(params), ErrorCode.BAD_REQUEST); continue; } - // facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS + // facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS final boolean notEnum = method != null && !method.equals("enum"); final boolean trieField = trieFields.matcher(ftype.fname).matches(); if ((notEnum || trieField) && exists) { - assertQEx("facet.exists only when enum or ommitted", + assertQEx("facet.exists only when enum or ommitted", "facet.exists", req(params), ErrorCode.BAD_REQUEST); continue; } if (exists && sf.getType().isPointField()) { // PointFields don't yet support "enum" method or the "facet.exists" parameter - assertQEx("Expecting failure, since ", - "facet.exists=true is requested, but facet.method=enum can't be used with " + sf.getName(), + assertQEx("Expecting failure, since ", + "facet.exists=true is requested, but facet.method=enum can't be used with " + sf.getName(), req(params), ErrorCode.BAD_REQUEST); continue; } @@ -370,10 +372,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 { int fromIndex = offset > stratified.size() ? stratified.size() : offset; stratified = stratified.subList(fromIndex, end > stratified.size() ? stratified.size() : end); - - if (params.getInt("facet.limit", 100)>0) { /// limit=0 omits even miss count - stratified.addAll(stratas.get(null)); - } + + stratified.addAll(stratas.get(null)); + facetSortedByIndex.clear(); facetSortedByIndex.addAll(stratified); }); diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java index cc311358b90..7938e23897e 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java @@ -91,25 +91,23 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas } // sanity check limit=0 w/ mincount=0 & missing=true - // - // SOLR-6328: doesn't work for single node, so can't work for distrib either (yet) - // - // PivotFacetField's init of needRefinementAtThisLevel as needing potential change - // - // rsp = query( "q", "*:*", - // "rows", "0", - // "facet","true", - // "f.company_t.facet.limit", "10", - // "facet.pivot","special_s,bogus_s,company_t", - // "facet.missing", "true", - // FacetParams.FACET_LIMIT, "0", - // FacetParams.FACET_PIVOT_MINCOUNT,"0"); - // pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t"); - // assertEquals(1, pivots.size()); // only the missing - // assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s - // assertEquals(pivots.toString(), 1, pivots.get(0).getPivot()); - // assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0)); - // // TODO: some asserts on company results + rsp = query( "q", "*:*", + "rows", "0", + "facet","true", + "f.company_t.facet.limit", "10", + "facet.pivot","special_s,bogus_s,company_t", + "facet.missing", "true", + FacetParams.FACET_LIMIT, "0", + FacetParams.FACET_PIVOT_MINCOUNT,"0"); + pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t"); + assertEquals(1, pivots.size()); // only the missing + assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s + assertEquals(pivots.toString(), 1, pivots.get(0).getPivot().size()); + assertPivot("bogus_s", null, docNumber - 5 , pivots.get(0).getPivot().get(0)); // 5 docs w/special_s + PivotField bogus = pivots.get(0).getPivot().get(0); + assertEquals(bogus.toString(), 11, bogus.getPivot().size()); + // last value would always be missing docs + assertPivot("company_t", null, 2, bogus.getPivot().get(10)); // 2 docs w/company_t // basic check w/ default sort, limit, & mincount==0 rsp = query( "q", "*:*", diff --git a/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java b/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java index 0f4ec0630be..18131839e61 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/FacetPivotSmallTest.java @@ -376,6 +376,18 @@ public class FacetPivotSmallTest extends SolrTestCaseJ4 { facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5]/int[@name='count'][.=1]", // wrong missing company count facetPivotPrefix + "[5]/arr[@name='pivot']/lst[5][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots ); + + SolrParams missingC = SolrParams.wrapDefaults(missingA, + params(FacetParams.FACET_LIMIT, "0", "facet.sort", "index")); + + assertQ(req(missingC), facetPivotPrefix + "/arr[@name='pivot'][count(.) > 0]", // not enough values for pivot + facetPivotPrefix + "[1]/null[@name='value'][.='']", // not the missing place value + facetPivotPrefix + "[1]/int[@name='count'][.=2]", // wrong missing place count + facetPivotPrefix + "[1]/arr[@name='pivot'][count(.) > 0]", // not enough sub-pivots for missing place + facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1]/null[@name='value'][.='']", // not the missing company value + facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1]/int[@name='count'][.=1]", // wrong missing company count + facetPivotPrefix + "[1]/arr[@name='pivot']/lst[1][not(arr[@name='pivot'])]" // company shouldn't have sub-pivots + ); } public void testPivotFacetIndexSortMincountAndLimit() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java index 809c68d2e02..e17c821b75c 100644 --- a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java +++ b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java @@ -597,6 +597,75 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { } } + @Test + public void testFacetMissing() { + SolrParams commonParams = params("q", "foo_s:A", "rows", "0", "facet", "true", "facet.missing", "true"); + + // with facet.limit!=0 and facet.missing=true + assertQ( + req(commonParams, "facet.field", "trait_s", "facet.limit", "1"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']", + "*[count(//lst[@name='trait_s']/int)=2]", + "//lst[@name='trait_s']/int[@name='Obnoxious'][.='2']", + "//lst[@name='trait_s']/int[.='1']" + ); + + // with facet.limit=0 and facet.missing=true + assertQ( + req(commonParams, "facet.field", "trait_s", "facet.limit", "0"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']", + "*[count(//lst[@name='trait_s']/int)=1]", + "//lst[@name='trait_s']/int[.='1']" + ); + + // facet.method=enum + assertQ( + req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.method", "enum"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']", + "*[count(//lst[@name='trait_s']/int)=1]", + "//lst[@name='trait_s']/int[.='1']" + ); + + assertQ( + req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.mincount", "1", + "facet.method", "uif"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']", + "*[count(//lst[@name='trait_s']/int)=1]", + "//lst[@name='trait_s']/int[.='1']" + ); + + // facet.method=fcs + assertQ( + req(commonParams, "facet.field", "trait_s", "facet.limit", "0", "facet.method", "fcs"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='trait_s']", + "*[count(//lst[@name='trait_s']/int)=1]", + "//lst[@name='trait_s']/int[.='1']" + ); + + // facet.missing=true on numeric field + assertQ( + req(commonParams, "facet.field", "range_facet_f", "facet.limit", "1", "facet.mincount", "1"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='range_facet_f']", + "*[count(//lst[@name='range_facet_f']/int)=2]", + "//lst[@name='range_facet_f']/int[.='0']" + ); + + // facet.limit=0 + assertQ( + req(commonParams, "facet.field", "range_facet_f", "facet.limit", "0", "facet.mincount", "1"), + "//lst[@name='facet_counts']/lst[@name='facet_fields']", + "//lst[@name='facet_counts']/lst[@name='facet_fields']/lst[@name='range_facet_f']", + "*[count(//lst[@name='range_facet_f']/int)=1]", + "//lst[@name='range_facet_f']/int[.='0']" + ); + } + @Test public void testSimpleFacetCounts() { @@ -2274,11 +2343,12 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"group","true" ,"group.field",g ,"group.facet","true" + ,"facet.missing","true" ); assertQ("test facet.exclude for grouped facets", groupReq - ,"*[count(//lst[@name='facet_fields']/lst/int)=10]" + ,"*[count(//lst[@name='facet_fields']/lst/int)=11]" ,pre+"/int[1][@name='CCC'][.='3']" ,pre+"/int[2][@name='CCC"+termSuffix+"'][.='3']" ,pre+"/int[3][@name='BBB'][.='2']" @@ -2289,6 +2359,17 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,pre+"/int[8][@name='BB"+termSuffix+"'][.='1']" ,pre+"/int[9][@name='CC'][.='1']" ,pre+"/int[10][@name='CC"+termSuffix+"'][.='1']" + ,pre+"/int[11][.='1']" + ); + + ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams(groupReq.getParams()); + modifiableSolrParams.set("facet.limit", "0"); + groupReq.setParams(modifiableSolrParams); + + assertQ("test facet.exclude for grouped facets with facet.limit=0, facet.missing=true", + groupReq + ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" + ,pre+"/int[.='1']" ); }