From e4e990b993d6872f6345b7d064efb8ca22ee6556 Mon Sep 17 00:00:00 2001 From: Dennis Gove Date: Mon, 23 May 2016 15:13:37 -0400 Subject: [PATCH] SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode --- solr/CHANGES.txt | 3 +++ .../solr/handler/component/FacetComponent.java | 18 ++++++++++++++---- .../apache/solr/cloud/TestCloudPivotFacet.java | 9 +++++++++ .../apache/solr/common/params/FacetParams.java | 17 +++++++++++++++++ 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 78594f33257..f52dc9bdaf8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -142,6 +142,9 @@ New Features * SOLR-8323, SOLR-9113: Add CollectionStateWatcher API (Alan Woodward, Scott Blum) +* SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode. + (Keith Laban, Dennis Gove) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java index 396dc3cea16..26b2e597bab 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -563,9 +563,16 @@ public class FacetComponent extends SearchComponent { // set the initial limit higher to increase accuracy dff.initialLimit = doOverRequestMath(dff.initialLimit, dff.overrequestRatio, dff.overrequestCount); - dff.initialMincount = 0; // TODO: we could change this to 1, but would - // then need more refinement for small facet - // result sets? + + // If option FACET_DISTRIB_MCO is turned on then we will use 1 as the initial + // minCount (unless the user explicitly set it to something less than 1). If + // option FACET_DISTRIB_MCO is turned off then we will use 0 as the initial + // minCount regardless of what the user might have provided (prior to the + // addition of the FACET_DISTRIB_MCO option the default logic was to use 0). + // As described in issues SOLR-8559 and SOLR-8988 the use of 1 provides a + // significant performance boost. + dff.initialMincount = dff.mco ? Math.min(dff.minCount, 1) : 0; + } else { // if limit==-1, then no need to artificially lower mincount to 0 if // it's 1 @@ -1415,6 +1422,7 @@ public class FacetComponent extends SearchComponent { public int initialLimit; // how many terms requested in first phase public int initialMincount; // mincount param sent to each shard + public boolean mco; public double overrequestRatio; public int overrequestCount; public boolean needRefinements; @@ -1433,7 +1441,9 @@ public class FacetComponent extends SearchComponent { = params.getFieldDouble(field, FacetParams.FACET_OVERREQUEST_RATIO, 1.5); this.overrequestCount = params.getFieldInt(field, FacetParams.FACET_OVERREQUEST_COUNT, 10); - + + this.mco + = params.getFieldBool(field, FacetParams.FACET_DISTRIB_MCO, false); } void add(int shardNum, NamedList shardCounts, int numRequested) { diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java index a29e35764d4..460c5017054 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPivotFacet.java @@ -53,6 +53,7 @@ import static org.apache.solr.common.params.FacetParams.FACET_OVERREQUEST_RATIO; import static org.apache.solr.common.params.FacetParams.FACET_PIVOT; import static org.apache.solr.common.params.FacetParams.FACET_PIVOT_MINCOUNT; import static org.apache.solr.common.params.FacetParams.FACET_SORT; +import static org.apache.solr.common.params.FacetParams.FACET_DISTRIB_MCO; /** *

@@ -84,6 +85,8 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { // param used by test purely for tracing & validation private static String TRACE_MIN = "_test_min"; // param used by test purely for tracing & validation + private static String TRACE_DISTRIB_MIN = "_test_distrib_min"; + // param used by test purely for tracing & validation private static String TRACE_MISS = "_test_miss"; // param used by test purely for tracing & validation private static String TRACE_SORT = "_test_sort"; @@ -190,6 +193,12 @@ public class TestCloudPivotFacet extends AbstractFullDistribZkTestBase { // trace param for validation baseP.add(TRACE_MIN, min); } + + if (random().nextBoolean()) { + pivotP.add(FACET_DISTRIB_MCO, "true"); + // trace param for validation + baseP.add(TRACE_DISTRIB_MIN, "true"); + } if (random().nextBoolean()) { String missing = ""+random().nextBoolean(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java index ee2e91b4667..e014c86fd29 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java @@ -122,6 +122,23 @@ public interface FacetParams { public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count"; + public static final String FACET_DISTRIB = FACET + ".distrib"; + + /** + * If we are returning facet field counts, are sorting those facets by their count, and the minimum count to return is > 0, + * then allow the use of facet.mincount = 1 in cloud mode. To enable this use facet.distrib.mco=true. + * + * i.e. If the following three conditions are met in cloud mode: facet.sort=count, facet.limit > 0, facet.mincount > 0. + * Then use facet.mincount=1. + * + * Previously and by default facet.mincount will be explicitly set to 0 when in cloud mode for this condition. + * In SOLR-8599 and SOLR-8988, significant performance increase has been seen when enabling this optimization. + * + * Note: enabling this flag has no effect when the conditions above are not met. For those other cases the default behavior is sufficient. + */ + + public static final String FACET_DISTRIB_MCO = FACET_DISTRIB + ".mco"; + /** * Comma separated list of fields to pivot *