From bde99bad2ecbcce75b40be3265e74bb48f6bd7d7 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Fri, 1 Jul 2016 17:10:23 +0200
Subject: [PATCH] Use a static default precision for the cardinality
 aggregation. #19215

Today the default precision for the cardinality aggregation depends on how many
parent bucket aggregations it had. The reasoning was that the more parent bucket
aggregations, the more buckets the cardinality had to be computed on. And this
number could be huge depending on what the parent aggregations actually are.

However now that we run terms aggregations in breadth-first mode by default when
there are sub aggregations, it is less likely that we have to run the cardinality
aggregation on kagilions of buckets. So we could use a static default, which will
be less confusing to users.
---
 .../CardinalityAggregatorFactory.java         | 30 ++++---------------
 .../metrics/cardinality-aggregation.asciidoc  |  4 +--
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java
index ff3e4959adf..dad9ba46e6d 100644
--- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java
+++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java
@@ -23,7 +23,6 @@ import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.AggregatorFactory;
 import org.elasticsearch.search.aggregations.InternalAggregation.Type;
-import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
@@ -48,36 +47,19 @@ public class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory<
     @Override
     protected Aggregator createUnmapped(Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
             throws IOException {
-        return new CardinalityAggregator(name, null, precision(parent), context, parent, pipelineAggregators, metaData);
+        return new CardinalityAggregator(name, null, precision(), context, parent, pipelineAggregators, metaData);
     }
 
     @Override
     protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket,
             List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
-        return new CardinalityAggregator(name, valuesSource, precision(parent), context, parent, pipelineAggregators,
+        return new CardinalityAggregator(name, valuesSource, precision(), context, parent, pipelineAggregators,
                 metaData);
     }
 
-    private int precision(Aggregator parent) {
-        return precisionThreshold == null ? defaultPrecision(parent) : HyperLogLogPlusPlus.precisionFromThreshold(precisionThreshold);
-    }
-
-    /*
-     * If one of the parent aggregators is a MULTI_BUCKET one, we might want to lower the precision
-     * because otherwise it might be memory-intensive. On the other hand, for top-level aggregators
-     * we try to focus on accuracy.
-     */
-    private static int defaultPrecision(Aggregator parent) {
-        int precision = HyperLogLogPlusPlus.DEFAULT_PRECISION;
-        while (parent != null) {
-            if (parent instanceof SingleBucketAggregator == false) {
-                // if the parent creates buckets, we subtract 5 to the precision,
-                // which will effectively divide the memory usage of each counter by 32
-                precision -= 5;
-            }
-            parent = parent.parent();
-        }
-
-        return Math.max(precision, HyperLogLogPlusPlus.MIN_PRECISION);
+    private int precision() {
+        return precisionThreshold == null
+                ? HyperLogLogPlusPlus.DEFAULT_PRECISION
+                : HyperLogLogPlusPlus.precisionFromThreshold(precisionThreshold);
     }
 }
diff --git a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc b/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc
index 9c4ee59cccf..fcb866cff26 100644
--- a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc
+++ b/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc
@@ -45,9 +45,7 @@ experimental[The `precision_threshold` option is specific to the current interna
 defines a unique count below which counts are expected to be close to
 accurate. Above this value, counts might become a bit more fuzzy. The maximum
 supported value is 40000, thresholds above this number will have the same
-effect as a threshold of 40000.
-Default value depends on the number of parent aggregations that multiple
-create buckets (such as terms or histograms).
+effect as a threshold of 40000. The default values is +3000+.
 
 ==== Counts are approximate