From 3868bcc7b81d26d2e1fdb89d84cae9a1099c0fa4 Mon Sep 17 00:00:00 2001 From: Christos Soulios <1561376+csoulios@users.noreply.github.com> Date: Mon, 13 Jul 2020 19:36:33 +0300 Subject: [PATCH] [7.x] Histogram integration on Histogram field type (#59431) Backports #58930 to 7.x Implements histogram aggregation over histogram fields as requested in #53285. --- .../bucket/histogram-aggregation.asciidoc | 90 +++++++ .../mapping/types/histogram.asciidoc | 1 + .../histogram/HistogramAggregatorFactory.java | 1 - .../HistogramAggregatorSupplier.java | 4 +- .../bucket/histogram/InternalHistogram.java | 15 +- .../xpack/analytics/AnalyticsPlugin.java | 5 +- .../AnalyticsAggregatorFactory.java | 19 +- .../HistoBackedHistogramAggregator.java | 94 +++++++ .../metrics/HistoBackedAvgAggregator.java | 4 +- ...stoBackedHDRPercentileRanksAggregator.java | 4 +- .../HistoBackedHDRPercentilesAggregator.java | 4 +- .../metrics/HistoBackedSumAggregator.java | 4 +- ...ackedTDigestPercentileRanksAggregator.java | 4 +- ...stoBackedTDigestPercentilesAggregator.java | 4 +- .../HistoBackedHistogramAggregatorTests.java | 232 ++++++++++++++++++ .../test/analytics/histogram.yml | 20 ++ 16 files changed, 483 insertions(+), 22 deletions(-) rename server/src/main/java/org/elasticsearch/search/aggregations/{support => bucket/histogram}/HistogramAggregatorSupplier.java (94%) rename x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/{metrics => }/AnalyticsAggregatorFactory.java (78%) create mode 100644 x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregatorTests.java diff --git a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc index d657a687ff2..5de8332f5a4 100644 --- a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc @@ -286,3 +286,93 @@ POST /sales/_search?size=0 // TEST[setup:sales] <1> Documents without a value in the `quantity` field will fall into the same bucket as documents that have the value `0`. + +[[search-aggregations-bucket-histogram-aggregation-histogram-fields]] +==== Histogram fields + +Running a histogram aggregation over histogram fields computes the total number of counts for each interval. + +For example, executing a histogram aggregation against the following index that stores pre-aggregated histograms +with latency metrics (in milliseconds) for different networks: + +[source,console] +-------------------------------------------------- +PUT metrics_index/_doc/1 +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [1, 3, 8, 12, 15], + "counts" : [3, 7, 23, 12, 6] + } +} + +PUT metrics_index/_doc/2 +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [1, 6, 8, 12, 14], + "counts" : [8, 17, 8, 7, 6] + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs" : { + "latency_buckets" : { + "histogram" : { + "field" : "latency_histo", + "interval" : 5 + } + } + } +} +-------------------------------------------------- + + +The `histogram` aggregation will sum the counts of each interval computed based on the `values` and +return the following output: + +[source,console-result] +-------------------------------------------------- +{ + ... + "aggregations": { + "prices" : { + "buckets": [ + { + "key": 0.0, + "doc_count": 18 + }, + { + "key": 5.0, + "doc_count": 48 + }, + { + "key": 10.0, + "doc_count": 25 + }, + { + "key": 15.0, + "doc_count": 6 + } + ] + } + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:test not setup] + +[IMPORTANT] +======== +Histogram aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like +metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. +On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: +buckets of numerical data and a count of items/documents for each bucket. This mismatch between the histogram aggregations expected input +(expecting raw documents) and the histogram field (that provides summary information) limits the outcome of the aggregation +to only the doc counts for each bucket. + + +**Consequently, when executing a histogram aggregation over a histogram field, no sub-aggregations are allowed.** +======== + +Also, when running histogram aggregation over histogram field the `missing` parameter is not supported. diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index b1c98834d12..4e4b923d5a7 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -41,6 +41,7 @@ following aggregations and queries: * <> aggregation * <> aggregation * <> aggregation +* <> aggregation * <> query [[mapping-types-histogram-building-histogram]] diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java index be22776023d..5de12ca93b8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java @@ -29,7 +29,6 @@ import org.elasticsearch.search.aggregations.CardinalityUpperBound; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.support.AggregatorSupplier; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; -import org.elasticsearch.search.aggregations.support.HistogramAggregatorSupplier; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java similarity index 94% rename from server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java rename to server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java index c6450a2a7b2..d5ad8b01eff 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/HistogramAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorSupplier.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.support; +package org.elasticsearch.search.aggregations.bucket.histogram; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java index 73aab00c253..b7bc60679ab 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java @@ -152,12 +152,12 @@ public final class InternalHistogram extends InternalMultiBucketAggregation buckets, BucketOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, - DocValueFormat formatter, boolean keyed, Map metadata) { + public InternalHistogram( + String name, + List buckets, + BucketOrder order, + long minDocCount, + EmptyBucketInfo emptyBucketInfo, + DocValueFormat formatter, + boolean keyed, + Map metadata) { super(name, metadata); this.buckets = buckets; this.order = order; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java index 51629c0f53a..ea0e9282b3e 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java @@ -31,7 +31,7 @@ import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.watcher.ResourceWatcherService; import org.elasticsearch.xpack.analytics.action.TransportAnalyticsStatsAction; -import org.elasticsearch.xpack.analytics.aggregations.metrics.AnalyticsAggregatorFactory; +import org.elasticsearch.xpack.analytics.aggregations.AnalyticsAggregatorFactory; import org.elasticsearch.xpack.analytics.normalize.NormalizePipelineAggregationBuilder; import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder; import org.elasticsearch.xpack.analytics.boxplot.InternalBoxplot; @@ -157,7 +157,8 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi AnalyticsAggregatorFactory::registerPercentileRanksAggregator, AnalyticsAggregatorFactory::registerHistoBackedSumAggregator, AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator, - AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator + AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator, + AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator ); } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/AnalyticsAggregatorFactory.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java similarity index 78% rename from x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/AnalyticsAggregatorFactory.java rename to x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java index 24e25793c58..9ae2eb61098 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/AnalyticsAggregatorFactory.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java @@ -3,8 +3,10 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -package org.elasticsearch.xpack.analytics.aggregations.metrics; +package org.elasticsearch.xpack.analytics.aggregations; +import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregatorSupplier; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.MetricAggregatorSupplier; import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; @@ -15,6 +17,14 @@ import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.ValueCountAggregationBuilder; import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; +import org.elasticsearch.xpack.analytics.aggregations.bucket.histogram.HistoBackedHistogramAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedValueCountAggregator; import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType; public class AnalyticsAggregatorFactory { @@ -83,4 +93,11 @@ public class AnalyticsAggregatorFactory { (MetricAggregatorSupplier) HistoBackedAvgAggregator::new ); } + + public static void registerHistoBackedHistogramAggregator(ValuesSourceRegistry.Builder builder) { + builder.register(HistogramAggregationBuilder.NAME, + AnalyticsValuesSourceType.HISTOGRAM, + (HistogramAggregatorSupplier) HistoBackedHistogramAggregator::new + ); + } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java new file mode 100644 index 00000000000..05b052f7118 --- /dev/null +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java @@ -0,0 +1,94 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram; + +import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.CardinalityUpperBound; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.bucket.histogram.AbstractHistogramAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource; + +import java.io.IOException; +import java.util.Map; + +public class HistoBackedHistogramAggregator extends AbstractHistogramAggregator { + + private final HistogramValuesSource.Histogram valuesSource; + + public HistoBackedHistogramAggregator( + String name, + AggregatorFactories factories, + double interval, + double offset, + BucketOrder order, + boolean keyed, + long minDocCount, + double minBound, + double maxBound, + ValuesSourceConfig valuesSourceConfig, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinalityUpperBound, + Map metadata) throws IOException { + super(name, factories, interval, offset, order, keyed, minDocCount, minBound, maxBound, + valuesSourceConfig.format(), context, parent, cardinalityUpperBound, metadata); + + // TODO: Stop using null here + this.valuesSource = valuesSourceConfig.hasValues() ? (HistogramValuesSource.Histogram) valuesSourceConfig.getValuesSource() : null; + + // Sub aggregations are not allowed when running histogram agg over histograms + if (subAggregators().length > 0) { + throw new IllegalArgumentException("Histogram aggregation on histogram fields does not support sub-aggregations"); + } + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, + final LeafBucketCollector sub) throws IOException { + if (valuesSource == null) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } + + final HistogramValues values = valuesSource.getHistogramValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + + double previousKey = Double.NEGATIVE_INFINITY; + while (sketch.next()) { + final double value = sketch.value(); + final int count = sketch.count(); + + double key = Math.floor((value - offset) / interval); + assert key >= previousKey; + long bucketOrd = bucketOrds.add(owningBucketOrd, Double.doubleToLongBits(key)); + if (bucketOrd < 0) { // already seen + bucketOrd = -1 - bucketOrd; + collectExistingBucket(sub, doc, bucketOrd); + } else { + collectBucket(sub, doc, bucketOrd); + } + // We have added the document already. We should increment doc_count by count - 1 + // so that we have added it count times. + incrementBucketDocCount(bucketOrd, count - 1); + previousKey = key; + } + } + } + }; + } +} diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedAvgAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedAvgAggregator.java index 424c17e4f91..ae3bfdb128f 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedAvgAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedAvgAggregator.java @@ -32,7 +32,7 @@ import java.util.Map; * Average aggregator operating over histogram datatypes {@link HistogramValuesSource} * The aggregation computes weighted average by taking counts into consideration for each value */ -class HistoBackedAvgAggregator extends NumericMetricsAggregator.SingleValue { +public class HistoBackedAvgAggregator extends NumericMetricsAggregator.SingleValue { private final HistogramValuesSource.Histogram valuesSource; @@ -41,7 +41,7 @@ class HistoBackedAvgAggregator extends NumericMetricsAggregator.SingleValue { DoubleArray compensations; DocValueFormat format; - HistoBackedAvgAggregator( + public HistoBackedAvgAggregator( String name, ValuesSourceConfig valuesSourceConfig, SearchContext context, diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentileRanksAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentileRanksAggregator.java index c88be4a3d27..1cf231e58d5 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentileRanksAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentileRanksAggregator.java @@ -17,9 +17,9 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; -class HistoBackedHDRPercentileRanksAggregator extends AbstractHistoBackedHDRPercentilesAggregator { +public class HistoBackedHDRPercentileRanksAggregator extends AbstractHistoBackedHDRPercentilesAggregator { - HistoBackedHDRPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, + public HistoBackedHDRPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, Map metadata) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, format, metadata); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentilesAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentilesAggregator.java index 736bb3169ba..d6dfd2014a6 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentilesAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedHDRPercentilesAggregator.java @@ -19,8 +19,8 @@ import java.util.Map; public class HistoBackedHDRPercentilesAggregator extends AbstractHistoBackedHDRPercentilesAggregator { - HistoBackedHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, - int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, + public HistoBackedHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, + double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, Map metadata) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, metadata); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedSumAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedSumAggregator.java index 662c21a6bf2..f7796ac0237 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedSumAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedSumAggregator.java @@ -33,7 +33,7 @@ import java.util.Map; * The aggregator sums each histogram value multiplied by its count. * Eg for a histogram of response times, this is an approximate "total time spent". */ -class HistoBackedSumAggregator extends NumericMetricsAggregator.SingleValue { +public class HistoBackedSumAggregator extends NumericMetricsAggregator.SingleValue { private final HistogramValuesSource.Histogram valuesSource; private final DocValueFormat format; @@ -41,7 +41,7 @@ class HistoBackedSumAggregator extends NumericMetricsAggregator.SingleValue { private DoubleArray sums; private DoubleArray compensations; - HistoBackedSumAggregator( + public HistoBackedSumAggregator( String name, ValuesSourceConfig valuesSourceConfig, SearchContext context, diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentileRanksAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentileRanksAggregator.java index 5acba9d1ac6..7d5abc31ecf 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentileRanksAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentileRanksAggregator.java @@ -17,9 +17,9 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; -class HistoBackedTDigestPercentileRanksAggregator extends AbstractHistoBackedTDigestPercentilesAggregator { +public class HistoBackedTDigestPercentileRanksAggregator extends AbstractHistoBackedTDigestPercentilesAggregator { - HistoBackedTDigestPercentileRanksAggregator(String name, + public HistoBackedTDigestPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentilesAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentilesAggregator.java index d8ce85f4776..66001948661 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentilesAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedTDigestPercentilesAggregator.java @@ -17,9 +17,9 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; -class HistoBackedTDigestPercentilesAggregator extends AbstractHistoBackedTDigestPercentilesAggregator { +public class HistoBackedTDigestPercentilesAggregator extends AbstractHistoBackedTDigestPercentilesAggregator { - HistoBackedTDigestPercentilesAggregator(String name, + public HistoBackedTDigestPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregatorTests.java new file mode 100644 index 00000000000..934dbbd1ef2 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregatorTests.java @@ -0,0 +1,232 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import static java.util.Collections.singleton; + +public class HistoBackedHistogramAggregatorTests extends AggregatorTestCase { + + private static final String FIELD_NAME = "field"; + + public void testHistograms() throws Exception { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30}))); + + HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg") + .field(FIELD_NAME) + .interval(5); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME)); + assertEquals(9, histogram.getBuckets().size()); + assertEquals(-10d, histogram.getBuckets().get(0).getKey()); + assertEquals(1, histogram.getBuckets().get(0).getDocCount()); + assertEquals(-5d, histogram.getBuckets().get(1).getKey()); + assertEquals(0, histogram.getBuckets().get(1).getDocCount()); + assertEquals(0d, histogram.getBuckets().get(2).getKey()); + assertEquals(3, histogram.getBuckets().get(2).getDocCount()); + assertEquals(5d, histogram.getBuckets().get(3).getKey()); + assertEquals(3, histogram.getBuckets().get(3).getDocCount()); + assertEquals(10d, histogram.getBuckets().get(4).getKey()); + assertEquals(4, histogram.getBuckets().get(4).getDocCount()); + assertEquals(15d, histogram.getBuckets().get(5).getKey()); + assertEquals(0, histogram.getBuckets().get(5).getDocCount()); + assertEquals(20d, histogram.getBuckets().get(6).getKey()); + assertEquals(2, histogram.getBuckets().get(6).getDocCount()); + assertEquals(25d, histogram.getBuckets().get(7).getKey()); + assertEquals(0, histogram.getBuckets().get(7).getDocCount()); + assertEquals(30d, histogram.getBuckets().get(8).getKey()); + assertEquals(1, histogram.getBuckets().get(8).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(histogram)); + } + } + } + + public void testMinDocCount() throws Exception { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {0, 1.2, 10, 12, 24}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 10, 10, 30, 90}))); + + HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg") + .field(FIELD_NAME) + .interval(5) + .minDocCount(2); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME)); + assertEquals(4, histogram.getBuckets().size()); + assertEquals(0d, histogram.getBuckets().get(0).getKey()); + assertEquals(3, histogram.getBuckets().get(0).getDocCount()); + assertEquals(5d, histogram.getBuckets().get(1).getKey()); + assertEquals(3, histogram.getBuckets().get(1).getDocCount()); + assertEquals(10d, histogram.getBuckets().get(2).getKey()); + assertEquals(4, histogram.getBuckets().get(2).getDocCount()); + assertEquals(20d, histogram.getBuckets().get(3).getKey()); + assertEquals(2, histogram.getBuckets().get(3).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(histogram)); + } + } + } + + public void testRandomOffset() throws Exception { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + // Note, these values are carefully chosen to ensure that no matter what offset we pick, no two can end up in the same bucket + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3.2, 9.3}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 }))); + + final double offset = randomDouble(); + final double interval = 5; + final double expectedOffset = offset % interval; + HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg") + .field(FIELD_NAME) + .interval(interval) + .offset(offset) + .minDocCount(1); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME)); + + assertEquals(3, histogram.getBuckets().size()); + assertEquals(-10 + expectedOffset, histogram.getBuckets().get(0).getKey()); + assertEquals(1, histogram.getBuckets().get(0).getDocCount()); + + assertEquals(expectedOffset, histogram.getBuckets().get(1).getKey()); + assertEquals(2, histogram.getBuckets().get(1).getDocCount()); + + assertEquals(5 + expectedOffset, histogram.getBuckets().get(2).getKey()); + assertEquals(1, histogram.getBuckets().get(2).getDocCount()); + + assertTrue(AggregationInspectionHelper.hasValue(histogram)); + } + } + } + + public void testExtendedBounds() throws Exception { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 }))); + + HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg") + .field(FIELD_NAME) + .interval(5) + .extendedBounds(-12, 13); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME)); + assertEquals(6, histogram.getBuckets().size()); + assertEquals(-15d, histogram.getBuckets().get(0).getKey()); + assertEquals(0, histogram.getBuckets().get(0).getDocCount()); + assertEquals(-10d, histogram.getBuckets().get(1).getKey()); + assertEquals(0, histogram.getBuckets().get(1).getDocCount()); + assertEquals(-5d, histogram.getBuckets().get(2).getKey()); + assertEquals(2, histogram.getBuckets().get(2).getDocCount()); + assertEquals(0d, histogram.getBuckets().get(3).getKey()); + assertEquals(2, histogram.getBuckets().get(3).getDocCount()); + assertEquals(5d, histogram.getBuckets().get(4).getKey()); + assertEquals(0, histogram.getBuckets().get(4).getDocCount()); + assertEquals(10d, histogram.getBuckets().get(5).getKey()); + assertEquals(0, histogram.getBuckets().get(5).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(histogram)); + } + } + } + + /** + * Test that sub-aggregations are not supported + */ + public void testSubAggs() throws Exception { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-4.5, 4.3}))); + w.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-5, 3.2 }))); + + HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg") + .field(FIELD_NAME) + .interval(5) + .extendedBounds(-12, 13) + .subAggregation(new TopHitsAggregationBuilder("top_hits")); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME)) + ); + + assertEquals("Histogram aggregation on histogram fields does not support sub-aggregations", e.getMessage()); + } + } + } + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection centroids = histogram.centroids(); + Iterator iterator = centroids.iterator(); + while ( iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + @Override + protected List getSearchPlugins() { + return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY)); + } + + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new HistogramAggregationBuilder("_name").field(fieldName); + } + + private MappedFieldType defaultFieldType(String fieldName) { + return new HistogramFieldMapper.HistogramFieldType(fieldName, true, Collections.emptyMap()); + } + +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml index d1fb34c8b81..168f66e498f 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml @@ -46,4 +46,24 @@ setup: - match: { aggregations.histo_avg.value: 0.3125} +--- +"Histogram over histograms": + - do: + search: + index: "test" + body: + size: 0 + aggs: + histo: + histogram: + field: "latency" + interval: 0.3 + + + - match: { hits.total.value: 2 } + - length: { aggregations.histo.buckets: 2 } + - match: { aggregations.histo.buckets.0.key: 0.0 } + - match: { aggregations.histo.buckets.0.doc_count: 20 } + - match: { aggregations.histo.buckets.1.key: 0.3 } + - match: { aggregations.histo.buckets.1.doc_count: 60 }