From ad79a2b6a1a3a406e6c232f508f56b338ca3b770 Mon Sep 17 00:00:00 2001 From: Christos Soulios <1561376+csoulios@users.noreply.github.com> Date: Mon, 21 Sep 2020 12:53:56 +0300 Subject: [PATCH] [7.x] Histogram field type support for min/max aggregations (#62689) Implement min/max aggregations for histogram fields. Backports #62532 --- .../metrics/max-aggregation.asciidoc | 51 ++++++ .../metrics/min-aggregation.asciidoc | 51 ++++++ .../mapping/types/histogram.asciidoc | 2 + .../xpack/analytics/AnalyticsPlugin.java | 18 +- .../AnalyticsAggregatorFactory.java | 13 ++ .../metrics/HistoBackedMaxAggregator.java | 117 +++++++++++++ .../metrics/HistoBackedMinAggregator.java | 119 ++++++++++++++ .../HistoBackedMaxAggregatorTests.java | 155 ++++++++++++++++++ .../HistoBackedMinAggregatorTests.java | 155 ++++++++++++++++++ .../test/analytics/histogram.yml | 8 + 10 files changed, 681 insertions(+), 8 deletions(-) create mode 100644 x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregator.java create mode 100644 x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregator.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregatorTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregatorTests.java diff --git a/docs/reference/aggregations/metrics/max-aggregation.asciidoc b/docs/reference/aggregations/metrics/max-aggregation.asciidoc index a3d228e5c92..a506fb2aa14 100644 --- a/docs/reference/aggregations/metrics/max-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/max-aggregation.asciidoc @@ -139,3 +139,54 @@ POST /sales/_search <1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + +[[search-aggregations-metrics-max-aggregation-histogram-fields]] +==== Histogram fields + +When `max` is computed on <>, the result of the aggregation is the maximum +of all elements in the `values` array. Note that the `counts` array of the histogram is ignored. + +For example, consider the following index that stores pre-aggregated histograms with latency metrics for different networks: + +[source,console] +-------------------------------------------------- +PUT metrics_index/_doc/1 +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> + "counts" : [3, 7, 23, 12, 6] <2> + } +} + +PUT metrics_index/_doc/2 +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> + "counts" : [8, 17, 8, 7, 6] <2> + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs" : { + "max_latency" : { "max" : { "field" : "latency_histo" } } + } +} +-------------------------------------------------- + +The `max` aggregation will return the maximum value of all histogram fields: + +[source,console-result] +-------------------------------------------------- +{ + ...
+ "aggregations": { + "min_latency": { + "value": 0.5 + } + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:test not setup] diff --git a/docs/reference/aggregations/metrics/min-aggregation.asciidoc b/docs/reference/aggregations/metrics/min-aggregation.asciidoc index 3bc0e08d59a..c388c4b62a4 100644 --- a/docs/reference/aggregations/metrics/min-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/min-aggregation.asciidoc @@ -140,3 +140,54 @@ POST /sales/_search <1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. + +[[search-aggregations-metrics-min-aggregation-histogram-fields]] +==== Histogram fields + +When `min` is computed on <>, the result of the aggregation is the minimum +of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored. + +For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks: + +[source,console] +-------------------------------------------------- +PUT metrics_index/_doc/1 +{ + "network.name" : "net-1", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> + "counts" : [3, 7, 23, 12, 6] <2> + } +} + +PUT metrics_index/_doc/2 +{ + "network.name" : "net-2", + "latency_histo" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> + "counts" : [8, 17, 8, 7, 6] <2> + } +} + +POST /metrics_index/_search?size=0 +{ + "aggs" : { + "min_latency" : { "min" : { "field" : "latency_histo" } } + } +} +-------------------------------------------------- + +The `min` aggregation will return the minimum value of all histogram fields: + +[source,console-result] +-------------------------------------------------- +{ + ... + "aggregations": { + "min_latency": { + "value": 0.1 + } + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:test not setup] diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index 4aadf04480e..e47ea875e1a 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -35,6 +35,8 @@ binary <> and not indexed. 
Its size in bytes is at most Because the data is not indexed, you only can use `histogram` fields for the following aggregations and queries: +* <> aggregation +* <> aggregation * <> aggregation * <> aggregation * <> aggregation diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java index 2a1161249e6..da8a2944684 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java @@ -160,14 +160,16 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi @Override public List<Consumer<ValuesSourceRegistry.Builder>> getAggregationExtentions() { - return org.elasticsearch.common.collect.List.of( - AnalyticsAggregatorFactory::registerPercentilesAggregator, - AnalyticsAggregatorFactory::registerPercentileRanksAggregator, - AnalyticsAggregatorFactory::registerHistoBackedSumAggregator, - AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator, - AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator, - AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator - ); + return org.elasticsearch.common.collect.List.of( + AnalyticsAggregatorFactory::registerPercentilesAggregator, + AnalyticsAggregatorFactory::registerPercentileRanksAggregator, + AnalyticsAggregatorFactory::registerHistoBackedSumAggregator, + AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator, + AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator, + AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator, + AnalyticsAggregatorFactory::registerHistoBackedMinAggregator, + AnalyticsAggregatorFactory::registerHistoBackedMaxAggregator + ); } @Override diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java index 9f7e8fc7fe0..e694fc7b782 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/AnalyticsAggregatorFactory.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.analytics.aggregations; import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentilesConfig; @@ -18,6 +20,8 @@ import org.elasticsearch.xpack.analytics.aggregations.bucket.histogram.HistoBack import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator; +import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMaxAggregator; +import
org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMinAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator; @@ -87,4 +91,13 @@ public class AnalyticsAggregatorFactory { HistoBackedHistogramAggregator::new, true); } + + public static void registerHistoBackedMinAggregator(ValuesSourceRegistry.Builder builder) { + builder.register(MinAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMinAggregator::new, true); + } + + public static void registerHistoBackedMaxAggregator(ValuesSourceRegistry.Builder builder) { + builder.register(MaxAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMaxAggregator::new, true); + } + } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregator.java new file mode 100644 index 00000000000..671ce113f7d --- /dev/null +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregator.java @@ -0,0 +1,117 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.aggregations.metrics; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.DoubleArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.metrics.InternalMax; +import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource; + +import java.io.IOException; +import java.util.Map; + +public class HistoBackedMaxAggregator extends NumericMetricsAggregator.SingleValue { + + private final HistogramValuesSource.Histogram valuesSource; + final DocValueFormat formatter; + DoubleArray maxes; + + public HistoBackedMaxAggregator( + String name, + ValuesSourceConfig config, + SearchContext context, + Aggregator parent, + Map<String, Object> metadata + ) throws IOException { + super(name, context, parent, metadata); + this.valuesSource = config.hasValues() ?
(HistogramValuesSource.Histogram) config.getValuesSource() : null; + if (valuesSource != null) { + maxes = context.bigArrays().newDoubleArray(1, false); + maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY); + } + this.formatter = config.format(); + } + + @Override + public ScoreMode scoreMode() { + return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { + if (valuesSource == null) { + if (parent != null) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } else { + // we have no parent and the values source is empty so we can skip collecting hits. + throw new CollectionTerminatedException(); + } + } + + final BigArrays bigArrays = context.bigArrays(); + final HistogramValues values = valuesSource.getHistogramValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= maxes.size()) { + long from = maxes.size(); + maxes = bigArrays.grow(maxes, bucket + 1); + maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY); + } + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while (sketch.next()) { + double value = sketch.value(); + double max = maxes.get(bucket); + max = Math.max(max, value); + maxes.set(bucket, max); + } + } + } + }; + } + + @Override + public double metric(long owningBucketOrd) { + if (valuesSource == null || owningBucketOrd >= maxes.size()) { + return Double.NEGATIVE_INFINITY; + } + return maxes.get(owningBucketOrd); + } + + @Override + public InternalAggregation buildAggregation(long bucket) { + if (valuesSource == null || bucket >= maxes.size()) { + return buildEmptyAggregation(); + } + return new InternalMax(name, maxes.get(bucket), formatter, metadata()); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return new InternalMax(name, Double.NEGATIVE_INFINITY, formatter, metadata()); + } + + @Override + public void doClose() { + Releasables.close(maxes); + } +} diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregator.java new file mode 100644 index 00000000000..8e20e342531 --- /dev/null +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregator.java @@ -0,0 +1,119 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.analytics.aggregations.metrics; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.DoubleArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.metrics.InternalMin; +import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource; + +import java.io.IOException; +import java.util.Map; + +public class HistoBackedMinAggregator extends NumericMetricsAggregator.SingleValue { + + private final HistogramValuesSource.Histogram valuesSource; + final DocValueFormat format; + DoubleArray mins; + + public HistoBackedMinAggregator( + String name, + ValuesSourceConfig config, + SearchContext context, + Aggregator parent, + Map<String, Object> metadata + ) throws IOException { + super(name, context, parent, metadata); + this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null; + if (valuesSource != null) { + mins = context.bigArrays().newDoubleArray(1, false); + mins.fill(0, mins.size(), Double.POSITIVE_INFINITY); + } + this.format = config.format(); + } + + @Override + public ScoreMode scoreMode() { + return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { + if (valuesSource == null) { + if (parent != null) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } else { + // we have no parent and the values source is empty so we can skip collecting hits.
+ throw new CollectionTerminatedException(); + } + } + + final BigArrays bigArrays = context.bigArrays(); + final HistogramValues values = valuesSource.getHistogramValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= mins.size()) { + long from = mins.size(); + mins = bigArrays.grow(mins, bucket + 1); + mins.fill(from, mins.size(), Double.POSITIVE_INFINITY); + } + + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while (sketch.next()) { + double value = sketch.value(); + double min = mins.get(bucket); + min = Math.min(min, value); + mins.set(bucket, min); + } + } + + } + }; + } + + @Override + public double metric(long owningBucketOrd) { + if (valuesSource == null || owningBucketOrd >= mins.size()) { + return Double.POSITIVE_INFINITY; + } + return mins.get(owningBucketOrd); + } + + @Override + public InternalAggregation buildAggregation(long bucket) { + if (valuesSource == null || bucket >= mins.size()) { + return buildEmptyAggregation(); + } + return new InternalMin(name, mins.get(bucket), format, metadata()); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return new InternalMin(name, Double.POSITIVE_INFINITY, format, metadata()); + } + + @Override + public void doClose() { + Releasables.close(mins); + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregatorTests.java new file mode 100644 index 00000000000..91ab35ce101 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMaxAggregatorTests.java @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.analytics.aggregations.metrics; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalMax; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType; +import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; +import static org.elasticsearch.search.aggregations.AggregationBuilders.max; + +public class HistoBackedMaxAggregatorTests extends AggregatorTestCase { + + private static final String FIELD_NAME = "field"; + + public void testNoDocs() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + // Intentionally not writing any docs + }, max -> { + assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d); + assertFalse(AggregationInspectionHelper.hasValue(max)); + }); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10}))); + iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20}))); + }, max -> { + assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d); + assertFalse(AggregationInspectionHelper.hasValue(max)); + }); + } + + public void testSimpleHistogram() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))); + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20}))); + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))); + }, max -> { + assertEquals(90d, max.getValue(), 0.01d); + assertTrue(AggregationInspectionHelper.hasValue(max)); + }); + } + + public void testQueryFiltering() throws IOException { + testCase(new TermQuery(new Term("match", "yes")), iw -> { + iw.addDocument(Arrays.asList( + new StringField("match", "yes", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "yes", 
Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {5.3, 6, 20})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "no", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "no", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {3, 1.2, 100})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "yes", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})) + ); + }, max -> { + assertEquals(90d, max.getValue(), 0.01d); + assertTrue(AggregationInspectionHelper.hasValue(max)); + }); + } + + private void testCase(Query query, + CheckedConsumer<RandomIndexWriter, IOException> indexer, + Consumer<InternalMax> verify) throws IOException { + testCase(max("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType()); + } + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection<Centroid> centroids = histogram.centroids(); + Iterator<Centroid> iterator = centroids.iterator(); + while (iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + @Override + protected List<SearchPlugin> getSearchPlugins() { + return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY)); + } + + @Override + protected List<ValuesSourceType> getSupportedValuesSourceTypes() { + // Note: this is the same list as Core, plus Analytics + return org.elasticsearch.common.collect.List.of( + CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BOOLEAN, + CoreValuesSourceType.DATE, + AnalyticsValuesSourceType.HISTOGRAM + ); + } + + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new MaxAggregationBuilder("_name").field(fieldName); + } + + private MappedFieldType defaultFieldType() { + return new HistogramFieldMapper.HistogramFieldType(HistoBackedMaxAggregatorTests.FIELD_NAME, true, Collections.emptyMap()); + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregatorTests.java new file mode 100644 index 00000000000..674f4a548fd --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistoBackedMinAggregatorTests.java @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.analytics.aggregations.metrics; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalMin; +import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType; +import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; +import static org.elasticsearch.search.aggregations.AggregationBuilders.min; + +public class HistoBackedMinAggregatorTests extends AggregatorTestCase { + + private static final String FIELD_NAME = "field"; + + public void testNoDocs() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + // Intentionally not writing any docs + }, min -> { + assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d); + assertFalse(AggregationInspectionHelper.hasValue(min)); + }); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10}))); + iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20}))); + }, min -> { + assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d); + assertFalse(AggregationInspectionHelper.hasValue(min)); + }); + } + + public void testSimpleHistogram() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))); + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20}))); + iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))); + }, min -> { + assertEquals(-10d, min.getValue(), 0.01d); + assertTrue(AggregationInspectionHelper.hasValue(min)); + }); + } + + public void testQueryFiltering() throws IOException { + testCase(new TermQuery(new Term("match", "yes")), iw -> { + iw.addDocument(Arrays.asList( + new StringField("match", "yes", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "yes", 
Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {5.3, 6, 20})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "no", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "no", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})) + ); + iw.addDocument(Arrays.asList( + new StringField("match", "yes", Field.Store.NO), + getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})) + ); + }, min -> { + assertEquals(-10d, min.getValue(), 0.01d); + assertTrue(AggregationInspectionHelper.hasValue(min)); + }); + } + + private void testCase(Query query, + CheckedConsumer<RandomIndexWriter, IOException> indexer, + Consumer<InternalMin> verify) throws IOException { + testCase(min("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType()); + } + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection<Centroid> centroids = histogram.centroids(); + Iterator<Centroid> iterator = centroids.iterator(); + while (iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + @Override + protected List<SearchPlugin> getSearchPlugins() { + return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY)); + } + + @Override + protected List<ValuesSourceType> getSupportedValuesSourceTypes() { + // Note: this is the same list as Core, plus Analytics + return org.elasticsearch.common.collect.List.of( + CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BOOLEAN, + CoreValuesSourceType.DATE, + AnalyticsValuesSourceType.HISTOGRAM + ); + } + + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new MinAggregationBuilder("_name").field(fieldName); + } + + private MappedFieldType defaultFieldType() { + return new HistogramFieldMapper.HistogramFieldType(HistoBackedMinAggregatorTests.FIELD_NAME, true, Collections.emptyMap()); + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml index 168f66e498f..2cf01961540 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/analytics/histogram.yml @@ -39,11 +39,19 @@ setup: histo_avg: avg: field: "latency" + histo_min: + min: + field: "latency" + histo_max: + max: + field: "latency" - match: { hits.total.value: 2 } - match: { aggregations.histo_sum.value: 25 } - match: { aggregations.histo_value_count.value: 80 } - match: { aggregations.histo_avg.value: 0.3125} + - match: { aggregations.histo_min.value: 0} + - match: { aggregations.histo_max.value: 0.5} ---
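
Both collectors in this patch reduce each document's histogram to a single double by folding every entry of the `values` array into a running minimum or maximum; the `counts` array never contributes to the result. The standalone sketch below (plain Java, using a hypothetical `Histogram` record rather than the mapper's internal types) replays that logic on the two example documents from the documentation, so the expected answers (`min = 0.1`, `max = 0.5`) can be checked outside of Elasticsearch:

[source,java]
--------------------------------------------------
import java.util.List;

// Hypothetical stand-in for a pre-aggregated histogram field value; for illustration only.
record Histogram(double[] values, int[] counts) {}

public class HistoMinMaxExample {
    public static void main(String[] args) {
        // The two documents indexed in the docs examples above.
        List<Histogram> docs = List.of(
            new Histogram(new double[] {0.1, 0.2, 0.3, 0.4, 0.5}, new int[] {3, 7, 23, 12, 6}),
            new Histogram(new double[] {0.1, 0.2, 0.3, 0.4, 0.5}, new int[] {8, 17, 8, 7, 6})
        );

        // Mirror the aggregators: start from +/- infinity and fold in every element
        // of the values array; counts are ignored for min/max.
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        for (Histogram doc : docs) {
            for (double v : doc.values()) {
                min = Math.min(min, v);
                max = Math.max(max, v);
            }
        }
        System.out.println("min=" + min + " max=" + max); // prints min=0.1 max=0.5
    }
}
--------------------------------------------------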