[7.x] Histogram field type support for min/max aggregations (#62689)
Implement min/max aggregations for histogram fields. Backports #62532
This commit is contained in:
parent
178b25fc4b
commit
ad79a2b6a1
|
@ -139,3 +139,54 @@ POST /sales/_search
|
||||||
|
|
||||||
<1> Documents without a value in the `grade` field will fall into the same
|
<1> Documents without a value in the `grade` field will fall into the same
|
||||||
bucket as documents that have the value `10`.
|
bucket as documents that have the value `10`.
|
||||||
|
|
||||||
|
[[search-aggregations-metrics-max-aggregation-histogram-fields]]
|
||||||
|
==== Histogram fields
|
||||||
|
|
||||||
|
When `max` is computed on <<histogram,histogram fields>>, the result of the aggregation is the maximum
|
||||||
|
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
|
||||||
|
|
||||||
|
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
|
||||||
|
|
||||||
|
[source,console]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT metrics_index/_doc/1
|
||||||
|
{
|
||||||
|
"network.name" : "net-1",
|
||||||
|
"latency_histo" : {
|
||||||
|
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||||
|
"counts" : [3, 7, 23, 12, 6] <2>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT metrics_index/_doc/2
|
||||||
|
{
|
||||||
|
"network.name" : "net-2",
|
||||||
|
"latency_histo" : {
|
||||||
|
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||||
|
"counts" : [8, 17, 8, 7, 6] <2>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
POST /metrics_index/_search?size=0
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"min_latency" : { "min" : { "field" : "latency_histo" } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
The `max` aggregation will return the maximum value of all histogram fields:
|
||||||
|
|
||||||
|
[source,console-result]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"aggregations": {
|
||||||
|
"min_latency": {
|
||||||
|
"value": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// TESTRESPONSE[skip:test not setup]
|
||||||
|
|
|
@ -140,3 +140,54 @@ POST /sales/_search
|
||||||
|
|
||||||
<1> Documents without a value in the `grade` field will fall into the same
|
<1> Documents without a value in the `grade` field will fall into the same
|
||||||
bucket as documents that have the value `10`.
|
bucket as documents that have the value `10`.
|
||||||
|
|
||||||
|
[[search-aggregations-metrics-min-aggregation-histogram-fields]]
|
||||||
|
==== Histogram fields
|
||||||
|
|
||||||
|
When `min` is computed on <<histogram,histogram fields>>, the result of the aggregation is the minimum
|
||||||
|
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
|
||||||
|
|
||||||
|
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
|
||||||
|
|
||||||
|
[source,console]
|
||||||
|
--------------------------------------------------
|
||||||
|
PUT metrics_index/_doc/1
|
||||||
|
{
|
||||||
|
"network.name" : "net-1",
|
||||||
|
"latency_histo" : {
|
||||||
|
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||||
|
"counts" : [3, 7, 23, 12, 6] <2>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT metrics_index/_doc/2
|
||||||
|
{
|
||||||
|
"network.name" : "net-2",
|
||||||
|
"latency_histo" : {
|
||||||
|
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||||
|
"counts" : [8, 17, 8, 7, 6] <2>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
POST /metrics_index/_search?size=0
|
||||||
|
{
|
||||||
|
"aggs" : {
|
||||||
|
"min_latency" : { "min" : { "field" : "latency_histo" } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
|
||||||
|
The `min` aggregation will return the minimum value of all histogram fields:
|
||||||
|
|
||||||
|
[source,console-result]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"aggregations": {
|
||||||
|
"min_latency": {
|
||||||
|
"value": 0.1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// TESTRESPONSE[skip:test not setup]
|
||||||
|
|
|
@ -35,6 +35,8 @@ binary <<doc-values,doc values>> and not indexed. Its size in bytes is at most
|
||||||
Because the data is not indexed, you only can use `histogram` fields for the
|
Because the data is not indexed, you only can use `histogram` fields for the
|
||||||
following aggregations and queries:
|
following aggregations and queries:
|
||||||
|
|
||||||
|
* <<search-aggregations-metrics-min-aggregation-histogram-fields,min>> aggregation
|
||||||
|
* <<search-aggregations-metrics-max-aggregation-histogram-fields,max>> aggregation
|
||||||
* <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation
|
* <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation
|
||||||
* <<search-aggregations-metrics-valuecount-aggregation-histogram-fields,value_count>> aggregation
|
* <<search-aggregations-metrics-valuecount-aggregation-histogram-fields,value_count>> aggregation
|
||||||
* <<search-aggregations-metrics-avg-aggregation-histogram-fields,avg>> aggregation
|
* <<search-aggregations-metrics-avg-aggregation-histogram-fields,avg>> aggregation
|
||||||
|
|
|
@ -166,7 +166,9 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
|
||||||
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator,
|
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator,
|
||||||
AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator,
|
AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator,
|
||||||
AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator,
|
AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator,
|
||||||
AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator
|
AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator,
|
||||||
|
AnalyticsAggregatorFactory::registerHistoBackedMinggregator,
|
||||||
|
AnalyticsAggregatorFactory::registerHistoBackedMaxggregator
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,8 @@ package org.elasticsearch.xpack.analytics.aggregations;
|
||||||
|
|
||||||
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
|
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
|
||||||
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
|
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
|
||||||
import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
|
import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
|
||||||
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
|
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
|
||||||
import org.elasticsearch.search.aggregations.metrics.PercentilesConfig;
|
import org.elasticsearch.search.aggregations.metrics.PercentilesConfig;
|
||||||
|
@ -18,6 +20,8 @@ import org.elasticsearch.xpack.analytics.aggregations.bucket.histogram.HistoBack
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMaxAggregator;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMinAggregator;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator;
|
||||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator;
|
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator;
|
||||||
|
@ -87,4 +91,13 @@ public class AnalyticsAggregatorFactory {
|
||||||
HistoBackedHistogramAggregator::new,
|
HistoBackedHistogramAggregator::new,
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void registerHistoBackedMinggregator(ValuesSourceRegistry.Builder builder) {
|
||||||
|
builder.register(MinAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMinAggregator::new, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void registerHistoBackedMaxggregator(ValuesSourceRegistry.Builder builder) {
|
||||||
|
builder.register(MaxAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMaxAggregator::new, true);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.CollectionTerminatedException;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
import org.elasticsearch.common.lease.Releasables;
|
||||||
|
import org.elasticsearch.common.util.BigArrays;
|
||||||
|
import org.elasticsearch.common.util.DoubleArray;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||||
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
|
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||||
|
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||||
|
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.InternalMax;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
|
||||||
|
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class HistoBackedMaxAggregator extends NumericMetricsAggregator.SingleValue {
|
||||||
|
|
||||||
|
private final HistogramValuesSource.Histogram valuesSource;
|
||||||
|
final DocValueFormat formatter;
|
||||||
|
DoubleArray maxes;
|
||||||
|
|
||||||
|
public HistoBackedMaxAggregator(
|
||||||
|
String name,
|
||||||
|
ValuesSourceConfig config,
|
||||||
|
SearchContext context,
|
||||||
|
Aggregator parent,
|
||||||
|
Map<String, Object> metadata
|
||||||
|
) throws IOException {
|
||||||
|
super(name, context, parent, metadata);
|
||||||
|
this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
|
||||||
|
if (valuesSource != null) {
|
||||||
|
maxes = context.bigArrays().newDoubleArray(1, false);
|
||||||
|
maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY);
|
||||||
|
}
|
||||||
|
this.formatter = config.format();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
|
||||||
|
if (valuesSource == null) {
|
||||||
|
if (parent != null) {
|
||||||
|
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||||
|
} else {
|
||||||
|
// we have no parent and the values source is empty so we can skip collecting hits.
|
||||||
|
throw new CollectionTerminatedException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final BigArrays bigArrays = context.bigArrays();
|
||||||
|
final HistogramValues values = valuesSource.getHistogramValues(ctx);
|
||||||
|
return new LeafBucketCollectorBase(sub, values) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, long bucket) throws IOException {
|
||||||
|
if (bucket >= maxes.size()) {
|
||||||
|
long from = maxes.size();
|
||||||
|
maxes = bigArrays.grow(maxes, bucket + 1);
|
||||||
|
maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY);
|
||||||
|
}
|
||||||
|
if (values.advanceExact(doc)) {
|
||||||
|
final HistogramValue sketch = values.histogram();
|
||||||
|
while (sketch.next()) {
|
||||||
|
double value = sketch.value();
|
||||||
|
double max = maxes.get(bucket);
|
||||||
|
max = Math.max(max, value);
|
||||||
|
maxes.set(bucket, max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double metric(long owningBucketOrd) {
|
||||||
|
if (valuesSource == null || owningBucketOrd >= maxes.size()) {
|
||||||
|
return Double.NEGATIVE_INFINITY;
|
||||||
|
}
|
||||||
|
return maxes.get(owningBucketOrd);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InternalAggregation buildAggregation(long bucket) {
|
||||||
|
if (valuesSource == null || bucket >= maxes.size()) {
|
||||||
|
return buildEmptyAggregation();
|
||||||
|
}
|
||||||
|
return new InternalMax(name, maxes.get(bucket), formatter, metadata());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InternalAggregation buildEmptyAggregation() {
|
||||||
|
return new InternalMax(name, Double.NEGATIVE_INFINITY, formatter, metadata());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doClose() {
|
||||||
|
Releasables.close(maxes);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,119 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.CollectionTerminatedException;
|
||||||
|
import org.apache.lucene.search.ScoreMode;
|
||||||
|
import org.elasticsearch.common.lease.Releasables;
|
||||||
|
import org.elasticsearch.common.util.BigArrays;
|
||||||
|
import org.elasticsearch.common.util.DoubleArray;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||||
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
|
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||||
|
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||||
|
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.InternalMin;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
|
||||||
|
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
||||||
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class HistoBackedMinAggregator extends NumericMetricsAggregator.SingleValue {
|
||||||
|
|
||||||
|
private final HistogramValuesSource.Histogram valuesSource;
|
||||||
|
final DocValueFormat format;
|
||||||
|
DoubleArray mins;
|
||||||
|
|
||||||
|
public HistoBackedMinAggregator(
|
||||||
|
String name,
|
||||||
|
ValuesSourceConfig config,
|
||||||
|
SearchContext context,
|
||||||
|
Aggregator parent,
|
||||||
|
Map<String, Object> metadata
|
||||||
|
) throws IOException {
|
||||||
|
super(name, context, parent, metadata);
|
||||||
|
this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
|
||||||
|
if (valuesSource != null) {
|
||||||
|
mins = context.bigArrays().newDoubleArray(1, false);
|
||||||
|
mins.fill(0, mins.size(), Double.POSITIVE_INFINITY);
|
||||||
|
}
|
||||||
|
this.format = config.format();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScoreMode scoreMode() {
|
||||||
|
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
|
||||||
|
if (valuesSource == null) {
|
||||||
|
if (parent == null) {
|
||||||
|
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||||
|
} else {
|
||||||
|
// we have no parent and the values source is empty so we can skip collecting hits.
|
||||||
|
throw new CollectionTerminatedException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final BigArrays bigArrays = context.bigArrays();
|
||||||
|
final HistogramValues values = valuesSource.getHistogramValues(ctx);
|
||||||
|
return new LeafBucketCollectorBase(sub, values) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, long bucket) throws IOException {
|
||||||
|
if (bucket >= mins.size()) {
|
||||||
|
long from = mins.size();
|
||||||
|
mins = bigArrays.grow(mins, bucket + 1);
|
||||||
|
mins.fill(from, mins.size(), Double.POSITIVE_INFINITY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (values.advanceExact(doc)) {
|
||||||
|
final HistogramValue sketch = values.histogram();
|
||||||
|
while (sketch.next()) {
|
||||||
|
double value = sketch.value();
|
||||||
|
double min = mins.get(bucket);
|
||||||
|
min = Math.min(min, value);
|
||||||
|
mins.set(bucket, min);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double metric(long owningBucketOrd) {
|
||||||
|
if (valuesSource == null || owningBucketOrd >= mins.size()) {
|
||||||
|
return Double.POSITIVE_INFINITY;
|
||||||
|
}
|
||||||
|
return mins.get(owningBucketOrd);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InternalAggregation buildAggregation(long bucket) {
|
||||||
|
if (valuesSource == null || bucket >= mins.size()) {
|
||||||
|
return buildEmptyAggregation();
|
||||||
|
}
|
||||||
|
return new InternalMin(name, mins.get(bucket), format, metadata());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InternalAggregation buildEmptyAggregation() {
|
||||||
|
return new InternalMin(name, Double.POSITIVE_INFINITY, format, metadata());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doClose() {
|
||||||
|
Releasables.close(mins);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,155 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||||
|
|
||||||
|
import com.tdunning.math.stats.Centroid;
|
||||||
|
import com.tdunning.math.stats.TDigest;
|
||||||
|
import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.elasticsearch.common.CheckedConsumer;
|
||||||
|
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
|
import org.elasticsearch.plugins.SearchPlugin;
|
||||||
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.InternalMax;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||||
|
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
|
||||||
|
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
|
||||||
|
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||||
|
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
||||||
|
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
import static java.util.Collections.singleton;
|
||||||
|
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
|
||||||
|
|
||||||
|
public class HistoBackedMaxAggregatorTests extends AggregatorTestCase {
|
||||||
|
|
||||||
|
private static final String FIELD_NAME = "field";
|
||||||
|
|
||||||
|
public void testNoDocs() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
// Intentionally not writing any docs
|
||||||
|
}, max -> {
|
||||||
|
assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
|
||||||
|
assertFalse(AggregationInspectionHelper.hasValue(max));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNoMatchingField() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
|
||||||
|
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
|
||||||
|
}, max -> {
|
||||||
|
assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
|
||||||
|
assertFalse(AggregationInspectionHelper.hasValue(max));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSimpleHistogram() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
|
||||||
|
}, max -> {
|
||||||
|
assertEquals(90d, max.getValue(), 0.01d);
|
||||||
|
assertTrue(AggregationInspectionHelper.hasValue(max));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testQueryFiltering() throws IOException {
|
||||||
|
testCase(new TermQuery(new Term("match", "yes")), iw -> {
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "no", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "no", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {3, 1.2, 100}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
|
||||||
|
);
|
||||||
|
}, min -> {
|
||||||
|
assertEquals(90d, min.getValue(), 0.01d);
|
||||||
|
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testCase(Query query,
|
||||||
|
CheckedConsumer<RandomIndexWriter, IOException> indexer,
|
||||||
|
Consumer<InternalMax> verify) throws IOException {
|
||||||
|
testCase(max("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
|
||||||
|
}
|
||||||
|
|
||||||
|
private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
|
||||||
|
TDigest histogram = new TDigestState(100.0); //default
|
||||||
|
for (double value : values) {
|
||||||
|
histogram.add(value);
|
||||||
|
}
|
||||||
|
BytesStreamOutput streamOutput = new BytesStreamOutput();
|
||||||
|
histogram.compress();
|
||||||
|
Collection<Centroid> centroids = histogram.centroids();
|
||||||
|
Iterator<Centroid> iterator = centroids.iterator();
|
||||||
|
while ( iterator.hasNext()) {
|
||||||
|
Centroid centroid = iterator.next();
|
||||||
|
streamOutput.writeVInt(centroid.count());
|
||||||
|
streamOutput.writeDouble(centroid.mean());
|
||||||
|
}
|
||||||
|
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<SearchPlugin> getSearchPlugins() {
|
||||||
|
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
|
||||||
|
// Note: this is the same list as Core, plus Analytics
|
||||||
|
return org.elasticsearch.common.collect.List.of(
|
||||||
|
CoreValuesSourceType.NUMERIC,
|
||||||
|
CoreValuesSourceType.BOOLEAN,
|
||||||
|
CoreValuesSourceType.DATE,
|
||||||
|
AnalyticsValuesSourceType.HISTOGRAM
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
||||||
|
return new MaxAggregationBuilder("_name").field(fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private MappedFieldType defaultFieldType() {
|
||||||
|
return new HistogramFieldMapper.HistogramFieldType(HistoBackedMaxAggregatorTests.FIELD_NAME, true, Collections.emptyMap());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,155 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||||
|
|
||||||
|
import com.tdunning.math.stats.Centroid;
|
||||||
|
import com.tdunning.math.stats.TDigest;
|
||||||
|
import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.elasticsearch.common.CheckedConsumer;
|
||||||
|
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
|
import org.elasticsearch.plugins.SearchPlugin;
|
||||||
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.InternalMin;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
|
||||||
|
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||||
|
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
|
||||||
|
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
|
||||||
|
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||||
|
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||||
|
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
||||||
|
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
|
import static java.util.Collections.singleton;
|
||||||
|
import static org.elasticsearch.search.aggregations.AggregationBuilders.min;
|
||||||
|
|
||||||
|
public class HistoBackedMinAggregatorTests extends AggregatorTestCase {
|
||||||
|
|
||||||
|
private static final String FIELD_NAME = "field";
|
||||||
|
|
||||||
|
public void testNoDocs() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
// Intentionally not writing any docs
|
||||||
|
}, min -> {
|
||||||
|
assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d);
|
||||||
|
assertFalse(AggregationInspectionHelper.hasValue(min));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNoMatchingField() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
|
||||||
|
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
|
||||||
|
}, min -> {
|
||||||
|
assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d);
|
||||||
|
assertFalse(AggregationInspectionHelper.hasValue(min));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSimpleHistogram() throws IOException {
|
||||||
|
testCase(new MatchAllDocsQuery(), iw -> {
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
|
||||||
|
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
|
||||||
|
}, min -> {
|
||||||
|
assertEquals(-10d, min.getValue(), 0.01d);
|
||||||
|
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testQueryFiltering() throws IOException {
|
||||||
|
testCase(new TermQuery(new Term("match", "yes")), iw -> {
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "no", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "no", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||||
|
);
|
||||||
|
iw.addDocument(Arrays.asList(
|
||||||
|
new StringField("match", "yes", Field.Store.NO),
|
||||||
|
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
|
||||||
|
);
|
||||||
|
}, min -> {
|
||||||
|
assertEquals(-10d, min.getValue(), 0.01d);
|
||||||
|
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testCase(Query query,
|
||||||
|
CheckedConsumer<RandomIndexWriter, IOException> indexer,
|
||||||
|
Consumer<InternalMin> verify) throws IOException {
|
||||||
|
testCase(min("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
|
||||||
|
}
|
||||||
|
|
||||||
|
private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
|
||||||
|
TDigest histogram = new TDigestState(100.0); //default
|
||||||
|
for (double value : values) {
|
||||||
|
histogram.add(value);
|
||||||
|
}
|
||||||
|
BytesStreamOutput streamOutput = new BytesStreamOutput();
|
||||||
|
histogram.compress();
|
||||||
|
Collection<Centroid> centroids = histogram.centroids();
|
||||||
|
Iterator<Centroid> iterator = centroids.iterator();
|
||||||
|
while ( iterator.hasNext()) {
|
||||||
|
Centroid centroid = iterator.next();
|
||||||
|
streamOutput.writeVInt(centroid.count());
|
||||||
|
streamOutput.writeDouble(centroid.mean());
|
||||||
|
}
|
||||||
|
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<SearchPlugin> getSearchPlugins() {
|
||||||
|
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
|
||||||
|
// Note: this is the same list as Core, plus Analytics
|
||||||
|
return org.elasticsearch.common.collect.List.of(
|
||||||
|
CoreValuesSourceType.NUMERIC,
|
||||||
|
CoreValuesSourceType.BOOLEAN,
|
||||||
|
CoreValuesSourceType.DATE,
|
||||||
|
AnalyticsValuesSourceType.HISTOGRAM
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
||||||
|
return new MinAggregationBuilder("_name").field(fieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private MappedFieldType defaultFieldType() {
|
||||||
|
return new HistogramFieldMapper.HistogramFieldType(HistoBackedMinAggregatorTests.FIELD_NAME, true, Collections.emptyMap());
|
||||||
|
}
|
||||||
|
}
|
|
@ -39,11 +39,19 @@ setup:
|
||||||
histo_avg:
|
histo_avg:
|
||||||
avg:
|
avg:
|
||||||
field: "latency"
|
field: "latency"
|
||||||
|
histo_min:
|
||||||
|
min:
|
||||||
|
field: "latency"
|
||||||
|
histo_max:
|
||||||
|
max:
|
||||||
|
field: "latency"
|
||||||
|
|
||||||
- match: { hits.total.value: 2 }
|
- match: { hits.total.value: 2 }
|
||||||
- match: { aggregations.histo_sum.value: 25 }
|
- match: { aggregations.histo_sum.value: 25 }
|
||||||
- match: { aggregations.histo_value_count.value: 80 }
|
- match: { aggregations.histo_value_count.value: 80 }
|
||||||
- match: { aggregations.histo_avg.value: 0.3125}
|
- match: { aggregations.histo_avg.value: 0.3125}
|
||||||
|
- match: { aggregations.histo_min.value: 0}
|
||||||
|
- match: { aggregations.histo_max.value: 0.5}
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
Loading…
Reference in New Issue