[7.x] Histogram field type support for min/max aggregations (#62689)

Implement min/max aggregations for histogram fields.

Backports #62532
This commit is contained in:
Christos Soulios 2020-09-21 12:53:56 +03:00 committed by GitHub
parent 178b25fc4b
commit ad79a2b6a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 681 additions and 8 deletions

View File

@ -139,3 +139,54 @@ POST /sales/_search
<1> Documents without a value in the `grade` field will fall into the same <1> Documents without a value in the `grade` field will fall into the same
bucket as documents that have the value `10`. bucket as documents that have the value `10`.
[[search-aggregations-metrics-max-aggregation-histogram-fields]]
==== Histogram fields
When `max` is computed on <<histogram,histogram fields>>, the result of the aggregation is the maximum
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
[source,console]
--------------------------------------------------
PUT metrics_index/_doc/1
{
"network.name" : "net-1",
"latency_histo" : {
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
"counts" : [3, 7, 23, 12, 6] <2>
}
}
PUT metrics_index/_doc/2
{
"network.name" : "net-2",
"latency_histo" : {
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
"counts" : [8, 17, 8, 7, 6] <2>
}
}
POST /metrics_index/_search?size=0
{
"aggs" : {
"max_latency" : { "max" : { "field" : "latency_histo" } }
}
}
--------------------------------------------------
The `max` aggregation will return the maximum value of all histogram fields:
[source,console-result]
--------------------------------------------------
{
...
"aggregations": {
"max_latency": {
"value": 0.5
}
}
}
--------------------------------------------------
// TESTRESPONSE[skip:test not setup]

View File

@ -140,3 +140,54 @@ POST /sales/_search
<1> Documents without a value in the `grade` field will fall into the same <1> Documents without a value in the `grade` field will fall into the same
bucket as documents that have the value `10`. bucket as documents that have the value `10`.
[[search-aggregations-metrics-min-aggregation-histogram-fields]]
==== Histogram fields
When `min` is computed on <<histogram,histogram fields>>, the result of the aggregation is the minimum
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
[source,console]
--------------------------------------------------
PUT metrics_index/_doc/1
{
"network.name" : "net-1",
"latency_histo" : {
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
"counts" : [3, 7, 23, 12, 6] <2>
}
}
PUT metrics_index/_doc/2
{
"network.name" : "net-2",
"latency_histo" : {
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
"counts" : [8, 17, 8, 7, 6] <2>
}
}
POST /metrics_index/_search?size=0
{
"aggs" : {
"min_latency" : { "min" : { "field" : "latency_histo" } }
}
}
--------------------------------------------------
The `min` aggregation will return the minimum value of all histogram fields:
[source,console-result]
--------------------------------------------------
{
...
"aggregations": {
"min_latency": {
"value": 0.1
}
}
}
--------------------------------------------------
// TESTRESPONSE[skip:test not setup]

View File

@ -35,6 +35,8 @@ binary <<doc-values,doc values>> and not indexed. Its size in bytes is at most
Because the data is not indexed, you only can use `histogram` fields for the Because the data is not indexed, you only can use `histogram` fields for the
following aggregations and queries: following aggregations and queries:
* <<search-aggregations-metrics-min-aggregation-histogram-fields,min>> aggregation
* <<search-aggregations-metrics-max-aggregation-histogram-fields,max>> aggregation
* <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation * <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation
* <<search-aggregations-metrics-valuecount-aggregation-histogram-fields,value_count>> aggregation * <<search-aggregations-metrics-valuecount-aggregation-histogram-fields,value_count>> aggregation
* <<search-aggregations-metrics-avg-aggregation-histogram-fields,avg>> aggregation * <<search-aggregations-metrics-avg-aggregation-histogram-fields,avg>> aggregation

View File

@ -166,7 +166,9 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator, AnalyticsAggregatorFactory::registerHistoBackedSumAggregator,
AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator, AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator,
AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator, AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator,
AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator,
AnalyticsAggregatorFactory::registerHistoBackedMinggregator,
AnalyticsAggregatorFactory::registerHistoBackedMaxggregator
); );
} }

View File

@ -7,6 +7,8 @@ package org.elasticsearch.xpack.analytics.aggregations;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.PercentilesConfig; import org.elasticsearch.search.aggregations.metrics.PercentilesConfig;
@ -18,6 +20,8 @@ import org.elasticsearch.xpack.analytics.aggregations.bucket.histogram.HistoBack
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMaxAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMinAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator;
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator; import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator;
@ -87,4 +91,13 @@ public class AnalyticsAggregatorFactory {
HistoBackedHistogramAggregator::new, HistoBackedHistogramAggregator::new,
true); true);
} }
/**
 * Registers the histogram-backed implementation of the {@code min} aggregation for the
 * analytics HISTOGRAM values-source type.
 *
 * NOTE(review): the method name is missing an "A" — it should read
 * {@code registerHistoBackedMinAggregator}. Rename it together with its call site in
 * AnalyticsPlugin so both sides stay consistent.
 */
public static void registerHistoBackedMinggregator(ValuesSourceRegistry.Builder builder) {
builder.register(MinAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMinAggregator::new, true);
}
/**
 * Registers the histogram-backed implementation of the {@code max} aggregation for the
 * analytics HISTOGRAM values-source type.
 *
 * NOTE(review): the method name is missing an "A" — it should read
 * {@code registerHistoBackedMaxAggregator}. Rename it together with its call site in
 * AnalyticsPlugin so both sides stay consistent.
 */
public static void registerHistoBackedMaxggregator(ValuesSourceRegistry.Builder builder) {
builder.register(MaxAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMaxAggregator::new, true);
}
} }

View File

@ -0,0 +1,117 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.DoubleArray;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.InternalMax;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
import java.io.IOException;
import java.util.Map;
/**
 * A {@code max} metric aggregator that reads pre-aggregated histogram doc values.
 * For every matching document it scans the histogram's encoded values and keeps the
 * largest one seen per owning bucket. The histogram's counts are never read — a
 * value's multiplicity cannot change the maximum.
 */
public class HistoBackedMaxAggregator extends NumericMetricsAggregator.SingleValue {
// Null when the target field is unmapped or has no values for this search.
private final HistogramValuesSource.Histogram valuesSource;
final DocValueFormat formatter;
// Running maximum per owning bucket ordinal; lazily grown, released in doClose().
DoubleArray maxes;
/**
 * @param name     the aggregation name
 * @param config   values-source configuration supplying the histogram source and the doc-value format
 * @param context  search context; provides BigArrays for the per-bucket state
 * @param parent   the parent aggregator, or null when this aggregation is top-level
 * @param metadata aggregation metadata propagated into the result
 */
public HistoBackedMaxAggregator(
String name,
ValuesSourceConfig config,
SearchContext context,
Aggregator parent,
Map<String, Object> metadata
) throws IOException {
super(name, context, parent, metadata);
this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
if (valuesSource != null) {
maxes = context.bigArrays().newDoubleArray(1, false);
// -Infinity is the identity for max and also marks "no value collected yet".
maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY);
}
this.formatter = config.format();
}
@Override
public ScoreMode scoreMode() {
// Scores are only needed if the values source itself requires them.
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
if (valuesSource == null) {
// With a parent present we must still act as a valid (no-op) collector, because
// other aggregators may need the hits; only at the top level is it safe to
// terminate collection for this leaf outright.
if (parent != null) {
return LeafBucketCollector.NO_OP_COLLECTOR;
} else {
// we have no parent and the values source is empty so we can skip collecting hits.
throw new CollectionTerminatedException();
}
}
final BigArrays bigArrays = context.bigArrays();
final HistogramValues values = valuesSource.getHistogramValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
if (bucket >= maxes.size()) {
// Grow the per-bucket array and initialise the new slots to the max identity.
long from = maxes.size();
maxes = bigArrays.grow(maxes, bucket + 1);
maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY);
}
if (values.advanceExact(doc)) {
final HistogramValue sketch = values.histogram();
// Iterate all histogram values; counts are irrelevant for a maximum.
while (sketch.next()) {
double value = sketch.value();
double max = maxes.get(bucket);
max = Math.max(max, value);
maxes.set(bucket, max);
}
}
}
};
}
// Returns the current maximum for the bucket, or -Infinity when nothing was collected.
@Override
public double metric(long owningBucketOrd) {
if (valuesSource == null || owningBucketOrd >= maxes.size()) {
return Double.NEGATIVE_INFINITY;
}
return maxes.get(owningBucketOrd);
}
@Override
public InternalAggregation buildAggregation(long bucket) {
if (valuesSource == null || bucket >= maxes.size()) {
return buildEmptyAggregation();
}
return new InternalMax(name, maxes.get(bucket), formatter, metadata());
}
@Override
public InternalAggregation buildEmptyAggregation() {
return new InternalMax(name, Double.NEGATIVE_INFINITY, formatter, metadata());
}
// Releases the BigArrays-backed per-bucket state.
@Override
public void doClose() {
Releasables.close(maxes);
}
}

View File

@ -0,0 +1,119 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.DoubleArray;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.metrics.InternalMin;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
import java.io.IOException;
import java.util.Map;
/**
 * A {@code min} metric aggregator that reads pre-aggregated histogram doc values.
 * For every matching document it scans the histogram's encoded values and keeps the
 * smallest one seen per owning bucket. The histogram's counts are never read — a
 * value's multiplicity cannot change the minimum.
 */
public class HistoBackedMinAggregator extends NumericMetricsAggregator.SingleValue {

    // Null when the target field is unmapped or has no values for this search.
    private final HistogramValuesSource.Histogram valuesSource;
    final DocValueFormat format;
    // Running minimum per owning bucket ordinal; lazily grown, released in doClose().
    DoubleArray mins;

    /**
     * @param name     the aggregation name
     * @param config   values-source configuration supplying the histogram source and the doc-value format
     * @param context  search context; provides BigArrays for the per-bucket state
     * @param parent   the parent aggregator, or null when this aggregation is top-level
     * @param metadata aggregation metadata propagated into the result
     */
    public HistoBackedMinAggregator(
        String name,
        ValuesSourceConfig config,
        SearchContext context,
        Aggregator parent,
        Map<String, Object> metadata
    ) throws IOException {
        super(name, context, parent, metadata);
        this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
        if (valuesSource != null) {
            mins = context.bigArrays().newDoubleArray(1, false);
            // +Infinity is the identity for min and also marks "no value collected yet".
            mins.fill(0, mins.size(), Double.POSITIVE_INFINITY);
        }
        this.format = config.format();
    }

    @Override
    public ScoreMode scoreMode() {
        // Scores are only needed if the values source itself requires them.
        return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
        if (valuesSource == null) {
            // BUG FIX: the condition was inverted ("parent == null"), contradicting both the
            // comment below and the sibling HistoBackedMaxAggregator. When a parent exists we
            // must return a valid no-op collector — other aggregators may still need the hits;
            // only at the top level is it safe to terminate collection for this leaf.
            if (parent != null) {
                return LeafBucketCollector.NO_OP_COLLECTOR;
            } else {
                // we have no parent and the values source is empty so we can skip collecting hits.
                throw new CollectionTerminatedException();
            }
        }
        final BigArrays bigArrays = context.bigArrays();
        final HistogramValues values = valuesSource.getHistogramValues(ctx);
        return new LeafBucketCollectorBase(sub, values) {
            @Override
            public void collect(int doc, long bucket) throws IOException {
                if (bucket >= mins.size()) {
                    // Grow the per-bucket array and initialise the new slots to the min identity.
                    long from = mins.size();
                    mins = bigArrays.grow(mins, bucket + 1);
                    mins.fill(from, mins.size(), Double.POSITIVE_INFINITY);
                }
                if (values.advanceExact(doc)) {
                    final HistogramValue sketch = values.histogram();
                    // Iterate all histogram values; counts are irrelevant for a minimum.
                    while (sketch.next()) {
                        double value = sketch.value();
                        double min = mins.get(bucket);
                        min = Math.min(min, value);
                        mins.set(bucket, min);
                    }
                }
            }
        };
    }

    // Returns the current minimum for the bucket, or +Infinity when nothing was collected.
    @Override
    public double metric(long owningBucketOrd) {
        if (valuesSource == null || owningBucketOrd >= mins.size()) {
            return Double.POSITIVE_INFINITY;
        }
        return mins.get(owningBucketOrd);
    }

    @Override
    public InternalAggregation buildAggregation(long bucket) {
        if (valuesSource == null || bucket >= mins.size()) {
            return buildEmptyAggregation();
        }
        return new InternalMin(name, mins.get(bucket), format, metadata());
    }

    @Override
    public InternalAggregation buildEmptyAggregation() {
        return new InternalMin(name, Double.POSITIVE_INFINITY, format, metadata());
    }

    // Releases the BigArrays-backed per-bucket state.
    @Override
    public void doClose() {
        Releasables.close(mins);
    }
}

View File

@ -0,0 +1,155 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;
import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.InternalMax;
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import static java.util.Collections.singleton;
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
/**
 * Tests for the histogram-backed implementation of the {@code max} aggregation.
 * Documents carry t-digest encoded histogram doc values; the aggregator must report
 * the largest value across all matching histograms.
 */
public class HistoBackedMaxAggregatorTests extends AggregatorTestCase {

    private static final String FIELD_NAME = "field";

    public void testNoDocs() throws IOException {
        testCase(new MatchAllDocsQuery(), iw -> {
            // Intentionally not writing any docs
        }, max -> {
            // The empty result is -Infinity with no value present.
            assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
            assertFalse(AggregationInspectionHelper.hasValue(max));
        });
    }

    public void testNoMatchingField() throws IOException {
        // Histograms indexed under a different field must not contribute to the result.
        testCase(new MatchAllDocsQuery(), iw -> {
            iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
            iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
        }, max -> {
            assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
            assertFalse(AggregationInspectionHelper.hasValue(max));
        });
    }

    public void testSimpleHistogram() throws IOException {
        testCase(new MatchAllDocsQuery(), iw -> {
            iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
            iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
            iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
        }, max -> {
            assertEquals(90d, max.getValue(), 0.01d);
            assertTrue(AggregationInspectionHelper.hasValue(max));
        });
    }

    public void testQueryFiltering() throws IOException {
        testCase(new TermQuery(new Term("match", "yes")), iw -> {
            iw.addDocument(Arrays.asList(
                new StringField("match", "yes", Field.Store.NO),
                getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
            );
            iw.addDocument(Arrays.asList(
                new StringField("match", "yes", Field.Store.NO),
                getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
            );
            iw.addDocument(Arrays.asList(
                new StringField("match", "no", Field.Store.NO),
                getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
            );
            iw.addDocument(Arrays.asList(
                new StringField("match", "no", Field.Store.NO),
                getDocValue(FIELD_NAME, new double[] {3, 1.2, 100}))
            );
            iw.addDocument(Arrays.asList(
                new StringField("match", "yes", Field.Store.NO),
                getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
            );
        }, max -> {
            // Lambda parameter renamed from the copy-pasted "min" — this is the max result.
            // The largest matching value is 90; the non-matching doc containing 100 is excluded.
            assertEquals(90d, max.getValue(), 0.01d);
            assertTrue(AggregationInspectionHelper.hasValue(max));
        });
    }

    /** Runs a max aggregation over FIELD_NAME against the documents produced by {@code indexer}. */
    private void testCase(Query query,
                          CheckedConsumer<RandomIndexWriter, IOException> indexer,
                          Consumer<InternalMax> verify) throws IOException {
        testCase(max("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
    }

    /** Encodes {@code values} as a t-digest histogram binary doc value (the histogram field's storage format). */
    private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
        TDigest histogram = new TDigestState(100.0); // default compression
        for (double value : values) {
            histogram.add(value);
        }
        BytesStreamOutput streamOutput = new BytesStreamOutput();
        histogram.compress();
        for (Centroid centroid : histogram.centroids()) {
            streamOutput.writeVInt(centroid.count());
            streamOutput.writeDouble(centroid.mean());
        }
        return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
    }

    @Override
    protected List<SearchPlugin> getSearchPlugins() {
        return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
    }

    @Override
    protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
        // Note: this is the same list as Core, plus Analytics
        return org.elasticsearch.common.collect.List.of(
            CoreValuesSourceType.NUMERIC,
            CoreValuesSourceType.BOOLEAN,
            CoreValuesSourceType.DATE,
            AnalyticsValuesSourceType.HISTOGRAM
        );
    }

    @Override
    protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
        return new MaxAggregationBuilder("_name").field(fieldName);
    }

    private MappedFieldType defaultFieldType() {
        return new HistogramFieldMapper.HistogramFieldType(FIELD_NAME, true, Collections.emptyMap());
    }
}

View File

@ -0,0 +1,155 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.analytics.aggregations.metrics;
import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.plugins.SearchPlugin;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.InternalMin;
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import static java.util.Collections.singleton;
import static org.elasticsearch.search.aggregations.AggregationBuilders.min;
/**
 * Tests for the histogram-backed {@code min} aggregation: documents carry t-digest
 * encoded histogram doc values and the aggregator is expected to report the smallest
 * encoded value across all matching histograms.
 */
public class HistoBackedMinAggregatorTests extends AggregatorTestCase {

    private static final String FIELD_NAME = "field";

    public void testNoDocs() throws IOException {
        // An empty index yields the "empty" min: +Infinity, with no value present.
        testCase(new MatchAllDocsQuery(), writer -> {
            // Intentionally not writing any docs
        }, agg -> {
            assertEquals(Double.POSITIVE_INFINITY, agg.getValue(), 0d);
            assertFalse(AggregationInspectionHelper.hasValue(agg));
        });
    }

    public void testNoMatchingField() throws IOException {
        // Histograms indexed under a different field must not contribute to the result.
        testCase(new MatchAllDocsQuery(), writer -> {
            writer.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
            writer.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
        }, agg -> {
            assertEquals(Double.POSITIVE_INFINITY, agg.getValue(), 0d);
            assertFalse(AggregationInspectionHelper.hasValue(agg));
        });
    }

    public void testSimpleHistogram() throws IOException {
        testCase(new MatchAllDocsQuery(), writer -> {
            writer.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
            writer.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
            writer.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
        }, agg -> {
            assertEquals(-10d, agg.getValue(), 0.01d);
            assertTrue(AggregationInspectionHelper.hasValue(agg));
        });
    }

    public void testQueryFiltering() throws IOException {
        // Only documents tagged match=yes may contribute to the minimum.
        testCase(new TermQuery(new Term("match", "yes")), writer -> {
            writer.addDocument(
                Arrays.asList(new StringField("match", "yes", Field.Store.NO), getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
            );
            writer.addDocument(
                Arrays.asList(new StringField("match", "yes", Field.Store.NO), getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
            );
            writer.addDocument(
                Arrays.asList(new StringField("match", "no", Field.Store.NO), getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
            );
            writer.addDocument(
                Arrays.asList(new StringField("match", "no", Field.Store.NO), getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
            );
            writer.addDocument(
                Arrays.asList(new StringField("match", "yes", Field.Store.NO), getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
            );
        }, agg -> {
            assertEquals(-10d, agg.getValue(), 0.01d);
            assertTrue(AggregationInspectionHelper.hasValue(agg));
        });
    }

    /** Runs a min aggregation over FIELD_NAME on the documents produced by {@code indexer}. */
    private void testCase(Query query,
                          CheckedConsumer<RandomIndexWriter, IOException> indexer,
                          Consumer<InternalMin> verify) throws IOException {
        testCase(min("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
    }

    /** Builds a binary doc value holding the t-digest encoding of {@code values}. */
    private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
        TDigest digest = new TDigestState(100.0); // default compression
        for (double v : values) {
            digest.add(v);
        }
        digest.compress();
        BytesStreamOutput out = new BytesStreamOutput();
        for (Centroid centroid : digest.centroids()) {
            out.writeVInt(centroid.count());
            out.writeDouble(centroid.mean());
        }
        return new BinaryDocValuesField(fieldName, out.bytes().toBytesRef());
    }

    @Override
    protected List<SearchPlugin> getSearchPlugins() {
        return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
    }

    @Override
    protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
        // Note: this is the same list as Core, plus Analytics
        return org.elasticsearch.common.collect.List.of(
            CoreValuesSourceType.NUMERIC,
            CoreValuesSourceType.BOOLEAN,
            CoreValuesSourceType.DATE,
            AnalyticsValuesSourceType.HISTOGRAM
        );
    }

    @Override
    protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
        return new MinAggregationBuilder("_name").field(fieldName);
    }

    private MappedFieldType defaultFieldType() {
        return new HistogramFieldMapper.HistogramFieldType(FIELD_NAME, true, Collections.emptyMap());
    }
}

View File

@ -39,11 +39,19 @@ setup:
histo_avg: histo_avg:
avg: avg:
field: "latency" field: "latency"
histo_min:
min:
field: "latency"
histo_max:
max:
field: "latency"
- match: { hits.total.value: 2 } - match: { hits.total.value: 2 }
- match: { aggregations.histo_sum.value: 25 } - match: { aggregations.histo_sum.value: 25 }
- match: { aggregations.histo_value_count.value: 80 } - match: { aggregations.histo_value_count.value: 80 }
- match: { aggregations.histo_avg.value: 0.3125} - match: { aggregations.histo_avg.value: 0.3125}
- match: { aggregations.histo_min.value: 0}
- match: { aggregations.histo_max.value: 0.5}
--- ---