[7.x] Histogram field type support for min/max aggregations (#62689)
Implement min/max aggregations for histogram fields. Backports #62532
This commit is contained in:
parent
178b25fc4b
commit
ad79a2b6a1
|
@ -139,3 +139,54 @@ POST /sales/_search
|
|||
|
||||
<1> Documents without a value in the `grade` field will fall into the same
|
||||
bucket as documents that have the value `10`.
|
||||
|
||||
[[search-aggregations-metrics-max-aggregation-histogram-fields]]
|
||||
==== Histogram fields
|
||||
|
||||
When `max` is computed on <<histogram,histogram fields>>, the result of the aggregation is the maximum
|
||||
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
|
||||
|
||||
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
PUT metrics_index/_doc/1
|
||||
{
|
||||
"network.name" : "net-1",
|
||||
"latency_histo" : {
|
||||
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||
"counts" : [3, 7, 23, 12, 6] <2>
|
||||
}
|
||||
}
|
||||
|
||||
PUT metrics_index/_doc/2
|
||||
{
|
||||
"network.name" : "net-2",
|
||||
"latency_histo" : {
|
||||
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||
"counts" : [8, 17, 8, 7, 6] <2>
|
||||
}
|
||||
}
|
||||
|
||||
POST /metrics_index/_search?size=0
|
||||
{
|
||||
"aggs" : {
|
||||
"min_latency" : { "min" : { "field" : "latency_histo" } }
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The `max` aggregation will return the maximum value of all histogram fields:
|
||||
|
||||
[source,console-result]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...
|
||||
"aggregations": {
|
||||
"min_latency": {
|
||||
"value": 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[skip:test not setup]
|
||||
|
|
|
@ -140,3 +140,54 @@ POST /sales/_search
|
|||
|
||||
<1> Documents without a value in the `grade` field will fall into the same
|
||||
bucket as documents that have the value `10`.
|
||||
|
||||
[[search-aggregations-metrics-min-aggregation-histogram-fields]]
|
||||
==== Histogram fields
|
||||
|
||||
When `min` is computed on <<histogram,histogram fields>>, the result of the aggregation is the minimum
|
||||
of all elements in the `values` array. Note, that the `counts` array of the histogram is ignored.
|
||||
|
||||
For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
PUT metrics_index/_doc/1
|
||||
{
|
||||
"network.name" : "net-1",
|
||||
"latency_histo" : {
|
||||
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||
"counts" : [3, 7, 23, 12, 6] <2>
|
||||
}
|
||||
}
|
||||
|
||||
PUT metrics_index/_doc/2
|
||||
{
|
||||
"network.name" : "net-2",
|
||||
"latency_histo" : {
|
||||
"values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
|
||||
"counts" : [8, 17, 8, 7, 6] <2>
|
||||
}
|
||||
}
|
||||
|
||||
POST /metrics_index/_search?size=0
|
||||
{
|
||||
"aggs" : {
|
||||
"min_latency" : { "min" : { "field" : "latency_histo" } }
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
The `min` aggregation will return the minimum value of all histogram fields:
|
||||
|
||||
[source,console-result]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...
|
||||
"aggregations": {
|
||||
"min_latency": {
|
||||
"value": 0.1
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[skip:test not setup]
|
||||
|
|
|
@ -35,6 +35,8 @@ binary <<doc-values,doc values>> and not indexed. Its size in bytes is at most
|
|||
Because the data is not indexed, you only can use `histogram` fields for the
|
||||
following aggregations and queries:
|
||||
|
||||
* <<search-aggregations-metrics-min-aggregation-histogram-fields,min>> aggregation
|
||||
* <<search-aggregations-metrics-max-aggregation-histogram-fields,max>> aggregation
|
||||
* <<search-aggregations-metrics-sum-aggregation-histogram-fields,sum>> aggregation
|
||||
* <<search-aggregations-metrics-valuecount-aggregation-histogram-fields,value_count>> aggregation
|
||||
* <<search-aggregations-metrics-avg-aggregation-histogram-fields,avg>> aggregation
|
||||
|
|
|
@ -160,14 +160,16 @@ public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugi
|
|||
|
||||
@Override
|
||||
public List<Consumer<ValuesSourceRegistry.Builder>> getAggregationExtentions() {
|
||||
return org.elasticsearch.common.collect.List.of(
|
||||
AnalyticsAggregatorFactory::registerPercentilesAggregator,
|
||||
AnalyticsAggregatorFactory::registerPercentileRanksAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator
|
||||
);
|
||||
return org.elasticsearch.common.collect.List.of(
|
||||
AnalyticsAggregatorFactory::registerPercentilesAggregator,
|
||||
AnalyticsAggregatorFactory::registerPercentileRanksAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedSumAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedValueCountAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedAverageAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedHistogramAggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedMinggregator,
|
||||
AnalyticsAggregatorFactory::registerHistoBackedMaxggregator
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -7,6 +7,8 @@ package org.elasticsearch.xpack.analytics.aggregations;
|
|||
|
||||
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.PercentilesConfig;
|
||||
|
@ -18,6 +20,8 @@ import org.elasticsearch.xpack.analytics.aggregations.bucket.histogram.HistoBack
|
|||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedAvgAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentileRanksAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedHDRPercentilesAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMaxAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedMinAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedSumAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentileRanksAggregator;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.metrics.HistoBackedTDigestPercentilesAggregator;
|
||||
|
@ -87,4 +91,13 @@ public class AnalyticsAggregatorFactory {
|
|||
HistoBackedHistogramAggregator::new,
|
||||
true);
|
||||
}
|
||||
|
||||
public static void registerHistoBackedMinggregator(ValuesSourceRegistry.Builder builder) {
|
||||
builder.register(MinAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMinAggregator::new, true);
|
||||
}
|
||||
|
||||
public static void registerHistoBackedMaxggregator(ValuesSourceRegistry.Builder builder) {
|
||||
builder.register(MaxAggregationBuilder.REGISTRY_KEY, AnalyticsValuesSourceType.HISTOGRAM, HistoBackedMaxAggregator::new, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.DoubleArray;
|
||||
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.metrics.InternalMax;
|
||||
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class HistoBackedMaxAggregator extends NumericMetricsAggregator.SingleValue {
|
||||
|
||||
private final HistogramValuesSource.Histogram valuesSource;
|
||||
final DocValueFormat formatter;
|
||||
DoubleArray maxes;
|
||||
|
||||
public HistoBackedMaxAggregator(
|
||||
String name,
|
||||
ValuesSourceConfig config,
|
||||
SearchContext context,
|
||||
Aggregator parent,
|
||||
Map<String, Object> metadata
|
||||
) throws IOException {
|
||||
super(name, context, parent, metadata);
|
||||
this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
|
||||
if (valuesSource != null) {
|
||||
maxes = context.bigArrays().newDoubleArray(1, false);
|
||||
maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
this.formatter = config.format();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScoreMode scoreMode() {
|
||||
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
|
||||
if (valuesSource == null) {
|
||||
if (parent != null) {
|
||||
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||
} else {
|
||||
// we have no parent and the values source is empty so we can skip collecting hits.
|
||||
throw new CollectionTerminatedException();
|
||||
}
|
||||
}
|
||||
|
||||
final BigArrays bigArrays = context.bigArrays();
|
||||
final HistogramValues values = valuesSource.getHistogramValues(ctx);
|
||||
return new LeafBucketCollectorBase(sub, values) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc, long bucket) throws IOException {
|
||||
if (bucket >= maxes.size()) {
|
||||
long from = maxes.size();
|
||||
maxes = bigArrays.grow(maxes, bucket + 1);
|
||||
maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
if (values.advanceExact(doc)) {
|
||||
final HistogramValue sketch = values.histogram();
|
||||
while (sketch.next()) {
|
||||
double value = sketch.value();
|
||||
double max = maxes.get(bucket);
|
||||
max = Math.max(max, value);
|
||||
maxes.set(bucket, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public double metric(long owningBucketOrd) {
|
||||
if (valuesSource == null || owningBucketOrd >= maxes.size()) {
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
}
|
||||
return maxes.get(owningBucketOrd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InternalAggregation buildAggregation(long bucket) {
|
||||
if (valuesSource == null || bucket >= maxes.size()) {
|
||||
return buildEmptyAggregation();
|
||||
}
|
||||
return new InternalMax(name, maxes.get(bucket), formatter, metadata());
|
||||
}
|
||||
|
||||
@Override
|
||||
public InternalAggregation buildEmptyAggregation() {
|
||||
return new InternalMax(name, Double.NEGATIVE_INFINITY, formatter, metadata());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doClose() {
|
||||
Releasables.close(maxes);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.DoubleArray;
|
||||
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.metrics.InternalMin;
|
||||
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.support.HistogramValuesSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class HistoBackedMinAggregator extends NumericMetricsAggregator.SingleValue {
|
||||
|
||||
private final HistogramValuesSource.Histogram valuesSource;
|
||||
final DocValueFormat format;
|
||||
DoubleArray mins;
|
||||
|
||||
public HistoBackedMinAggregator(
|
||||
String name,
|
||||
ValuesSourceConfig config,
|
||||
SearchContext context,
|
||||
Aggregator parent,
|
||||
Map<String, Object> metadata
|
||||
) throws IOException {
|
||||
super(name, context, parent, metadata);
|
||||
this.valuesSource = config.hasValues() ? (HistogramValuesSource.Histogram) config.getValuesSource() : null;
|
||||
if (valuesSource != null) {
|
||||
mins = context.bigArrays().newDoubleArray(1, false);
|
||||
mins.fill(0, mins.size(), Double.POSITIVE_INFINITY);
|
||||
}
|
||||
this.format = config.format();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScoreMode scoreMode() {
|
||||
return valuesSource != null && valuesSource.needsScores() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
|
||||
if (valuesSource == null) {
|
||||
if (parent == null) {
|
||||
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||
} else {
|
||||
// we have no parent and the values source is empty so we can skip collecting hits.
|
||||
throw new CollectionTerminatedException();
|
||||
}
|
||||
}
|
||||
|
||||
final BigArrays bigArrays = context.bigArrays();
|
||||
final HistogramValues values = valuesSource.getHistogramValues(ctx);
|
||||
return new LeafBucketCollectorBase(sub, values) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc, long bucket) throws IOException {
|
||||
if (bucket >= mins.size()) {
|
||||
long from = mins.size();
|
||||
mins = bigArrays.grow(mins, bucket + 1);
|
||||
mins.fill(from, mins.size(), Double.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
if (values.advanceExact(doc)) {
|
||||
final HistogramValue sketch = values.histogram();
|
||||
while (sketch.next()) {
|
||||
double value = sketch.value();
|
||||
double min = mins.get(bucket);
|
||||
min = Math.min(min, value);
|
||||
mins.set(bucket, min);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public double metric(long owningBucketOrd) {
|
||||
if (valuesSource == null || owningBucketOrd >= mins.size()) {
|
||||
return Double.POSITIVE_INFINITY;
|
||||
}
|
||||
return mins.get(owningBucketOrd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InternalAggregation buildAggregation(long bucket) {
|
||||
if (valuesSource == null || bucket >= mins.size()) {
|
||||
return buildEmptyAggregation();
|
||||
}
|
||||
return new InternalMin(name, mins.get(bucket), format, metadata());
|
||||
}
|
||||
|
||||
@Override
|
||||
public InternalAggregation buildEmptyAggregation() {
|
||||
return new InternalMin(name, Double.POSITIVE_INFINITY, format, metadata());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doClose() {
|
||||
Releasables.close(mins);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,155 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||
|
||||
import com.tdunning.math.stats.Centroid;
|
||||
import com.tdunning.math.stats.TDigest;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.elasticsearch.common.CheckedConsumer;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.plugins.SearchPlugin;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.metrics.InternalMax;
|
||||
import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
|
||||
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
||||
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static java.util.Collections.singleton;
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.max;
|
||||
|
||||
public class HistoBackedMaxAggregatorTests extends AggregatorTestCase {
|
||||
|
||||
private static final String FIELD_NAME = "field";
|
||||
|
||||
public void testNoDocs() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
// Intentionally not writing any docs
|
||||
}, max -> {
|
||||
assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
|
||||
assertFalse(AggregationInspectionHelper.hasValue(max));
|
||||
});
|
||||
}
|
||||
|
||||
public void testNoMatchingField() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
|
||||
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
|
||||
}, max -> {
|
||||
assertEquals(Double.NEGATIVE_INFINITY, max.getValue(), 0d);
|
||||
assertFalse(AggregationInspectionHelper.hasValue(max));
|
||||
});
|
||||
}
|
||||
|
||||
public void testSimpleHistogram() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
|
||||
}, max -> {
|
||||
assertEquals(90d, max.getValue(), 0.01d);
|
||||
assertTrue(AggregationInspectionHelper.hasValue(max));
|
||||
});
|
||||
}
|
||||
|
||||
public void testQueryFiltering() throws IOException {
|
||||
testCase(new TermQuery(new Term("match", "yes")), iw -> {
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "no", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "no", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {3, 1.2, 100}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
|
||||
);
|
||||
}, min -> {
|
||||
assertEquals(90d, min.getValue(), 0.01d);
|
||||
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||
});
|
||||
}
|
||||
|
||||
private void testCase(Query query,
|
||||
CheckedConsumer<RandomIndexWriter, IOException> indexer,
|
||||
Consumer<InternalMax> verify) throws IOException {
|
||||
testCase(max("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
|
||||
}
|
||||
|
||||
private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
|
||||
TDigest histogram = new TDigestState(100.0); //default
|
||||
for (double value : values) {
|
||||
histogram.add(value);
|
||||
}
|
||||
BytesStreamOutput streamOutput = new BytesStreamOutput();
|
||||
histogram.compress();
|
||||
Collection<Centroid> centroids = histogram.centroids();
|
||||
Iterator<Centroid> iterator = centroids.iterator();
|
||||
while ( iterator.hasNext()) {
|
||||
Centroid centroid = iterator.next();
|
||||
streamOutput.writeVInt(centroid.count());
|
||||
streamOutput.writeDouble(centroid.mean());
|
||||
}
|
||||
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<SearchPlugin> getSearchPlugins() {
|
||||
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
|
||||
// Note: this is the same list as Core, plus Analytics
|
||||
return org.elasticsearch.common.collect.List.of(
|
||||
CoreValuesSourceType.NUMERIC,
|
||||
CoreValuesSourceType.BOOLEAN,
|
||||
CoreValuesSourceType.DATE,
|
||||
AnalyticsValuesSourceType.HISTOGRAM
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
||||
return new MaxAggregationBuilder("_name").field(fieldName);
|
||||
}
|
||||
|
||||
private MappedFieldType defaultFieldType() {
|
||||
return new HistogramFieldMapper.HistogramFieldType(HistoBackedMaxAggregatorTests.FIELD_NAME, true, Collections.emptyMap());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,155 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.analytics.aggregations.metrics;
|
||||
|
||||
import com.tdunning.math.stats.Centroid;
|
||||
import com.tdunning.math.stats.TDigest;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.elasticsearch.common.CheckedConsumer;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.plugins.SearchPlugin;
|
||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.AggregatorTestCase;
|
||||
import org.elasticsearch.search.aggregations.metrics.InternalMin;
|
||||
import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
|
||||
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||
import org.elasticsearch.xpack.analytics.aggregations.support.AnalyticsValuesSourceType;
|
||||
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static java.util.Collections.singleton;
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.min;
|
||||
|
||||
public class HistoBackedMinAggregatorTests extends AggregatorTestCase {
|
||||
|
||||
private static final String FIELD_NAME = "field";
|
||||
|
||||
public void testNoDocs() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
// Intentionally not writing any docs
|
||||
}, min -> {
|
||||
assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d);
|
||||
assertFalse(AggregationInspectionHelper.hasValue(min));
|
||||
});
|
||||
}
|
||||
|
||||
public void testNoMatchingField() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {3, 1.2, 10})));
|
||||
iw.addDocument(singleton(getDocValue("wrong_field", new double[] {5.3, 6, 20})));
|
||||
}, min -> {
|
||||
assertEquals(Double.POSITIVE_INFINITY, min.getValue(), 0d);
|
||||
assertFalse(AggregationInspectionHelper.hasValue(min));
|
||||
});
|
||||
}
|
||||
|
||||
public void testSimpleHistogram() throws IOException {
|
||||
testCase(new MatchAllDocsQuery(), iw -> {
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {3, 1.2, 10})));
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {5.3, 6, 6, 20})));
|
||||
iw.addDocument(singleton(getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90})));
|
||||
}, min -> {
|
||||
assertEquals(-10d, min.getValue(), 0.01d);
|
||||
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||
});
|
||||
}
|
||||
|
||||
public void testQueryFiltering() throws IOException {
|
||||
testCase(new TermQuery(new Term("match", "yes")), iw -> {
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {5.3, 6, 20}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "no", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {-34, 1.2, 10}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "no", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {3, 1.2, 10}))
|
||||
);
|
||||
iw.addDocument(Arrays.asList(
|
||||
new StringField("match", "yes", Field.Store.NO),
|
||||
getDocValue(FIELD_NAME, new double[] {-10, 0.01, 1, 90}))
|
||||
);
|
||||
}, min -> {
|
||||
assertEquals(-10d, min.getValue(), 0.01d);
|
||||
assertTrue(AggregationInspectionHelper.hasValue(min));
|
||||
});
|
||||
}
|
||||
|
||||
private void testCase(Query query,
|
||||
CheckedConsumer<RandomIndexWriter, IOException> indexer,
|
||||
Consumer<InternalMin> verify) throws IOException {
|
||||
testCase(min("_name").field(FIELD_NAME), query, indexer, verify, defaultFieldType());
|
||||
}
|
||||
|
||||
private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
|
||||
TDigest histogram = new TDigestState(100.0); //default
|
||||
for (double value : values) {
|
||||
histogram.add(value);
|
||||
}
|
||||
BytesStreamOutput streamOutput = new BytesStreamOutput();
|
||||
histogram.compress();
|
||||
Collection<Centroid> centroids = histogram.centroids();
|
||||
Iterator<Centroid> iterator = centroids.iterator();
|
||||
while ( iterator.hasNext()) {
|
||||
Centroid centroid = iterator.next();
|
||||
streamOutput.writeVInt(centroid.count());
|
||||
streamOutput.writeDouble(centroid.mean());
|
||||
}
|
||||
return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<SearchPlugin> getSearchPlugins() {
|
||||
return org.elasticsearch.common.collect.List.of(new AnalyticsPlugin(Settings.EMPTY));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
|
||||
// Note: this is the same list as Core, plus Analytics
|
||||
return org.elasticsearch.common.collect.List.of(
|
||||
CoreValuesSourceType.NUMERIC,
|
||||
CoreValuesSourceType.BOOLEAN,
|
||||
CoreValuesSourceType.DATE,
|
||||
AnalyticsValuesSourceType.HISTOGRAM
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
|
||||
return new MinAggregationBuilder("_name").field(fieldName);
|
||||
}
|
||||
|
||||
private MappedFieldType defaultFieldType() {
|
||||
return new HistogramFieldMapper.HistogramFieldType(HistoBackedMinAggregatorTests.FIELD_NAME, true, Collections.emptyMap());
|
||||
}
|
||||
}
|
|
@ -39,11 +39,19 @@ setup:
|
|||
histo_avg:
|
||||
avg:
|
||||
field: "latency"
|
||||
histo_min:
|
||||
min:
|
||||
field: "latency"
|
||||
histo_max:
|
||||
max:
|
||||
field: "latency"
|
||||
|
||||
- match: { hits.total.value: 2 }
|
||||
- match: { aggregations.histo_sum.value: 25 }
|
||||
- match: { aggregations.histo_value_count.value: 80 }
|
||||
- match: { aggregations.histo_avg.value: 0.3125}
|
||||
- match: { aggregations.histo_min.value: 0}
|
||||
- match: { aggregations.histo_max.value: 0.5}
|
||||
|
||||
|
||||
---
|
||||
|
|
Loading…
Reference in New Issue