Add histogram field type support to boxplot aggs (#52265)

Add support for the histogram field type to boxplot aggs.

Closes #52233
Relates to #33112
This commit is contained in:
Igor Motov 2020-02-13 18:09:26 -05:00 committed by GitHub
parent c61124a7b9
commit a66988281f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 76 additions and 27 deletions

View File

@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[]
include::metrics/median-absolute-deviation-aggregation.asciidoc[] include::metrics/median-absolute-deviation-aggregation.asciidoc[]
include::metrics/boxplot-aggregation.asciidoc[]

View File

@ -4,7 +4,8 @@
=== Boxplot Aggregation === Boxplot Aggregation
A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents. A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script. These values can be generated by a provided script or extracted from specific numeric or
<<histogram,histogram fields>> in the documents.
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
median, first quartile (25th percentile) and third quartile (75th percentile) values. median, first quartile (25th percentile) and third quartile (75th percentile) values.

View File

@ -285,7 +285,7 @@ GET latency/_search
<1> Compression controls memory usage and approximation error <1> Compression controls memory usage and approximation error
// tag::[t-digest] // tag::t-digest[]
The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
more nodes available, the higher the accuracy (and large memory footprint) proportional more nodes available, the higher the accuracy (and large memory footprint) proportional
to the volume of data. The `compression` parameter limits the maximum number of to the volume of data. The `compression` parameter limits the maximum number of
@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
of data which arrives sorted and in-order) the default settings will produce a of data which arrives sorted and in-order) the default settings will produce a
TDigest roughly 64KB in size. In practice data tends to be more random and TDigest roughly 64KB in size. In practice data tends to be more random and
the TDigest will use less memory. the TDigest will use less memory.
// tag::[t-digest] // end::t-digest[]
==== HDR Histogram ==== HDR Histogram

View File

@ -37,6 +37,7 @@ following aggregations and queries:
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation * <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation * <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
* <<query-dsl-exists-query,exists>> query * <<query-dsl-exists-query,exists>> query
[[mapping-types-histogram-building-histogram]] [[mapping-types-histogram-building-histogram]]

View File

@ -29,7 +29,7 @@ import java.util.Objects;
import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD; import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD;
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric, public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
BoxplotAggregationBuilder> { BoxplotAggregationBuilder> {
public static final String NAME = "boxplot"; public static final String NAME = "boxplot";
@ -37,7 +37,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
static { static {
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME); PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false); ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD); PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
} }
@ -98,7 +98,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
@Override @Override
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext, protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
ValuesSourceConfig<ValuesSource.Numeric> config, ValuesSourceConfig<ValuesSource> config,
AggregatorFactory parent, AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException { AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData); return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);

View File

@ -11,6 +11,8 @@ import org.apache.lucene.search.ScoreMode;
import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
@ -29,12 +31,12 @@ import java.util.Map;
public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue { public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
private final ValuesSource.Numeric valuesSource; private final ValuesSource valuesSource;
private final DocValueFormat format; private final DocValueFormat format;
protected ObjectArray<TDigestState> states; protected ObjectArray<TDigestState> states;
protected final double compression; protected final double compression;
BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression, BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators, SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
super(name, context, parent, pipelineAggregators, metaData); super(name, context, parent, pipelineAggregators, metaData);
@ -58,12 +60,26 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
return LeafBucketCollector.NO_OP_COLLECTOR; return LeafBucketCollector.NO_OP_COLLECTOR;
} }
final BigArrays bigArrays = context.bigArrays(); final BigArrays bigArrays = context.bigArrays();
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); if (valuesSource instanceof ValuesSource.Histogram) {
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) {
final HistogramValue sketch = values.histogram();
while(sketch.next()) {
state.add(sketch.value(), sketch.count());
}
}
}
};
} else {
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) { return new LeafBucketCollectorBase(sub, values) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
states = bigArrays.grow(states, bucket + 1); states = bigArrays.grow(states, bucket + 1);
if (values.advanceExact(doc)) { if (values.advanceExact(doc)) {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) { if (values.advanceExact(doc)) {
@ -76,6 +92,7 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
} }
}; };
} }
}
private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
states = bigArrays.grow(states, bucket + 1); states = bigArrays.grow(states, bucket + 1);

View File

@ -20,12 +20,12 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> { public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
private final double compression; private final double compression;
BoxplotAggregatorFactory(String name, BoxplotAggregatorFactory(String name,
ValuesSourceConfig<ValuesSource.Numeric> config, ValuesSourceConfig<ValuesSource> config,
double compression, double compression,
QueryShardContext queryShardContext, QueryShardContext queryShardContext,
AggregatorFactory parent, AggregatorFactory parent,
@ -46,7 +46,7 @@ public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<Valu
} }
@Override @Override
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, protected Aggregator doCreateInternal(ValuesSource valuesSource,
SearchContext searchContext, SearchContext searchContext,
Aggregator parent, Aggregator parent,
boolean collectsFromSingleBucket, boolean collectsFromSingleBucket,

View File

@ -27,6 +27,8 @@ import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
import org.elasticsearch.search.aggregations.metrics.TDigestState; import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin; import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import java.util.ArrayList; import java.util.ArrayList;
@ -131,8 +133,7 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
} }
} }
public void testTDigestHistogram() throws Exception { private void setupTDigestHistogram(int compression) throws Exception {
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
.startObject() .startObject()
.startObject("_doc") .startObject("_doc")
@ -170,8 +171,6 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
PutMappingRequest request2 = new PutMappingRequest("pre_agg").type("_doc").source(xContentBuilder2); PutMappingRequest request2 = new PutMappingRequest("pre_agg").type("_doc").source(xContentBuilder2);
client().admin().indices().putMapping(request2).actionGet(); client().admin().indices().putMapping(request2).actionGet();
int compression = TestUtil.nextInt(random(), 200, 300);
TDigestState histogram = new TDigestState(compression); TDigestState histogram = new TDigestState(compression);
BulkRequest bulkRequest = new BulkRequest(); BulkRequest bulkRequest = new BulkRequest();
@ -218,6 +217,11 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
response = client().prepareSearch("pre_agg").get(); response = client().prepareSearch("pre_agg").get();
assertEquals(numDocs / frq, response.getHits().getTotalHits().value); assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
}
public void testTDigestHistogram() throws Exception {
int compression = TestUtil.nextInt(random(), 200, 300);
setupTDigestHistogram(compression);
PercentilesAggregationBuilder builder = PercentilesAggregationBuilder builder =
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST) AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
@ -236,6 +240,31 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
} }
} }
/**
 * Runs the same {@code boxplot} aggregation against the "raw" index, the "pre_agg" index,
 * and both indices together, then checks that the three results agree.
 *
 * Min and max must match exactly (delta 0.0); the quartiles Q1, Q2 and Q3 are allowed to
 * differ by up to 1.0.
 *
 * NOTE(review): presumably "raw" holds the individual values and "pre_agg" holds the same data
 * as histogram-typed documents, both populated by setupTDigestHistogram -- confirm against that helper.
 */
public void testBoxplotHistogram() throws Exception {
    final int compression = TestUtil.nextInt(random(), 200, 300);
    setupTDigestHistogram(compression);

    final BoxplotAggregationBuilder boxplotAgg = new BoxplotAggregationBuilder("agg")
        .field("inner.data")
        .compression(compression);

    // Issue all three searches before reading any aggregation result.
    final SearchResponse rawResponse = client().prepareSearch("raw").addAggregation(boxplotAgg).get();
    final SearchResponse preAggResponse = client().prepareSearch("pre_agg").addAggregation(boxplotAgg).get();
    final SearchResponse combinedResponse = client().prepareSearch("raw", "pre_agg").addAggregation(boxplotAgg).get();

    final Boxplot expected = rawResponse.getAggregations().get("agg");
    final Boxplot fromPreAgg = preAggResponse.getAggregations().get("agg");
    final Boxplot fromCombined = combinedResponse.getAggregations().get("agg");

    // Tolerances: extremes are compared exactly, quartiles with a delta of 1.0.
    final double exact = 0.0;
    final double quartileDelta = 1.0;

    assertEquals(expected.getMax(), fromPreAgg.getMax(), exact);
    assertEquals(expected.getMax(), fromCombined.getMax(), exact);
    assertEquals(expected.getMin(), fromPreAgg.getMin(), exact);
    assertEquals(expected.getMin(), fromCombined.getMin(), exact);

    assertEquals(expected.getQ1(), fromPreAgg.getQ1(), quartileDelta);
    assertEquals(expected.getQ1(), fromCombined.getQ1(), quartileDelta);
    assertEquals(expected.getQ2(), fromPreAgg.getQ2(), quartileDelta);
    assertEquals(expected.getQ2(), fromCombined.getQ2(), quartileDelta);
    assertEquals(expected.getQ3(), fromPreAgg.getQ3(), quartileDelta);
    assertEquals(expected.getQ3(), fromCombined.getQ3(), quartileDelta);
}
@Override @Override
protected Collection<Class<? extends Plugin>> getPlugins() { protected Collection<Class<? extends Plugin>> getPlugins() {
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins()); List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());