Add histogram field type support to boxplot aggs (#52265)
Add support for the histogram field type to boxplot aggs. Closes #52233 Relates to #33112
This commit is contained in:
parent
c61124a7b9
commit
a66988281f
|
@@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[]
|
||||||
|
|
||||||
include::metrics/median-absolute-deviation-aggregation.asciidoc[]
|
include::metrics/median-absolute-deviation-aggregation.asciidoc[]
|
||||||
|
|
||||||
|
include::metrics/boxplot-aggregation.asciidoc[]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -4,7 +4,8 @@
|
||||||
=== Boxplot Aggregation
|
=== Boxplot Aggregation
|
||||||
|
|
||||||
A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
|
A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
|
||||||
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
|
These values can be generated by a provided script or extracted from specific numeric or
|
||||||
|
<<histogram,histogram fields>> in the documents.
|
||||||
|
|
||||||
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
|
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
|
||||||
median, first quartile (25th percentile) and third quartile (75th percentile) values.
|
median, first quartile (25th percentile) and third quartile (75th percentile) values.
|
||||||
|
|
|
@@ -285,7 +285,7 @@ GET latency/_search
|
||||||
|
|
||||||
<1> Compression controls memory usage and approximation error
|
<1> Compression controls memory usage and approximation error
|
||||||
|
|
||||||
// tag::[t-digest]
|
// tag::t-digest[]
|
||||||
The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
|
The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
|
||||||
more nodes available, the higher the accuracy (and large memory footprint) proportional
|
more nodes available, the higher the accuracy (and large memory footprint) proportional
|
||||||
to the volume of data. The `compression` parameter limits the maximum number of
|
to the volume of data. The `compression` parameter limits the maximum number of
|
||||||
|
@@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
|
||||||
of data which arrives sorted and in-order) the default settings will produce a
|
of data which arrives sorted and in-order) the default settings will produce a
|
||||||
TDigest roughly 64KB in size. In practice data tends to be more random and
|
TDigest roughly 64KB in size. In practice data tends to be more random and
|
||||||
the TDigest will use less memory.
|
the TDigest will use less memory.
|
||||||
// tag::[t-digest]
|
// end::t-digest[]
|
||||||
|
|
||||||
==== HDR Histogram
|
==== HDR Histogram
|
||||||
|
|
||||||
|
|
|
@@ -37,6 +37,7 @@ following aggregations and queries:
|
||||||
|
|
||||||
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
|
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
|
||||||
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
|
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
|
||||||
|
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
|
||||||
* <<query-dsl-exists-query,exists>> query
|
* <<query-dsl-exists-query,exists>> query
|
||||||
|
|
||||||
[[mapping-types-histogram-building-histogram]]
|
[[mapping-types-histogram-building-histogram]]
|
||||||
|
|
|
@@ -29,7 +29,7 @@ import java.util.Objects;
|
||||||
|
|
||||||
import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD;
|
import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD;
|
||||||
|
|
||||||
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
|
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
|
||||||
BoxplotAggregationBuilder> {
|
BoxplotAggregationBuilder> {
|
||||||
public static final String NAME = "boxplot";
|
public static final String NAME = "boxplot";
|
||||||
|
|
||||||
|
@@ -37,7 +37,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
|
||||||
|
|
||||||
static {
|
static {
|
||||||
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
|
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
|
||||||
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
|
ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
|
||||||
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
|
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -98,7 +98,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
|
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
|
||||||
ValuesSourceConfig<ValuesSource.Numeric> config,
|
ValuesSourceConfig<ValuesSource> config,
|
||||||
AggregatorFactory parent,
|
AggregatorFactory parent,
|
||||||
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
|
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
|
||||||
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
|
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
|
||||||
|
|
|
@@ -11,6 +11,8 @@ import org.apache.lucene.search.ScoreMode;
|
||||||
import org.elasticsearch.common.lease.Releasables;
|
import org.elasticsearch.common.lease.Releasables;
|
||||||
import org.elasticsearch.common.util.BigArrays;
|
import org.elasticsearch.common.util.BigArrays;
|
||||||
import org.elasticsearch.common.util.ObjectArray;
|
import org.elasticsearch.common.util.ObjectArray;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||||
|
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||||
import org.elasticsearch.search.DocValueFormat;
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
import org.elasticsearch.search.aggregations.Aggregator;
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
|
@@ -29,12 +31,12 @@ import java.util.Map;
|
||||||
|
|
||||||
public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
||||||
|
|
||||||
private final ValuesSource.Numeric valuesSource;
|
private final ValuesSource valuesSource;
|
||||||
private final DocValueFormat format;
|
private final DocValueFormat format;
|
||||||
protected ObjectArray<TDigestState> states;
|
protected ObjectArray<TDigestState> states;
|
||||||
protected final double compression;
|
protected final double compression;
|
||||||
|
|
||||||
BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
|
BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
|
||||||
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
|
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
|
||||||
Map<String, Object> metaData) throws IOException {
|
Map<String, Object> metaData) throws IOException {
|
||||||
super(name, context, parent, pipelineAggregators, metaData);
|
super(name, context, parent, pipelineAggregators, metaData);
|
||||||
|
@@ -58,12 +60,26 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
||||||
return LeafBucketCollector.NO_OP_COLLECTOR;
|
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||||
}
|
}
|
||||||
final BigArrays bigArrays = context.bigArrays();
|
final BigArrays bigArrays = context.bigArrays();
|
||||||
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
|
if (valuesSource instanceof ValuesSource.Histogram) {
|
||||||
|
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
|
||||||
|
return new LeafBucketCollectorBase(sub, values) {
|
||||||
|
@Override
|
||||||
|
public void collect(int doc, long bucket) throws IOException {
|
||||||
|
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
|
||||||
|
if (values.advanceExact(doc)) {
|
||||||
|
final HistogramValue sketch = values.histogram();
|
||||||
|
while(sketch.next()) {
|
||||||
|
state.add(sketch.value(), sketch.count());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
|
||||||
return new LeafBucketCollectorBase(sub, values) {
|
return new LeafBucketCollectorBase(sub, values) {
|
||||||
@Override
|
@Override
|
||||||
public void collect(int doc, long bucket) throws IOException {
|
public void collect(int doc, long bucket) throws IOException {
|
||||||
states = bigArrays.grow(states, bucket + 1);
|
states = bigArrays.grow(states, bucket + 1);
|
||||||
|
|
||||||
if (values.advanceExact(doc)) {
|
if (values.advanceExact(doc)) {
|
||||||
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
|
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
|
||||||
if (values.advanceExact(doc)) {
|
if (values.advanceExact(doc)) {
|
||||||
|
@@ -76,6 +92,7 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
|
private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
|
||||||
states = bigArrays.grow(states, bucket + 1);
|
states = bigArrays.grow(states, bucket + 1);
|
||||||
|
|
|
@@ -20,12 +20,12 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
|
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
|
||||||
|
|
||||||
private final double compression;
|
private final double compression;
|
||||||
|
|
||||||
BoxplotAggregatorFactory(String name,
|
BoxplotAggregatorFactory(String name,
|
||||||
ValuesSourceConfig<ValuesSource.Numeric> config,
|
ValuesSourceConfig<ValuesSource> config,
|
||||||
double compression,
|
double compression,
|
||||||
QueryShardContext queryShardContext,
|
QueryShardContext queryShardContext,
|
||||||
AggregatorFactory parent,
|
AggregatorFactory parent,
|
||||||
|
@@ -46,7 +46,7 @@ public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<Valu
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
|
protected Aggregator doCreateInternal(ValuesSource valuesSource,
|
||||||
SearchContext searchContext,
|
SearchContext searchContext,
|
||||||
Aggregator parent,
|
Aggregator parent,
|
||||||
boolean collectsFromSingleBucket,
|
boolean collectsFromSingleBucket,
|
||||||
|
|
|
@@ -27,6 +27,8 @@ import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
|
||||||
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||||
|
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
|
||||||
|
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
|
||||||
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
|
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@@ -131,8 +133,7 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTDigestHistogram() throws Exception {
|
private void setupTDigestHistogram(int compression) throws Exception {
|
||||||
|
|
||||||
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
|
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
|
||||||
.startObject()
|
.startObject()
|
||||||
.startObject("_doc")
|
.startObject("_doc")
|
||||||
|
@@ -170,8 +171,6 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
||||||
PutMappingRequest request2 = new PutMappingRequest("pre_agg").type("_doc").source(xContentBuilder2);
|
PutMappingRequest request2 = new PutMappingRequest("pre_agg").type("_doc").source(xContentBuilder2);
|
||||||
client().admin().indices().putMapping(request2).actionGet();
|
client().admin().indices().putMapping(request2).actionGet();
|
||||||
|
|
||||||
|
|
||||||
int compression = TestUtil.nextInt(random(), 200, 300);
|
|
||||||
TDigestState histogram = new TDigestState(compression);
|
TDigestState histogram = new TDigestState(compression);
|
||||||
BulkRequest bulkRequest = new BulkRequest();
|
BulkRequest bulkRequest = new BulkRequest();
|
||||||
|
|
||||||
|
@@ -218,6 +217,11 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
response = client().prepareSearch("pre_agg").get();
|
response = client().prepareSearch("pre_agg").get();
|
||||||
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
|
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTDigestHistogram() throws Exception {
|
||||||
|
int compression = TestUtil.nextInt(random(), 200, 300);
|
||||||
|
setupTDigestHistogram(compression);
|
||||||
|
|
||||||
PercentilesAggregationBuilder builder =
|
PercentilesAggregationBuilder builder =
|
||||||
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
|
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
|
||||||
|
@@ -236,6 +240,31 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBoxplotHistogram() throws Exception {
|
||||||
|
int compression = TestUtil.nextInt(random(), 200, 300);
|
||||||
|
setupTDigestHistogram(compression);
|
||||||
|
BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);
|
||||||
|
|
||||||
|
SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
|
||||||
|
SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
|
||||||
|
SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();
|
||||||
|
|
||||||
|
Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
|
||||||
|
Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
|
||||||
|
Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
|
||||||
|
assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
|
||||||
|
assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
|
||||||
|
assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
|
||||||
|
assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);
|
||||||
|
|
||||||
|
assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
|
||||||
|
assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
|
||||||
|
assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
|
||||||
|
assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
|
||||||
|
assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
|
||||||
|
assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Collection<Class<? extends Plugin>> getPlugins() {
|
protected Collection<Class<? extends Plugin>> getPlugins() {
|
||||||
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
|
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
|
||||||
|
|
Loading…
Reference in New Issue