Add histogram field type support to boxplot aggs (#52265)
Add support for the histogram field type to boxplot aggs. Closes #52233 Relates to #33112
This commit is contained in:
parent
c61124a7b9
commit
a66988281f
|
@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[]
|
|||
|
||||
include::metrics/median-absolute-deviation-aggregation.asciidoc[]
|
||||
|
||||
include::metrics/boxplot-aggregation.asciidoc[]
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
=== Boxplot Aggregation
|
||||
|
||||
A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
|
||||
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
|
||||
These values can be generated by a provided script or extracted from specific numeric or
|
||||
<<histogram,histogram fields>> in the documents.
|
||||
|
||||
The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
|
||||
median, first quartile (25th percentile) and third quartile (75th percentile) values.
|
||||
|
|
|
@ -285,7 +285,7 @@ GET latency/_search
|
|||
|
||||
<1> Compression controls memory usage and approximation error
|
||||
|
||||
// tag::[t-digest]
|
||||
// tag::t-digest[]
|
||||
The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
|
||||
more nodes available, the higher the accuracy (and large memory footprint) proportional
|
||||
to the volume of data. The `compression` parameter limits the maximum number of
|
||||
|
@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
|
|||
of data which arrives sorted and in-order) the default settings will produce a
|
||||
TDigest roughly 64KB in size. In practice data tends to be more random and
|
||||
the TDigest will use less memory.
|
||||
// tag::[t-digest]
|
||||
// end::t-digest[]
|
||||
|
||||
==== HDR Histogram
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ following aggregations and queries:
|
|||
|
||||
* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
|
||||
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
|
||||
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
|
||||
* <<query-dsl-exists-query,exists>> query
|
||||
|
||||
[[mapping-types-histogram-building-histogram]]
|
||||
|
|
|
@ -29,7 +29,7 @@ import java.util.Objects;
|
|||
|
||||
import static org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder.COMPRESSION_FIELD;
|
||||
|
||||
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
|
||||
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
|
||||
BoxplotAggregationBuilder> {
|
||||
public static final String NAME = "boxplot";
|
||||
|
||||
|
@ -37,7 +37,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
|
|||
|
||||
static {
|
||||
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
|
||||
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
|
||||
ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
|
||||
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
|
||||
}
|
||||
|
||||
|
@ -98,7 +98,7 @@ public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.Le
|
|||
|
||||
@Override
|
||||
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
|
||||
ValuesSourceConfig<ValuesSource.Numeric> config,
|
||||
ValuesSourceConfig<ValuesSource> config,
|
||||
AggregatorFactory parent,
|
||||
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
|
||||
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
|
||||
|
|
|
@ -11,6 +11,8 @@ import org.apache.lucene.search.ScoreMode;
|
|||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.ObjectArray;
|
||||
import org.elasticsearch.index.fielddata.HistogramValue;
|
||||
import org.elasticsearch.index.fielddata.HistogramValues;
|
||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
|
@ -29,12 +31,12 @@ import java.util.Map;
|
|||
|
||||
public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
||||
|
||||
private final ValuesSource.Numeric valuesSource;
|
||||
private final ValuesSource valuesSource;
|
||||
private final DocValueFormat format;
|
||||
protected ObjectArray<TDigestState> states;
|
||||
protected final double compression;
|
||||
|
||||
BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
|
||||
BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
|
||||
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
|
||||
Map<String, Object> metaData) throws IOException {
|
||||
super(name, context, parent, pipelineAggregators, metaData);
|
||||
|
@ -58,23 +60,38 @@ public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {
|
|||
return LeafBucketCollector.NO_OP_COLLECTOR;
|
||||
}
|
||||
final BigArrays bigArrays = context.bigArrays();
|
||||
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
|
||||
return new LeafBucketCollectorBase(sub, values) {
|
||||
@Override
|
||||
public void collect(int doc, long bucket) throws IOException {
|
||||
states = bigArrays.grow(states, bucket + 1);
|
||||
|
||||
if (values.advanceExact(doc)) {
|
||||
if (valuesSource instanceof ValuesSource.Histogram) {
|
||||
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
|
||||
return new LeafBucketCollectorBase(sub, values) {
|
||||
@Override
|
||||
public void collect(int doc, long bucket) throws IOException {
|
||||
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
|
||||
if (values.advanceExact(doc)) {
|
||||
final int valueCount = values.docValueCount();
|
||||
for (int i = 0; i < valueCount; i++) {
|
||||
state.add(values.nextValue());
|
||||
final HistogramValue sketch = values.histogram();
|
||||
while(sketch.next()) {
|
||||
state.add(sketch.value(), sketch.count());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
} else {
|
||||
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
|
||||
return new LeafBucketCollectorBase(sub, values) {
|
||||
@Override
|
||||
public void collect(int doc, long bucket) throws IOException {
|
||||
states = bigArrays.grow(states, bucket + 1);
|
||||
if (values.advanceExact(doc)) {
|
||||
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
|
||||
if (values.advanceExact(doc)) {
|
||||
final int valueCount = values.docValueCount();
|
||||
for (int i = 0; i < valueCount; i++) {
|
||||
state.add(values.nextValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
|
||||
|
|
|
@ -20,12 +20,12 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
|
||||
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
|
||||
|
||||
private final double compression;
|
||||
|
||||
BoxplotAggregatorFactory(String name,
|
||||
ValuesSourceConfig<ValuesSource.Numeric> config,
|
||||
ValuesSourceConfig<ValuesSource> config,
|
||||
double compression,
|
||||
QueryShardContext queryShardContext,
|
||||
AggregatorFactory parent,
|
||||
|
@ -46,7 +46,7 @@ public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<Valu
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
|
||||
protected Aggregator doCreateInternal(ValuesSource valuesSource,
|
||||
SearchContext searchContext,
|
||||
Aggregator parent,
|
||||
boolean collectsFromSingleBucket,
|
||||
|
|
|
@ -27,6 +27,8 @@ import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
|
|||
import org.elasticsearch.search.aggregations.metrics.TDigestState;
|
||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
|
||||
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
|
||||
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
|
||||
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -131,8 +133,7 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testTDigestHistogram() throws Exception {
|
||||
|
||||
private void setupTDigestHistogram(int compression) throws Exception {
|
||||
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.startObject("_doc")
|
||||
|
@ -170,8 +171,6 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
|||
PutMappingRequest request2 = new PutMappingRequest("pre_agg").type("_doc").source(xContentBuilder2);
|
||||
client().admin().indices().putMapping(request2).actionGet();
|
||||
|
||||
|
||||
int compression = TestUtil.nextInt(random(), 200, 300);
|
||||
TDigestState histogram = new TDigestState(compression);
|
||||
BulkRequest bulkRequest = new BulkRequest();
|
||||
|
||||
|
@ -218,6 +217,11 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
|||
|
||||
response = client().prepareSearch("pre_agg").get();
|
||||
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
|
||||
}
|
||||
|
||||
public void testTDigestHistogram() throws Exception {
|
||||
int compression = TestUtil.nextInt(random(), 200, 300);
|
||||
setupTDigestHistogram(compression);
|
||||
|
||||
PercentilesAggregationBuilder builder =
|
||||
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
|
||||
|
@ -236,6 +240,31 @@ public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testBoxplotHistogram() throws Exception {
|
||||
int compression = TestUtil.nextInt(random(), 200, 300);
|
||||
setupTDigestHistogram(compression);
|
||||
BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);
|
||||
|
||||
SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
|
||||
SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
|
||||
SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();
|
||||
|
||||
Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
|
||||
Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
|
||||
Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
|
||||
assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
|
||||
assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
|
||||
assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
|
||||
assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);
|
||||
|
||||
assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
|
||||
assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
|
||||
assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
|
||||
assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
|
||||
assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
|
||||
assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> getPlugins() {
|
||||
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
|
||||
|
|
Loading…
Reference in New Issue