From e62aaa928efebf20e40b0e3f83d563e2f41555d3 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 24 Jul 2015 17:59:36 +0200 Subject: [PATCH 1/2] Tests: Fix TermVectorsUnitTests to not rely on HashMap iteration order. --- .../action/termvectors/TermVectorsUnitTests.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/action/termvectors/TermVectorsUnitTests.java b/core/src/test/java/org/elasticsearch/action/termvectors/TermVectorsUnitTests.java index 49f896c748d..d48473d8507 100644 --- a/core/src/test/java/org/elasticsearch/action/termvectors/TermVectorsUnitTests.java +++ b/core/src/test/java/org/elasticsearch/action/termvectors/TermVectorsUnitTests.java @@ -49,6 +49,7 @@ import org.junit.Test; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.Arrays; import java.util.EnumSet; import java.util.HashSet; import java.util.Set; @@ -335,11 +336,11 @@ public class TermVectorsUnitTests extends ElasticsearchTestCase { } void checkParsedFilterParameters(MultiTermVectorsRequest multiRequest) { - int id = 1; + Set ids = new HashSet<>(Arrays.asList("1", "2")); for (TermVectorsRequest request : multiRequest.requests) { assertThat(request.index(), equalTo("testidx")); assertThat(request.type(), equalTo("test")); - assertThat(request.id(), equalTo(id+"")); + assertTrue(ids.remove(request.id())); assertNotNull(request.filterSettings()); assertThat(request.filterSettings().maxNumTerms, equalTo(20)); assertThat(request.filterSettings().minTermFreq, equalTo(1)); @@ -348,7 +349,7 @@ public class TermVectorsUnitTests extends ElasticsearchTestCase { assertThat(request.filterSettings().maxDocFreq, equalTo(20)); assertThat(request.filterSettings().minWordLength, equalTo(1)); assertThat(request.filterSettings().maxWordLength, equalTo(20)); - id++; } + assertTrue(ids.isEmpty()); } } From 3e0532a0c549c1ffb52f34307e24b2662819b75d Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Mon, 20 Jul 2015 12:23:21 +0100 Subject: [PATCH 2/2] Aggregations: Add HDRHistogram as an option in percentiles and percentile_ranks aggregations HDRHistogram has been added as an option in the percentiles and percentile_ranks aggregation. It has one option `number_significant_digits` which controls the accuracy and memory size for the algorithm Closes #8324 --- core/licenses/HdrHistogram-2.1.5.jar.sha1 | 1 + core/licenses/HdrHistogram-LICENSE.txt | 41 ++ core/licenses/HdrHistogram-NOTICE.txt | 0 core/pom.xml | 4 + core/src/main/assemblies/common-bin.xml | 1 + .../TransportAggregationModule.java | 14 +- .../AbstractPercentilesBuilder.java | 87 +++ .../AbstractPercentilesParser.java | 115 +++- .../percentiles/InternalPercentile.java | 4 +- .../metrics/percentiles/PercentileRanks.java | 2 + .../percentiles/PercentileRanksBuilder.java | 23 +- .../percentiles/PercentileRanksParser.java | 26 +- .../metrics/percentiles/Percentiles.java | 2 + .../percentiles/PercentilesBuilder.java | 29 +- .../percentiles/PercentilesMethod.java | 61 +++ .../percentiles/PercentilesParser.java | 25 +- .../hdr/AbstractHDRPercentilesAggregator.java | 125 +++++ .../hdr/AbstractInternalHDRPercentiles.java | 144 +++++ .../hdr/HDRPercentileRanksAggregator.java | 106 ++++ .../hdr/HDRPercentilesAggregator.java | 108 ++++ .../hdr/InternalHDRPercentileRanks.java | 133 +++++ .../hdr/InternalHDRPercentiles.java | 123 +++++ .../AbstractInternalTDigestPercentiles.java} | 19 +- ...AbstractTDigestPercentilesAggregator.java} | 7 +- .../InternalTDigestPercentileRanks.java} | 22 +- .../InternalTDigestPercentiles.java} | 22 +- .../TDigestPercentileRanksAggregator.java} | 19 +- .../TDigestPercentilesAggregator.java} | 17 +- .../HDRPercentilesAggregationBenchmark.java | 158 ++++++ .../metrics/HDRPercentileRanksTests.java | 502 ++++++++++++++++++ .../metrics/HDRPercentilesTests.java | 489 +++++++++++++++++ ....java => TDigestPercentileRanksTests.java} | 4 +- ...ests.java => TDigestPercentilesTests.java} | 4 +- .../metrics/percentile-aggregation.asciidoc | 34 ++ .../percentile-rank-aggregation.asciidoc | 34 ++ pom.xml | 6 + 36 files changed, 2380 insertions(+), 131 deletions(-) create mode 100644 core/licenses/HdrHistogram-2.1.5.jar.sha1 create mode 100644 core/licenses/HdrHistogram-LICENSE.txt create mode 100644 core/licenses/HdrHistogram-NOTICE.txt create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesBuilder.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesMethod.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractHDRPercentilesAggregator.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractInternalHDRPercentiles.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentileRanksAggregator.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentilesAggregator.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentileRanks.java create mode 100644 core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentiles.java rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{AbstractInternalPercentiles.java => tdigest/AbstractInternalTDigestPercentiles.java} (85%) rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{AbstractPercentilesAggregator.java => tdigest/AbstractTDigestPercentilesAggregator.java} (93%) rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{InternalPercentileRanks.java => tdigest/InternalTDigestPercentileRanks.java} (76%) rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{InternalPercentiles.java => tdigest/InternalTDigestPercentiles.java} (76%) rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{PercentileRanksAggregator.java => tdigest/TDigestPercentileRanksAggregator.java} (78%) rename core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/{PercentilesAggregator.java => tdigest/TDigestPercentilesAggregator.java} (81%) create mode 100644 core/src/test/java/org/elasticsearch/benchmark/search/aggregations/HDRPercentilesAggregationBenchmark.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesTests.java rename core/src/test/java/org/elasticsearch/search/aggregations/metrics/{PercentileRanksTests.java => TDigestPercentileRanksTests.java} (99%) rename core/src/test/java/org/elasticsearch/search/aggregations/metrics/{PercentilesTests.java => TDigestPercentilesTests.java} (99%) diff --git a/core/licenses/HdrHistogram-2.1.5.jar.sha1 b/core/licenses/HdrHistogram-2.1.5.jar.sha1 new file mode 100644 index 00000000000..8a74670eab9 --- /dev/null +++ b/core/licenses/HdrHistogram-2.1.5.jar.sha1 @@ -0,0 +1 @@ +d1831874fb3c769fd126c4826e69e6b40c703ee0 diff --git a/core/licenses/HdrHistogram-LICENSE.txt b/core/licenses/HdrHistogram-LICENSE.txt new file mode 100644 index 00000000000..c9473ed5fa2 --- /dev/null +++ b/core/licenses/HdrHistogram-LICENSE.txt @@ -0,0 +1,41 @@ +The code in this repository code was Written by Gil Tene, Michael Barker, +and Matt Warren, and released to the public domain, as explained at +http://creativecommons.org/publicdomain/zero/1.0/ + +For users of this code who wish to consume it under the "BSD" license +rather than under the public domain or CC0 contribution text mentioned +above, the code found under this directory is *also* provided under the +following license (commonly referred to as the BSD 2-Clause License). This +license does not detract from the above stated release of the code into +the public domain, and simply represents an additional license granted by +the Author. + +----------------------------------------------------------------------------- +** Beginning of "BSD 2-Clause License" text. ** + + Copyright (c) 2012, 2013, 2014 Gil Tene + Copyright (c) 2014 Michael Barker + Copyright (c) 2014 Matt Warren + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/core/licenses/HdrHistogram-NOTICE.txt b/core/licenses/HdrHistogram-NOTICE.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/pom.xml b/core/pom.xml index 8e3ef26e961..b7383d3783a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -178,6 +178,10 @@ com.tdunning t-digest + + org.hdrhistogram + HdrHistogram + org.apache.commons commons-lang3 diff --git a/core/src/main/assemblies/common-bin.xml b/core/src/main/assemblies/common-bin.xml index dafd901aecd..d40299497f8 100644 --- a/core/src/main/assemblies/common-bin.xml +++ b/core/src/main/assemblies/common-bin.xml @@ -26,6 +26,7 @@ org.apache.commons:commons-lang3 commons-cli:commons-cli com.twitter:jsr166e + org.hdrhistogram:HdrHistogram diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java b/core/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java index ae4684dc2e0..00d2aac533b 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java @@ -51,8 +51,10 @@ import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardina import org.elasticsearch.search.aggregations.metrics.geobounds.InternalGeoBounds; import org.elasticsearch.search.aggregations.metrics.max.InternalMax; import org.elasticsearch.search.aggregations.metrics.min.InternalMin; -import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentileRanks; -import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentiles; +import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.InternalHDRPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.InternalHDRPercentiles; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentiles; import org.elasticsearch.search.aggregations.metrics.scripted.InternalScriptedMetric; import org.elasticsearch.search.aggregations.metrics.stats.InternalStats; import org.elasticsearch.search.aggregations.metrics.stats.extended.InternalExtendedStats; @@ -65,8 +67,8 @@ import org.elasticsearch.search.aggregations.pipeline.bucketmetrics.avg.AvgBucke import org.elasticsearch.search.aggregations.pipeline.bucketmetrics.max.MaxBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.bucketmetrics.min.MinBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.bucketmetrics.sum.SumBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.cumulativesum.CumulativeSumPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.bucketscript.BucketScriptPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.cumulativesum.CumulativeSumPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.derivative.DerivativePipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.derivative.InternalDerivative; import org.elasticsearch.search.aggregations.pipeline.having.BucketSelectorPipelineAggregator; @@ -90,8 +92,10 @@ public class TransportAggregationModule extends AbstractModule implements SpawnM InternalStats.registerStreams(); InternalExtendedStats.registerStreams(); InternalValueCount.registerStreams(); - InternalPercentiles.registerStreams(); - InternalPercentileRanks.registerStreams(); + InternalTDigestPercentiles.registerStreams(); + InternalTDigestPercentileRanks.registerStreams(); + InternalHDRPercentiles.registerStreams(); + InternalHDRPercentileRanks.registerStreams(); InternalCardinality.registerStreams(); InternalScriptedMetric.registerStreams(); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesBuilder.java new file mode 100644 index 00000000000..c587d6423db --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesBuilder.java @@ -0,0 +1,87 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.metrics.percentiles; + +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.aggregations.metrics.ValuesSourceMetricsAggregationBuilder; + +import java.io.IOException; + +abstract class AbstractPercentilesBuilder> extends + ValuesSourceMetricsAggregationBuilder { + + private Double compression; + private PercentilesMethod method; + private Integer numberOfSignificantValueDigits; + + public AbstractPercentilesBuilder(String name, String type) { + super(name, type); + } + + /** + * Expert: Set the method to use to compute the percentiles. + */ + public PB method(PercentilesMethod method) { + this.method = method; + return (PB) this; + } + + /** + * Expert: set the compression. Higher values improve accuracy but also + * memory usage. Only relevant when using {@link PercentilesMethod#TDIGEST}. + */ + public PB compression(double compression) { + this.compression = compression; + return (PB) this; + } + + /** + * Expert: set the number of significant digits in the values. Only relevant + * when using {@link PercentilesMethod#HDR}. + */ + public PB numberOfSignificantValueDigits(int numberOfSignificantValueDigits) { + this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; + return (PB) this; + } + + @Override + protected void internalXContent(XContentBuilder builder, Params params) throws IOException { + super.internalXContent(builder, params); + + doInternalXContent(builder, params); + + if (method != null) { + builder.startObject(method.getName()); + + if (compression != null) { + builder.field(AbstractPercentilesParser.COMPRESSION_FIELD.getPreferredName(), compression); + } + + if (numberOfSignificantValueDigits != null) { + builder.field(AbstractPercentilesParser.NUMBER_SIGNIFICANT_DIGITS_FIELD.getPreferredName(), numberOfSignificantValueDigits); + } + + builder.endObject(); + } + } + + protected abstract void doInternalXContent(XContentBuilder builder, Params params) throws IOException; + +} \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesParser.java index 8d12c9bf1a7..e264cf694dc 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesParser.java @@ -21,10 +21,12 @@ package org.elasticsearch.search.aggregations.metrics.percentiles; import com.carrotsearch.hppc.DoubleArrayList; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentiles; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -36,6 +38,11 @@ import java.util.Arrays; public abstract class AbstractPercentilesParser implements Aggregator.Parser { + public static final ParseField KEYED_FIELD = new ParseField("keyed"); + public static final ParseField METHOD_FIELD = new ParseField("method"); + public static final ParseField COMPRESSION_FIELD = new ParseField("compression"); + public static final ParseField NUMBER_SIGNIFICANT_DIGITS_FIELD = new ParseField("number_of_significant_value_digits"); + private boolean formattable; public AbstractPercentilesParser(boolean formattable) { @@ -44,14 +51,16 @@ public abstract class AbstractPercentilesParser implements Aggregator.Parser { @Override public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - - ValuesSourceParser vsParser = ValuesSourceParser.numeric(aggregationName, InternalPercentiles.TYPE, context) + + ValuesSourceParser vsParser = ValuesSourceParser.numeric(aggregationName, InternalTDigestPercentiles.TYPE, context) .formattable(formattable).build(); - + double[] keys = null; boolean keyed = true; - double compression = 100; - + Double compression = null; + Integer numberOfSignificantValueDigits = null; + PercentilesMethod method = null; + XContentParser.Token token; String currentFieldName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -60,7 +69,7 @@ public abstract class AbstractPercentilesParser implements Aggregator.Parser { } else if (vsParser.token(currentFieldName, token, parser)) { continue; } else if (token == XContentParser.Token.START_ARRAY) { - if (keysFieldName().equals(currentFieldName)) { + if (context.parseFieldMatcher().match(currentFieldName, keysField())) { DoubleArrayList values = new DoubleArrayList(10); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { double value = parser.doubleValue(); @@ -73,30 +82,104 @@ public abstract class AbstractPercentilesParser implements Aggregator.Parser { + currentFieldName + "].", parser.getTokenLocation()); } } else if (token == XContentParser.Token.VALUE_BOOLEAN) { - if ("keyed".equals(currentFieldName)) { + if (context.parseFieldMatcher().match(currentFieldName, KEYED_FIELD)) { keyed = parser.booleanValue(); } else { throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].", parser.getTokenLocation()); } - } else if (token == XContentParser.Token.VALUE_NUMBER) { - if ("compression".equals(currentFieldName)) { - compression = parser.doubleValue(); + } else if (token == XContentParser.Token.START_OBJECT) { + if (method != null) { + throw new SearchParseException(context, "Found multiple methods in [" + aggregationName + "]: [" + currentFieldName + + "]. only one of [" + PercentilesMethod.TDIGEST.getName() + "] and [" + PercentilesMethod.HDR.getName() + + "] may be used.", parser.getTokenLocation()); + } + method = PercentilesMethod.resolveFromName(currentFieldName); + if (method == null) { + throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].", + parser.getTokenLocation()); } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" - + currentFieldName + "].", parser.getTokenLocation()); + switch (method) { + case TDIGEST: + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_NUMBER) { + if (context.parseFieldMatcher().match(currentFieldName, COMPRESSION_FIELD)) { + compression = parser.doubleValue(); + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + + "]: [" + currentFieldName + "].", parser.getTokenLocation()); + } + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + + currentFieldName + "].", parser.getTokenLocation()); + } + } + break; + case HDR: + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_NUMBER) { + if (context.parseFieldMatcher().match(currentFieldName, NUMBER_SIGNIFICANT_DIGITS_FIELD)) { + numberOfSignificantValueDigits = parser.intValue(); + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + + "]: [" + currentFieldName + "].", parser.getTokenLocation()); + } + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + + currentFieldName + "].", parser.getTokenLocation()); + } + } + break; + } } } else { throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].", parser.getTokenLocation()); } } - return buildFactory(context, aggregationName, vsParser.config(), keys, compression, keyed); + + if (method == null) { + method = PercentilesMethod.TDIGEST; + } + + switch (method) { + case TDIGEST: + if (numberOfSignificantValueDigits != null) { + throw new SearchParseException(context, "[number_of_significant_value_digits] cannot be used with method [tdigest] in [" + + aggregationName + "].", parser.getTokenLocation()); + } + if (compression == null) { + compression = 100.0; + } + break; + case HDR: + if (compression != null) { + throw new SearchParseException(context, "[compression] cannot be used with method [hdr] in [" + aggregationName + "].", + parser.getTokenLocation()); + } + if (numberOfSignificantValueDigits == null) { + numberOfSignificantValueDigits = 3; + } + break; + + default: + // Shouldn't get here but if we do, throw a parse exception for + // invalid method + throw new SearchParseException(context, "Unknown value for [" + currentFieldName + "] in [" + aggregationName + "]: [" + method + + "].", parser.getTokenLocation()); + } + + return buildFactory(context, aggregationName, vsParser.config(), keys, method, compression, + numberOfSignificantValueDigits, keyed); } - protected abstract AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig config, double[] cdfValues, - double compression, boolean keyed); + protected abstract AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig config, + double[] cdfValues, PercentilesMethod method, Double compression, Integer numberOfSignificantValueDigits, boolean keyed); - protected abstract String keysFieldName(); + protected abstract ParseField keysField(); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentile.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentile.java index 62223fc24ab..bb8876d82fd 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentile.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentile.java @@ -19,12 +19,12 @@ package org.elasticsearch.search.aggregations.metrics.percentiles; -class InternalPercentile implements Percentile { +public class InternalPercentile implements Percentile { private final double percent; private final double value; - InternalPercentile(double percent, double value) { + public InternalPercentile(double percent, double value) { this.percent = percent; this.value = value; } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanks.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanks.java index a0f04c91d37..19ad169b8f7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanks.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanks.java @@ -26,6 +26,8 @@ import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation; */ public interface PercentileRanks extends NumericMetricsAggregation.MultiValue, Iterable { + public static final String TYPE_NAME = "percentile_ranks"; + /** * Return the percentile for the given value. */ diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksBuilder.java index b2941870f3e..abb9bc5bf81 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksBuilder.java @@ -19,23 +19,21 @@ package org.elasticsearch.search.aggregations.metrics.percentiles; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.search.aggregations.metrics.ValuesSourceMetricsAggregationBuilder; import java.io.IOException; /** * Builder for the {@link PercentileRanks} aggregation. */ -public class PercentileRanksBuilder extends ValuesSourceMetricsAggregationBuilder { +public class PercentileRanksBuilder extends AbstractPercentilesBuilder { private double[] values; - private Double compression; /** * Sole constructor. */ public PercentileRanksBuilder(String name) { - super(name, InternalPercentileRanks.TYPE.name()); + super(name, PercentileRanks.TYPE_NAME); } /** @@ -46,24 +44,11 @@ public class PercentileRanksBuilder extends ValuesSourceMetricsAggregationBuilde return this; } - /** - * Expert: set the compression. Higher values improve accuracy but also memory usage. - */ - public PercentileRanksBuilder compression(double compression) { - this.compression = compression; - return this; - } - @Override - protected void internalXContent(XContentBuilder builder, Params params) throws IOException { - super.internalXContent(builder, params); + protected void doInternalXContent(XContentBuilder builder, Params params) throws IOException { if (values != null) { - builder.field("values", values); - } - - if (compression != null) { - builder.field("compression", compression); + builder.field(PercentileRanksParser.VALUES_FIELD.getPreferredName(), values); } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksParser.java index 7d27c5556da..51e900a2ce7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksParser.java @@ -18,8 +18,12 @@ */ package org.elasticsearch.search.aggregations.metrics.percentiles; +import org.elasticsearch.common.ParseField; import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.HDRPercentileRanksAggregator; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestPercentileRanksAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -29,26 +33,36 @@ import org.elasticsearch.search.internal.SearchContext; */ public class PercentileRanksParser extends AbstractPercentilesParser { + public static final ParseField VALUES_FIELD = new ParseField("values"); + public PercentileRanksParser() { super(false); } @Override public String type() { - return InternalPercentileRanks.TYPE.name(); + return InternalTDigestPercentileRanks.TYPE.name(); } @Override - protected String keysFieldName() { - return "values"; + protected ParseField keysField() { + return VALUES_FIELD; } - + @Override - protected AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig valuesSourceConfig, double[] keys, double compression, boolean keyed) { + protected AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig valuesSourceConfig, + double[] keys, PercentilesMethod method, Double compression, Integer numberOfSignificantValueDigits, boolean keyed) { if (keys == null) { throw new SearchParseException(context, "Missing token values in [" + aggregationName + "].", null); } - return new PercentileRanksAggregator.Factory(aggregationName, valuesSourceConfig, keys, compression, keyed); + if (method == PercentilesMethod.TDIGEST) { + return new TDigestPercentileRanksAggregator.Factory(aggregationName, valuesSourceConfig, keys, compression, keyed); + } else if (method == PercentilesMethod.HDR) { + return new HDRPercentileRanksAggregator.Factory(aggregationName, valuesSourceConfig, keys, numberOfSignificantValueDigits, + keyed); + } else { + throw new AssertionError(); + } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/Percentiles.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/Percentiles.java index b753dd1cf5d..30fdb3f0002 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/Percentiles.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/Percentiles.java @@ -25,6 +25,8 @@ import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation; */ public interface Percentiles extends NumericMetricsAggregation.MultiValue, Iterable { + public static final String TYPE_NAME = "percentiles"; + /** * Return the value associated with the provided percentile. */ diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesBuilder.java index e1ab721a7af..399f9eabe20 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesBuilder.java @@ -19,23 +19,21 @@ package org.elasticsearch.search.aggregations.metrics.percentiles; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.search.aggregations.metrics.ValuesSourceMetricsAggregationBuilder; import java.io.IOException; + /** * Builder for the {@link Percentiles} aggregation. */ -public class PercentilesBuilder extends ValuesSourceMetricsAggregationBuilder { - - private double[] percentiles; - private Double compression; +public class PercentilesBuilder extends AbstractPercentilesBuilder { + double[] percentiles; /** * Sole constructor. */ public PercentilesBuilder(String name) { - super(name, InternalPercentiles.TYPE.name()); + super(name, Percentiles.TYPE_NAME); } /** @@ -52,24 +50,11 @@ public class PercentilesBuilder extends ValuesSourceMetricsAggregationBuildernull if no {@link PercentilesMethod} exists for the name. + */ + public static PercentilesMethod resolveFromName(String name) { + for (PercentilesMethod method : values()) { + if (method.name.equalsIgnoreCase(name)) { + return method; + } + } + return null; + } +} \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesParser.java index 5560856327c..6fbb2ccad90 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesParser.java @@ -18,7 +18,11 @@ */ package org.elasticsearch.search.aggregations.metrics.percentiles; +import org.elasticsearch.common.ParseField; import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.HDRPercentilesAggregator; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestPercentilesAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -28,6 +32,8 @@ import org.elasticsearch.search.internal.SearchContext; */ public class PercentilesParser extends AbstractPercentilesParser { + public static final ParseField PERCENTS_FIELD = new ParseField("percents"); + public PercentilesParser() { super(true); } @@ -36,20 +42,27 @@ public class PercentilesParser extends AbstractPercentilesParser { @Override public String type() { - return InternalPercentiles.TYPE.name(); + return InternalTDigestPercentiles.TYPE.name(); } @Override - protected String keysFieldName() { - return "percents"; + protected ParseField keysField() { + return PERCENTS_FIELD; } - + @Override - protected AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig valuesSourceConfig, double[] keys, double compression, boolean keyed) { + protected AggregatorFactory buildFactory(SearchContext context, String aggregationName, ValuesSourceConfig valuesSourceConfig, + double[] keys, PercentilesMethod method, Double compression, Integer numberOfSignificantValueDigits, boolean keyed) { if (keys == null) { keys = DEFAULT_PERCENTS; } - return new PercentilesAggregator.Factory(aggregationName, valuesSourceConfig, keys, compression, keyed); + if (method == PercentilesMethod.TDIGEST) { + return new TDigestPercentilesAggregator.Factory(aggregationName, valuesSourceConfig, keys, compression, keyed); + } else if (method == PercentilesMethod.HDR) { + return new HDRPercentilesAggregator.Factory(aggregationName, valuesSourceConfig, keys, numberOfSignificantValueDigits, keyed); + } else { + throw new AssertionError(); + } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractHDRPercentilesAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractHDRPercentilesAggregator.java new file mode 100644 index 00000000000..be513a2e766 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractHDRPercentilesAggregator.java @@ -0,0 +1,125 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import org.HdrHistogram.DoubleHistogram; +import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.ArrayUtils; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public abstract class AbstractHDRPercentilesAggregator extends NumericMetricsAggregator.MultiValue { + + private static int indexOfKey(double[] keys, double key) { + return ArrayUtils.binarySearch(keys, key, 0.001); + } + + protected final double[] keys; + protected final ValuesSource.Numeric valuesSource; + protected final ValueFormatter formatter; + protected ObjectArray states; + protected final int numberOfSignificantValueDigits; + protected final boolean keyed; + + public AbstractHDRPercentilesAggregator(String name, ValuesSource.Numeric valuesSource, AggregationContext context, Aggregator parent, + double[] keys, int numberOfSignificantValueDigits, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, Map metaData) throws IOException { + super(name, context, parent, pipelineAggregators, metaData); + this.valuesSource = valuesSource; + this.keyed = keyed; + this.formatter = formatter; + this.states = context.bigArrays().newObjectArray(1); + this.keys = keys; + this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; + } + + @Override + public boolean needsScores() { + return valuesSource != null && valuesSource.needsScores(); + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, + final LeafBucketCollector sub) throws IOException { + if (valuesSource == null) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } + final BigArrays bigArrays = context.bigArrays(); + final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + states = bigArrays.grow(states, bucket + 1); + + DoubleHistogram state = states.get(bucket); + if (state == null) { + state = new DoubleHistogram(numberOfSignificantValueDigits); + // Set the histogram to autosize so it can resize itself as + // the data range increases. Resize operations should be + // rare as the histogram buckets are exponential (on the top + // level). In the future we could expose the range as an + // option on the request so the histogram can be fixed at + // initialisation and doesn't need resizing. + state.setAutoResize(true); + states.set(bucket, state); + } + + values.setDocument(doc); + final int valueCount = values.count(); + for (int i = 0; i < valueCount; i++) { + state.recordValue(values.valueAt(i)); + } + } + }; + } + + @Override + public boolean hasMetric(String name) { + return indexOfKey(keys, Double.parseDouble(name)) >= 0; + } + + protected DoubleHistogram getState(long bucketOrd) { + if (bucketOrd >= states.size()) { + return null; + } + final DoubleHistogram state = states.get(bucketOrd); + return state; + } + + @Override + protected void doClose() { + Releasables.close(states); + } + +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractInternalHDRPercentiles.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractInternalHDRPercentiles.java new file mode 100644 index 00000000000..0549774ec5a --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractInternalHDRPercentiles.java @@ -0,0 +1,144 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import org.HdrHistogram.DoubleHistogram; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; +import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.zip.DataFormatException; + +abstract class AbstractInternalHDRPercentiles extends InternalNumericMetricsAggregation.MultiValue { + + protected double[] keys; + protected DoubleHistogram state; + private boolean keyed; + + AbstractInternalHDRPercentiles() {} // for serialization + + public AbstractInternalHDRPercentiles(String name, double[] keys, DoubleHistogram state, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, + Map metaData) { + super(name, pipelineAggregators, metaData); + this.keys = keys; + this.state = state; + this.keyed = keyed; + this.valueFormatter = formatter; + } + + @Override + public double value(String name) { + return value(Double.parseDouble(name)); + } + + public abstract double value(double key); + + public long getEstimatedMemoryFootprint() { + return state.getEstimatedFootprintInBytes(); + } + + @Override + public AbstractInternalHDRPercentiles doReduce(List aggregations, ReduceContext reduceContext) { + DoubleHistogram merged = null; + for (InternalAggregation aggregation : aggregations) { + final AbstractInternalHDRPercentiles percentiles = (AbstractInternalHDRPercentiles) aggregation; + if (merged == null) { + merged = new DoubleHistogram(percentiles.state); + merged.setAutoResize(true); + } + merged.add(percentiles.state); + } + return createReduced(getName(), keys, merged, keyed, pipelineAggregators(), getMetaData()); + } + + protected abstract AbstractInternalHDRPercentiles createReduced(String name, double[] keys, DoubleHistogram merged, boolean keyed, + List pipelineAggregators, Map metaData); + + @Override + protected void doReadFrom(StreamInput in) throws IOException { + valueFormatter = ValueFormatterStreams.readOptional(in); + keys = new double[in.readInt()]; + for (int i = 0; i < keys.length; ++i) { + keys[i] = in.readDouble(); + } + long minBarForHighestToLowestValueRatio = in.readLong(); + ByteBuffer stateBuffer = ByteBuffer.wrap(in.readByteArray()); + try { + state = DoubleHistogram.decodeFromCompressedByteBuffer(stateBuffer, minBarForHighestToLowestValueRatio); + } catch (DataFormatException e) { + throw new IOException("Failed to decode DoubleHistogram for aggregation [" + name + "]", e); + } + keyed = in.readBoolean(); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + ValueFormatterStreams.writeOptional(valueFormatter, out); + out.writeInt(keys.length); + for (int i = 0 ; i < keys.length; ++i) { + out.writeDouble(keys[i]); + } + out.writeLong(state.getHighestToLowestValueRatio()); + ByteBuffer stateBuffer = ByteBuffer.allocate(state.getNeededByteBufferCapacity()); + state.encodeIntoCompressedByteBuffer(stateBuffer); + out.writeByteArray(stateBuffer.array()); + out.writeBoolean(keyed); + } + + @Override + public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + if (keyed) { + builder.startObject(CommonFields.VALUES); + for(int i = 0; i < keys.length; ++i) { + String key = String.valueOf(keys[i]); + double value = value(keys[i]); + builder.field(key, value); + if (!(valueFormatter instanceof ValueFormatter.Raw)) { + builder.field(key + "_as_string", valueFormatter.format(value)); + } + } + builder.endObject(); + } else { + builder.startArray(CommonFields.VALUES); + for (int i = 0; i < keys.length; i++) { + double value = value(keys[i]); + builder.startObject(); + builder.field(CommonFields.KEY, keys[i]); + builder.field(CommonFields.VALUE, value); + if (!(valueFormatter instanceof ValueFormatter.Raw)) { + builder.field(CommonFields.VALUE_AS_STRING, valueFormatter.format(value)); + } + builder.endObject(); + } + builder.endArray(); + } + return builder; + } +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentileRanksAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentileRanksAggregator.java new file mode 100644 index 00000000000..d132fdbe678 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentileRanksAggregator.java @@ -0,0 +1,106 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import org.HdrHistogram.DoubleHistogram; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * + */ +public class HDRPercentileRanksAggregator extends AbstractHDRPercentilesAggregator { + + public HDRPercentileRanksAggregator(String name, Numeric valuesSource, AggregationContext context, Aggregator parent, + double[] percents, int numberOfSignificantValueDigits, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, Map metaData) throws IOException { + super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, pipelineAggregators, + metaData); + } + + @Override + public InternalAggregation buildAggregation(long owningBucketOrdinal) { + DoubleHistogram state = getState(owningBucketOrdinal); + if (state == null) { + return buildEmptyAggregation(); + } else { + return new InternalHDRPercentileRanks(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); + } + } + + @Override + public InternalAggregation buildEmptyAggregation() { + DoubleHistogram state; + state = new DoubleHistogram(numberOfSignificantValueDigits); + state.setAutoResize(true); + return new InternalHDRPercentileRanks(name, keys, state, + keyed, formatter, pipelineAggregators(), metaData()); + } + + @Override + public double metric(String name, long bucketOrd) { + DoubleHistogram state = getState(bucketOrd); + if (state == null) { + return Double.NaN; + } else { + return InternalHDRPercentileRanks.percentileRank(state, Double.valueOf(name)); + } + } + + public static class Factory extends ValuesSourceAggregatorFactory.LeafOnly { + + private final double[] values; + private final int numberOfSignificantValueDigits; + private final boolean keyed; + + public Factory(String name, ValuesSourceConfig valuesSourceConfig, double[] values, + int numberOfSignificantValueDigits, boolean keyed) { + super(name, InternalHDRPercentiles.TYPE.name(), valuesSourceConfig); + this.values = values; + this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; + this.keyed = keyed; + } + + @Override + protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, + List pipelineAggregators, Map metaData) throws IOException { + return new HDRPercentileRanksAggregator(name, null, aggregationContext, parent, values, numberOfSignificantValueDigits, keyed, + config.formatter(), pipelineAggregators, metaData); + } + + @Override + protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, AggregationContext aggregationContext, Aggregator parent, + boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) + throws IOException { + return new HDRPercentileRanksAggregator(name, valuesSource, aggregationContext, parent, values, numberOfSignificantValueDigits, + keyed, config.formatter(), pipelineAggregators, metaData); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentilesAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentilesAggregator.java new file mode 100644 index 00000000000..d1c0a62c78a --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentilesAggregator.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import org.HdrHistogram.DoubleHistogram; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * + */ +public class HDRPercentilesAggregator extends AbstractHDRPercentilesAggregator { + + public HDRPercentilesAggregator(String name, Numeric valuesSource, AggregationContext context, Aggregator parent, double[] percents, + int numberOfSignificantValueDigits, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, Map metaData) throws IOException { + super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, + pipelineAggregators, metaData); + } + + @Override + public InternalAggregation buildAggregation(long owningBucketOrdinal) { + DoubleHistogram state = getState(owningBucketOrdinal); + if (state == null) { + return buildEmptyAggregation(); + } else { + return new InternalHDRPercentiles(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); + } + } + + @Override + public double metric(String name, long bucketOrd) { + DoubleHistogram state = getState(bucketOrd); + if (state == null) { + return Double.NaN; + } else { + return state.getValueAtPercentile(Double.parseDouble(name)); + } + } + + @Override + public InternalAggregation buildEmptyAggregation() { + DoubleHistogram state; + state = new DoubleHistogram(numberOfSignificantValueDigits); + state.setAutoResize(true); + return new InternalHDRPercentiles(name, keys, state, + keyed, + formatter, pipelineAggregators(), metaData()); + } + + public static class Factory extends ValuesSourceAggregatorFactory.LeafOnly { + + private final double[] percents; + private final int numberOfSignificantValueDigits; + private final boolean keyed; + + public Factory(String name, ValuesSourceConfig valuesSourceConfig, double[] percents, + int numberOfSignificantValueDigits, boolean keyed) { + super(name, InternalTDigestPercentiles.TYPE.name(), valuesSourceConfig); + this.percents = percents; + this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; + this.keyed = keyed; + } + + @Override + protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, + List pipelineAggregators, Map metaData) throws IOException { + return new HDRPercentilesAggregator(name, null, aggregationContext, parent, percents, numberOfSignificantValueDigits, keyed, + config.formatter(), pipelineAggregators, metaData); + } + + @Override + protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, AggregationContext aggregationContext, Aggregator parent, + boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) + throws IOException { + return new HDRPercentilesAggregator(name, valuesSource, aggregationContext, parent, percents, numberOfSignificantValueDigits, + keyed, config.formatter(), pipelineAggregators, metaData); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentileRanks.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentileRanks.java new file mode 100644 index 00000000000..37acb5313b5 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentileRanks.java @@ -0,0 +1,133 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import com.google.common.collect.UnmodifiableIterator; + +import org.HdrHistogram.DoubleHistogram; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.search.aggregations.AggregationStreams; +import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanks; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** +* +*/ +public class InternalHDRPercentileRanks extends AbstractInternalHDRPercentiles implements PercentileRanks { + + public final static Type TYPE = new Type(PercentileRanks.TYPE_NAME, "hdr_percentile_ranks"); + + public final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() { + @Override + public InternalHDRPercentileRanks readResult(StreamInput in) throws IOException { + InternalHDRPercentileRanks result = new InternalHDRPercentileRanks(); + result.readFrom(in); + return result; + } + }; + + public static void registerStreams() { + AggregationStreams.registerStream(STREAM, TYPE.stream()); + } + + InternalHDRPercentileRanks() { + } // for serialization + + public InternalHDRPercentileRanks(String name, double[] cdfValues, DoubleHistogram state, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, Map metaData) { + super(name, cdfValues, state, keyed, formatter, pipelineAggregators, metaData); + } + + @Override + public Iterator iterator() { + return new Iter(keys, state); + } + + @Override + public double percent(double value) { + return percentileRank(state, value); + } + + @Override + public String percentAsString(double value) { + return valueAsString(String.valueOf(value)); + } + + @Override + public double value(double key) { + return percent(key); + } + + @Override + protected AbstractInternalHDRPercentiles createReduced(String name, double[] keys, DoubleHistogram merged, boolean keyed, + List pipelineAggregators, Map metaData) { + return new InternalHDRPercentileRanks(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); + } + + @Override + public Type type() { + return TYPE; + } + + static double percentileRank(DoubleHistogram state, double value) { + if (state.getTotalCount() == 0) { + return Double.NaN; + } + double percentileRank = state.getPercentileAtOrBelowValue(value); + if (percentileRank < 0) { + percentileRank = 0; + } else if (percentileRank > 100) { + percentileRank = 100; + } + return percentileRank; + } + + public static class Iter extends UnmodifiableIterator { + + private final double[] values; + private final DoubleHistogram state; + private int i; + + public Iter(double[] values, DoubleHistogram state) { + this.values = values; + this.state = state; + i = 0; + } + + @Override + public boolean hasNext() { + return i < values.length; + } + + @Override + public Percentile next() { + final Percentile next = new InternalPercentile(percentileRank(state, values[i]), values[i]); + ++i; + return next; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentiles.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentiles.java new file mode 100644 index 00000000000..4c475ed46a8 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentiles.java @@ -0,0 +1,123 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics.percentiles.hdr; + +import com.google.common.collect.UnmodifiableIterator; + +import org.HdrHistogram.DoubleHistogram; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.search.aggregations.AggregationStreams; +import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** +* +*/ +public class InternalHDRPercentiles extends AbstractInternalHDRPercentiles implements Percentiles { + + public final static Type TYPE = new Type(Percentiles.TYPE_NAME, "hdr_percentiles"); + + public final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() { + @Override + public InternalHDRPercentiles readResult(StreamInput in) throws IOException { + InternalHDRPercentiles result = new InternalHDRPercentiles(); + result.readFrom(in); + return result; + } + }; + + public static void registerStreams() { + AggregationStreams.registerStream(STREAM, TYPE.stream()); + } + + InternalHDRPercentiles() { + } // for serialization + + public InternalHDRPercentiles(String name, double[] percents, DoubleHistogram state, boolean keyed, ValueFormatter formatter, + List pipelineAggregators, Map metaData) { + super(name, percents, state, keyed, formatter, pipelineAggregators, metaData); + } + + @Override + public Iterator iterator() { + return new Iter(keys, state); + } + + @Override + public double percentile(double percent) { + if (state.getTotalCount() == 0) { + return Double.NaN; + } + return state.getValueAtPercentile(percent); + } + + @Override + public String percentileAsString(double percent) { + return valueAsString(String.valueOf(percent)); + } + + @Override + public double value(double key) { + return percentile(key); + } + + @Override + protected AbstractInternalHDRPercentiles createReduced(String name, double[] keys, DoubleHistogram merged, boolean keyed, + List pipelineAggregators, Map metaData) { + return new InternalHDRPercentiles(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); + } + + @Override + public Type type() { + return TYPE; + } + + public static class Iter extends UnmodifiableIterator { + + private final double[] percents; + private final DoubleHistogram state; + private int i; + + public Iter(double[] percents, DoubleHistogram state) { + this.percents = percents; + this.state = state; + i = 0; + } + + @Override + public boolean hasNext() { + return i < percents.length; + } + + @Override + public Percentile next() { + final Percentile next = new InternalPercentile(percents[i], state.getValueAtPercentile(percents[i])); + ++i; + return next; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractInternalPercentiles.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractInternalTDigestPercentiles.java similarity index 85% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractInternalPercentiles.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractInternalTDigestPercentiles.java index 5d4faf3e47c..c357566d4de 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractInternalPercentiles.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractInternalTDigestPercentiles.java @@ -17,14 +17,13 @@ * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.format.ValueFormatter; import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams; @@ -33,15 +32,15 @@ import java.io.IOException; import java.util.List; import java.util.Map; -abstract class AbstractInternalPercentiles extends InternalNumericMetricsAggregation.MultiValue { +abstract class AbstractInternalTDigestPercentiles extends InternalNumericMetricsAggregation.MultiValue { protected double[] keys; protected TDigestState state; private boolean keyed; - AbstractInternalPercentiles() {} // for serialization + AbstractInternalTDigestPercentiles() {} // for serialization - public AbstractInternalPercentiles(String name, double[] keys, TDigestState state, boolean keyed, ValueFormatter formatter, + public AbstractInternalTDigestPercentiles(String name, double[] keys, TDigestState state, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) { super(name, pipelineAggregators, metaData); @@ -58,11 +57,15 @@ abstract class AbstractInternalPercentiles extends InternalNumericMetricsAggrega public abstract double value(double key); + public long getEstimatedMemoryFootprint() { + return state.byteSize(); + } + @Override - public AbstractInternalPercentiles doReduce(List aggregations, ReduceContext reduceContext) { + public AbstractInternalTDigestPercentiles doReduce(List aggregations, ReduceContext reduceContext) { TDigestState merged = null; for (InternalAggregation aggregation : aggregations) { - final AbstractInternalPercentiles percentiles = (AbstractInternalPercentiles) aggregation; + final AbstractInternalTDigestPercentiles percentiles = (AbstractInternalTDigestPercentiles) aggregation; if (merged == null) { merged = new TDigestState(percentiles.state.compression()); } @@ -71,7 +74,7 @@ abstract class AbstractInternalPercentiles extends InternalNumericMetricsAggrega return createReduced(getName(), keys, merged, keyed, pipelineAggregators(), getMetaData()); } - protected abstract AbstractInternalPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, + protected abstract AbstractInternalTDigestPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, List pipelineAggregators, Map metaData); @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractTDigestPercentilesAggregator.java similarity index 93% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesAggregator.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractTDigestPercentilesAggregator.java index 4edf74c356a..13c0a39c9c9 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/AbstractPercentilesAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractTDigestPercentilesAggregator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.lease.Releasables; @@ -29,7 +29,6 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -39,7 +38,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; -public abstract class AbstractPercentilesAggregator extends NumericMetricsAggregator.MultiValue { +public abstract class AbstractTDigestPercentilesAggregator extends NumericMetricsAggregator.MultiValue { private static int indexOfKey(double[] keys, double key) { return ArrayUtils.binarySearch(keys, key, 0.001); @@ -52,7 +51,7 @@ public abstract class AbstractPercentilesAggregator extends NumericMetricsAggreg protected final double compression; protected final boolean keyed; - public AbstractPercentilesAggregator(String name, ValuesSource.Numeric valuesSource, AggregationContext context, Aggregator parent, + public AbstractTDigestPercentilesAggregator(String name, ValuesSource.Numeric valuesSource, AggregationContext context, Aggregator parent, double[] keys, double compression, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentileRanks.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentileRanks.java similarity index 76% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentileRanks.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentileRanks.java index 8d9b1f8a2d8..bff0dcf2664 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentileRanks.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentileRanks.java @@ -16,13 +16,15 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import com.google.common.collect.UnmodifiableIterator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.search.aggregations.AggregationStreams; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; +import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanks; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.format.ValueFormatter; @@ -34,14 +36,14 @@ import java.util.Map; /** * */ -public class InternalPercentileRanks extends AbstractInternalPercentiles implements PercentileRanks { +public class InternalTDigestPercentileRanks extends AbstractInternalTDigestPercentiles implements PercentileRanks { - public final static Type TYPE = new Type("percentile_ranks"); + public final static Type TYPE = new Type(PercentileRanks.TYPE_NAME, "t_digest_percentile_ranks"); public final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() { @Override - public InternalPercentileRanks readResult(StreamInput in) throws IOException { - InternalPercentileRanks result = new InternalPercentileRanks(); + public InternalTDigestPercentileRanks readResult(StreamInput in) throws IOException { + InternalTDigestPercentileRanks result = new InternalTDigestPercentileRanks(); result.readFrom(in); return result; } @@ -51,9 +53,9 @@ public class InternalPercentileRanks extends AbstractInternalPercentiles impleme AggregationStreams.registerStream(STREAM, TYPE.stream()); } - InternalPercentileRanks() {} // for serialization + InternalTDigestPercentileRanks() {} // for serialization - public InternalPercentileRanks(String name, double[] cdfValues, TDigestState state, boolean keyed, ValueFormatter formatter, + public InternalTDigestPercentileRanks(String name, double[] cdfValues, TDigestState state, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) { super(name, cdfValues, state, keyed, formatter, pipelineAggregators, metaData); } @@ -79,9 +81,9 @@ public class InternalPercentileRanks extends AbstractInternalPercentiles impleme } @Override - protected AbstractInternalPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, + protected AbstractInternalTDigestPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, List pipelineAggregators, Map metaData) { - return new InternalPercentileRanks(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); + return new InternalTDigestPercentileRanks(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentiles.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentiles.java similarity index 76% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentiles.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentiles.java index f3b7ee2f073..564e9d538ef 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/InternalPercentiles.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentiles.java @@ -16,13 +16,15 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import com.google.common.collect.UnmodifiableIterator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.search.aggregations.AggregationStreams; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; +import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.format.ValueFormatter; @@ -34,14 +36,14 @@ import java.util.Map; /** * */ -public class InternalPercentiles extends AbstractInternalPercentiles implements Percentiles { +public class InternalTDigestPercentiles extends AbstractInternalTDigestPercentiles implements Percentiles { - public final static Type TYPE = new Type("percentiles"); + public final static Type TYPE = new Type(Percentiles.TYPE_NAME, "t_digest_percentiles"); public final static AggregationStreams.Stream STREAM = new AggregationStreams.Stream() { @Override - public InternalPercentiles readResult(StreamInput in) throws IOException { - InternalPercentiles result = new InternalPercentiles(); + public InternalTDigestPercentiles readResult(StreamInput in) throws IOException { + InternalTDigestPercentiles result = new InternalTDigestPercentiles(); result.readFrom(in); return result; } @@ -51,10 +53,10 @@ public class InternalPercentiles extends AbstractInternalPercentiles implements AggregationStreams.registerStream(STREAM, TYPE.stream()); } - InternalPercentiles() { + InternalTDigestPercentiles() { } // for serialization - public InternalPercentiles(String name, double[] percents, TDigestState state, boolean keyed, ValueFormatter formatter, + public InternalTDigestPercentiles(String name, double[] percents, TDigestState state, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) { super(name, percents, state, keyed, formatter, pipelineAggregators, metaData); } @@ -80,9 +82,9 @@ public class InternalPercentiles extends AbstractInternalPercentiles implements } @Override - protected AbstractInternalPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, + protected AbstractInternalTDigestPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed, List pipelineAggregators, Map metaData) { - return new InternalPercentiles(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); + return new InternalTDigestPercentiles(name, keys, merged, keyed, valueFormatter, pipelineAggregators, metaData); } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentileRanksAggregator.java similarity index 78% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksAggregator.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentileRanksAggregator.java index 9e4ac36b692..95c9f0631b2 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentileRanksAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentileRanksAggregator.java @@ -16,11 +16,10 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -36,9 +35,9 @@ import java.util.Map; /** * */ -public class PercentileRanksAggregator extends AbstractPercentilesAggregator { +public class TDigestPercentileRanksAggregator extends AbstractTDigestPercentilesAggregator { - public PercentileRanksAggregator(String name, Numeric valuesSource, AggregationContext context, Aggregator parent, double[] percents, + public TDigestPercentileRanksAggregator(String name, Numeric valuesSource, AggregationContext context, Aggregator parent, double[] percents, double compression, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) throws IOException { @@ -51,13 +50,13 @@ public class PercentileRanksAggregator extends AbstractPercentilesAggregator { if (state == null) { return buildEmptyAggregation(); } else { - return new InternalPercentileRanks(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); + return new InternalTDigestPercentileRanks(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); } } @Override public InternalAggregation buildEmptyAggregation() { - return new InternalPercentileRanks(name, keys, new TDigestState(compression), keyed, formatter, pipelineAggregators(), metaData()); + return new InternalTDigestPercentileRanks(name, keys, new TDigestState(compression), keyed, formatter, pipelineAggregators(), metaData()); } @Override @@ -66,7 +65,7 @@ public class PercentileRanksAggregator extends AbstractPercentilesAggregator { if (state == null) { return Double.NaN; } else { - return InternalPercentileRanks.percentileRank(state, Double.valueOf(name)); + return InternalTDigestPercentileRanks.percentileRank(state, Double.valueOf(name)); } } @@ -78,7 +77,7 @@ public class PercentileRanksAggregator extends AbstractPercentilesAggregator { public Factory(String name, ValuesSourceConfig valuesSourceConfig, double[] values, double compression, boolean keyed) { - super(name, InternalPercentiles.TYPE.name(), valuesSourceConfig); + super(name, InternalTDigestPercentiles.TYPE.name(), valuesSourceConfig); this.values = values; this.compression = compression; this.keyed = keyed; @@ -87,7 +86,7 @@ public class PercentileRanksAggregator extends AbstractPercentilesAggregator { @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, List pipelineAggregators, Map metaData) throws IOException { - return new PercentileRanksAggregator(name, null, aggregationContext, parent, values, compression, keyed, config.formatter(), + return new TDigestPercentileRanksAggregator(name, null, aggregationContext, parent, values, compression, keyed, config.formatter(), pipelineAggregators, metaData); } @@ -95,7 +94,7 @@ public class PercentileRanksAggregator extends AbstractPercentilesAggregator { protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { - return new PercentileRanksAggregator(name, valuesSource, aggregationContext, parent, values, compression, keyed, + return new TDigestPercentileRanksAggregator(name, valuesSource, aggregationContext, parent, values, compression, keyed, config.formatter(), pipelineAggregators, metaData); } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentilesAggregator.java similarity index 81% rename from core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesAggregator.java rename to core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentilesAggregator.java index 894e28dd501..43800bf790f 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/PercentilesAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestPercentilesAggregator.java @@ -16,11 +16,10 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.metrics.percentiles; +package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; -import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -36,9 +35,9 @@ import java.util.Map; /** * */ -public class PercentilesAggregator extends AbstractPercentilesAggregator { +public class TDigestPercentilesAggregator extends AbstractTDigestPercentilesAggregator { - public PercentilesAggregator(String name, Numeric valuesSource, AggregationContext context, + public TDigestPercentilesAggregator(String name, Numeric valuesSource, AggregationContext context, Aggregator parent, double[] percents, double compression, boolean keyed, ValueFormatter formatter, List pipelineAggregators, Map metaData) throws IOException { @@ -51,7 +50,7 @@ public class PercentilesAggregator extends AbstractPercentilesAggregator { if (state == null) { return buildEmptyAggregation(); } else { - return new InternalPercentiles(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); + return new InternalTDigestPercentiles(name, keys, state, keyed, formatter, pipelineAggregators(), metaData()); } } @@ -67,7 +66,7 @@ public class PercentilesAggregator extends AbstractPercentilesAggregator { @Override public InternalAggregation buildEmptyAggregation() { - return new InternalPercentiles(name, keys, new TDigestState(compression), keyed, formatter, pipelineAggregators(), metaData()); + return new InternalTDigestPercentiles(name, keys, new TDigestState(compression), keyed, formatter, pipelineAggregators(), metaData()); } public static class Factory extends ValuesSourceAggregatorFactory.LeafOnly { @@ -78,7 +77,7 @@ public class PercentilesAggregator extends AbstractPercentilesAggregator { public Factory(String name, ValuesSourceConfig valuesSourceConfig, double[] percents, double compression, boolean keyed) { - super(name, InternalPercentiles.TYPE.name(), valuesSourceConfig); + super(name, InternalTDigestPercentiles.TYPE.name(), valuesSourceConfig); this.percents = percents; this.compression = compression; this.keyed = keyed; @@ -87,7 +86,7 @@ public class PercentilesAggregator extends AbstractPercentilesAggregator { @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent, List pipelineAggregators, Map metaData) throws IOException { - return new PercentilesAggregator(name, null, aggregationContext, parent, percents, compression, keyed, config.formatter(), + return new TDigestPercentilesAggregator(name, null, aggregationContext, parent, percents, compression, keyed, config.formatter(), pipelineAggregators, metaData); } @@ -95,7 +94,7 @@ public class PercentilesAggregator extends AbstractPercentilesAggregator { protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { - return new PercentilesAggregator(name, valuesSource, aggregationContext, parent, percents, compression, keyed, + return new TDigestPercentilesAggregator(name, valuesSource, aggregationContext, parent, percents, compression, keyed, config.formatter(), pipelineAggregators, metaData); } } diff --git a/core/src/test/java/org/elasticsearch/benchmark/search/aggregations/HDRPercentilesAggregationBenchmark.java b/core/src/test/java/org/elasticsearch/benchmark/search/aggregations/HDRPercentilesAggregationBenchmark.java new file mode 100644 index 00000000000..af0eee65f08 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/benchmark/search/aggregations/HDRPercentilesAggregationBenchmark.java @@ -0,0 +1,158 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.benchmark.search.aggregations; + +import com.carrotsearch.randomizedtesting.generators.RandomInts; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.StopWatch; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.SizeUnit; +import org.elasticsearch.common.unit.SizeValue; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.node.Node; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.InternalHDRPercentiles; +import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.InternalTDigestPercentiles; + +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.common.settings.Settings.settingsBuilder; +import static org.elasticsearch.node.NodeBuilder.nodeBuilder; +import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles; + +public class HDRPercentilesAggregationBenchmark { + + private static final String TYPE_NAME = "type"; + private static final String INDEX_NAME = "index"; + private static final String HIGH_CARD_FIELD_NAME = "high_card"; + private static final String LOW_CARD_FIELD_NAME = "low_card"; + private static final String GAUSSIAN_FIELD_NAME = "gauss"; + private static final Random R = new Random(); + private static final String CLUSTER_NAME = HDRPercentilesAggregationBenchmark.class.getSimpleName(); + private static final int NUM_DOCS = 10000000; + private static final int LOW_CARD = 1000; + private static final int HIGH_CARD = 1000000; + private static final int BATCH = 100; + private static final int WARM = 5; + private static final int RUNS = 10; + private static final int ITERS = 5; + + public static void main(String[] args) { + long overallStartTime = System.currentTimeMillis(); + Settings settings = settingsBuilder() + .put("index.refresh_interval", "-1") + .put(SETTING_NUMBER_OF_SHARDS, 5) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + + Node[] nodes = new Node[1]; + for (int i = 0; i < nodes.length; i++) { + nodes[i] = nodeBuilder().clusterName(CLUSTER_NAME) + .settings(settingsBuilder().put(settings).put("name", "node" + i)) + .node(); + } + + Node clientNode = nodeBuilder() + .clusterName(CLUSTER_NAME) + .settings(settingsBuilder().put(settings).put("name", "client")).client(true).node(); + + Client client = clientNode.client(); + + try { + client.admin().indices().prepareCreate(INDEX_NAME); + + System.out.println("Indexing " + NUM_DOCS + " documents"); + + StopWatch stopWatch = new StopWatch().start(); + for (int i = 0; i < NUM_DOCS; ) { + BulkRequestBuilder request = client.prepareBulk(); + for (int j = 0; j < BATCH && i < NUM_DOCS; ++j) { + final int lowCard = RandomInts.randomInt(R, LOW_CARD); + final int highCard = RandomInts.randomInt(R, HIGH_CARD); + int gauss = -1; + while (gauss < 0) { + gauss = (int) (R.nextGaussian() * 1000) + 5000; // mean: 5 sec, std deviation: 1 sec + } + request.add(client.prepareIndex(INDEX_NAME, TYPE_NAME, Integer.toString(i)).setSource(LOW_CARD_FIELD_NAME, lowCard, + HIGH_CARD_FIELD_NAME, highCard, GAUSSIAN_FIELD_NAME, gauss)); + ++i; + } + BulkResponse response = request.execute().actionGet(); + if (response.hasFailures()) { + System.err.println("--> failures..."); + System.err.println(response.buildFailureMessage()); + } + if ((i % 100000) == 0) { + System.out.println("--> Indexed " + i + " took " + stopWatch.stop().lastTaskTime()); + stopWatch.start(); + } + } + + client.admin().indices().prepareRefresh(INDEX_NAME).execute().actionGet(); + } catch (Exception e) { + System.out.println("Index already exists, skipping index creation"); + } + + ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet(); + if (clusterHealthResponse.isTimedOut()) { + System.err.println("--> Timed out waiting for cluster health"); + } + + System.out.println("Run\tField\tMethod\tAggregationTime\tEstimatedMemory"); + for (int i = 0; i < WARM + RUNS; ++i) { + for (String field : new String[] { LOW_CARD_FIELD_NAME, HIGH_CARD_FIELD_NAME, GAUSSIAN_FIELD_NAME }) { + for (PercentilesMethod method : new PercentilesMethod[] {PercentilesMethod.TDIGEST, PercentilesMethod.HDR}) { + long start = System.nanoTime(); + SearchResponse resp = null; + for (int j = 0; j < ITERS; ++j) { + resp = client.prepareSearch(INDEX_NAME).setSize(0).addAggregation(percentiles("percentiles").field(field).method(method)).execute().actionGet(); + } + long end = System.nanoTime(); + long memoryEstimate = 0; + switch (method) { + case TDIGEST: + memoryEstimate = ((InternalTDigestPercentiles) resp.getAggregations().get("percentiles")) + .getEstimatedMemoryFootprint(); + break; + case HDR: + memoryEstimate = ((InternalHDRPercentiles) resp.getAggregations().get("percentiles")).getEstimatedMemoryFootprint(); + break; + } + if (i >= WARM) { + System.out.println((i - WARM) + "\t" + field + "\t" + method + "\t" + + new TimeValue((end - start) / ITERS, TimeUnit.NANOSECONDS).millis() + "\t" + + new SizeValue(memoryEstimate, SizeUnit.SINGLE).singles()); + } + } + } + } + long overallEndTime = System.currentTimeMillis(); + System.out.println("Benchmark completed in " + ((overallEndTime - overallStartTime) / 1000) + " seconds"); + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksTests.java new file mode 100644 index 00000000000..70767d70a10 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksTests.java @@ -0,0 +1,502 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics; + +import com.google.common.collect.Lists; + +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptService.ScriptType; +import org.elasticsearch.search.aggregations.bucket.global.Global; +import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; +import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Order; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanks; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesMethod; +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; +import static org.elasticsearch.search.aggregations.AggregationBuilders.global; +import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram; +import static org.elasticsearch.search.aggregations.AggregationBuilders.percentileRanks; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.sameInstance; + +/** + * + */ +public class HDRPercentileRanksTests extends AbstractNumericTests { + + private static double[] randomPercents(long minValue, long maxValue) { + + final int length = randomIntBetween(1, 20); + final double[] percents = new double[length]; + for (int i = 0; i < percents.length; ++i) { + switch (randomInt(20)) { + case 0: + percents[i] = minValue; + break; + case 1: + percents[i] = maxValue; + break; + default: + percents[i] = (randomDouble() * (maxValue - minValue)) + minValue; + break; + } + } + Arrays.sort(percents); + Loggers.getLogger(HDRPercentileRanksTests.class).info("Using percentiles={}", Arrays.toString(percents)); + return percents; + } + + private static int randomSignificantDigits() { + return randomIntBetween(0, 5); + } + + private void assertConsistent(double[] pcts, PercentileRanks percentiles, long minValue, long maxValue, int numberSigDigits) { + final List percentileList = Lists.newArrayList(percentiles); + assertEquals(pcts.length, percentileList.size()); + for (int i = 0; i < pcts.length; ++i) { + final Percentile percentile = percentileList.get(i); + assertThat(percentile.getValue(), equalTo(pcts[i])); + assertThat(percentile.getPercent(), greaterThanOrEqualTo(0.0)); + assertThat(percentile.getPercent(), lessThanOrEqualTo(100.0)); + + if (percentile.getPercent() == 0) { + double allowedError = minValue / Math.pow(10, numberSigDigits); + assertThat(percentile.getValue(), lessThanOrEqualTo(minValue + allowedError)); + } + if (percentile.getPercent() == 100) { + double allowedError = maxValue / Math.pow(10, numberSigDigits); + assertThat(percentile.getValue(), greaterThanOrEqualTo(maxValue - allowedError)); + } + } + + for (int i = 1; i < percentileList.size(); ++i) { + assertThat(percentileList.get(i).getValue(), greaterThanOrEqualTo(percentileList.get(i - 1).getValue())); + } + } + + @Override + @Test + public void testEmptyAggregation() throws Exception { + + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("empty_bucket_idx") + .setQuery(matchAllQuery()) + .addAggregation( + histogram("histo") + .field("value") + .interval(1l) + .minDocCount(0) + .subAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR) + .numberOfSignificantValueDigits(sigDigits).percentiles(10, 15))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l)); + Histogram histo = searchResponse.getAggregations().get("histo"); + assertThat(histo, notNullValue()); + Histogram.Bucket bucket = histo.getBuckets().get(1); + assertThat(bucket, notNullValue()); + + PercentileRanks reversePercentiles = bucket.getAggregations().get("percentile_ranks"); + assertThat(reversePercentiles, notNullValue()); + assertThat(reversePercentiles.getName(), equalTo("percentile_ranks")); + assertThat(reversePercentiles.percent(10), equalTo(Double.NaN)); + assertThat(reversePercentiles.percent(15), equalTo(Double.NaN)); + } + + @Override + @Test + public void testUnmapped() throws Exception { + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx_unmapped") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").percentiles(0, 10, 15, 100)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l)); + + PercentileRanks reversePercentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertThat(reversePercentiles, notNullValue()); + assertThat(reversePercentiles.getName(), equalTo("percentile_ranks")); + assertThat(reversePercentiles.percent(0), equalTo(Double.NaN)); + assertThat(reversePercentiles.percent(10), equalTo(Double.NaN)); + assertThat(reversePercentiles.percent(15), equalTo(Double.NaN)); + assertThat(reversePercentiles.percent(100), equalTo(Double.NaN)); + } + + @Override + @Test + public void testSingleValuedField() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValue, maxValue); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_getProperty() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValue, maxValue); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + global("global").subAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").percentiles(pcts))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + Global global = searchResponse.getAggregations().get("global"); + assertThat(global, notNullValue()); + assertThat(global.getName(), equalTo("global")); + assertThat(global.getDocCount(), equalTo(10l)); + assertThat(global.getAggregations(), notNullValue()); + assertThat(global.getAggregations().asMap().size(), equalTo(1)); + + PercentileRanks percentiles = global.getAggregations().get("percentile_ranks"); + assertThat(percentiles, notNullValue()); + assertThat(percentiles.getName(), equalTo("percentile_ranks")); + assertThat((PercentileRanks) global.getProperty("percentile_ranks"), sameInstance(percentiles)); + + } + + @Test + public void testSingleValuedFieldOutsideRange() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = new double[] { minValue - 1, maxValue + 1 }; + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_PartiallyUnmapped() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValue, maxValue); + SearchResponse searchResponse = client() + .prepareSearch("idx", "idx_unmapped") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_WithValueScript() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValue - 1, maxValue - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").script(new Script("_value - 1")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_WithValueScript_WithParams() throws Exception { + int sigDigits = randomSignificantDigits(); + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercents(minValue - 1, maxValue - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("value").script(new Script("_value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testMultiValuedField() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValues, maxValues); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("values").percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testMultiValuedField_WithValueScript() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValues - 1, maxValues - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("values").script(new Script("_value - 1")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Test + public void testMultiValuedField_WithValueScript_Reverse() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(20 - maxValues, 20 - minValues); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("values").script(new Script("20 - _value")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, 20 - maxValues, 20 - minValues, sigDigits); + } + + @Override + @Test + public void testMultiValuedField_WithValueScript_WithParams() throws Exception { + int sigDigits = randomSignificantDigits(); + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercents(minValues - 1, maxValues - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .field("values").script(new Script("_value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Override + @Test + public void testScript_SingleValued() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValue, maxValue); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .script(new Script("doc['value'].value")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testScript_SingleValued_WithParams() throws Exception { + int sigDigits = randomSignificantDigits(); + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercents(minValue - 1, maxValue - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .script(new Script("doc['value'].value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testScript_ExplicitSingleValued_WithParams() throws Exception { + int sigDigits = randomSignificantDigits(); + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercents(minValue - 1, maxValue - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .script(new Script("doc['value'].value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testScript_MultiValued() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValues, maxValues); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .script(new Script("doc['values'].values")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testScript_ExplicitMultiValued() throws Exception { + int sigDigits = randomSignificantDigits(); + final double[] pcts = randomPercents(minValues, maxValues); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .script(new Script("doc['values'].values")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testScript_MultiValued_WithParams() throws Exception { + int sigDigits = randomSignificantDigits(); + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercents(minValues - 1, maxValues - 1); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentileRanks("percentile_ranks") + .method(PercentilesMethod.HDR) + .numberOfSignificantValueDigits(sigDigits) + .script(new Script( + "List values = doc['values'].values; double[] res = new double[values.size()]; for (int i = 0; i < res.length; i++) { res[i] = values.get(i) - dec; }; return res;", + ScriptType.INLINE, null, params)).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final PercentileRanks percentiles = searchResponse.getAggregations().get("percentile_ranks"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Test + public void testOrderBySubAggregation() { + int sigDigits = randomSignificantDigits(); + boolean asc = randomBoolean(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + histogram("histo").field("value").interval(2l) + .subAggregation( + percentileRanks("percentile_ranks").method(PercentilesMethod.HDR) + .numberOfSignificantValueDigits(sigDigits).percentiles(99)) + .order(Order.aggregation("percentile_ranks", "99", asc))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + Histogram histo = searchResponse.getAggregations().get("histo"); + double previous = asc ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; + for (Histogram.Bucket bucket : histo.getBuckets()) { + PercentileRanks percentiles = bucket.getAggregations().get("percentile_ranks"); + double p99 = percentiles.percent(99); + if (asc) { + assertThat(p99, greaterThanOrEqualTo(previous)); + } else { + assertThat(p99, lessThanOrEqualTo(previous)); + } + previous = p99; + } + } + +} \ No newline at end of file diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesTests.java new file mode 100644 index 00000000000..a131933ecb3 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesTests.java @@ -0,0 +1,489 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.metrics; + +import com.google.common.collect.Lists; + +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptService.ScriptType; +import org.elasticsearch.search.aggregations.bucket.global.Global; +import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; +import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Order; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile; +import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles; +import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesMethod; +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; +import static org.elasticsearch.search.aggregations.AggregationBuilders.global; +import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram; +import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.sameInstance; + +/** + * + */ +public class HDRPercentilesTests extends AbstractNumericTests { + + private static double[] randomPercentiles() { + final int length = randomIntBetween(1, 20); + final double[] percentiles = new double[length]; + for (int i = 0; i < percentiles.length; ++i) { + switch (randomInt(20)) { + case 0: + percentiles[i] = 0; + break; + case 1: + percentiles[i] = 100; + break; + default: + percentiles[i] = randomDouble() * 100; + break; + } + } + Arrays.sort(percentiles); + Loggers.getLogger(HDRPercentilesTests.class).info("Using percentiles={}", Arrays.toString(percentiles)); + return percentiles; + } + + private static int randomSignificantDigits() { + return randomIntBetween(0, 5); + } + + private void assertConsistent(double[] pcts, Percentiles percentiles, long minValue, long maxValue, int numberSigDigits) { + final List percentileList = Lists.newArrayList(percentiles); + assertEquals(pcts.length, percentileList.size()); + for (int i = 0; i < pcts.length; ++i) { + final Percentile percentile = percentileList.get(i); + assertThat(percentile.getPercent(), equalTo(pcts[i])); + double value = percentile.getValue(); + double allowedError = value / Math.pow(10, numberSigDigits); + assertThat(value, greaterThanOrEqualTo(minValue - allowedError)); + assertThat(value, lessThanOrEqualTo(maxValue + allowedError)); + + if (percentile.getPercent() == 0) { + assertThat(value, closeTo(minValue, allowedError)); + } + if (percentile.getPercent() == 100) { + assertThat(value, closeTo(maxValue, allowedError)); + } + } + + for (int i = 1; i < percentileList.size(); ++i) { + assertThat(percentileList.get(i).getValue(), greaterThanOrEqualTo(percentileList.get(i - 1).getValue())); + } + } + + @Override + @Test + public void testEmptyAggregation() throws Exception { + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("empty_bucket_idx") + .setQuery(matchAllQuery()) + .addAggregation( + histogram("histo") + .field("value") + .interval(1l) + .minDocCount(0) + .subAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .percentiles(10, + 15))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l)); + Histogram histo = searchResponse.getAggregations().get("histo"); + assertThat(histo, notNullValue()); + Histogram.Bucket bucket = histo.getBuckets().get(1); + assertThat(bucket, notNullValue()); + + Percentiles percentiles = bucket.getAggregations().get("percentiles"); + assertThat(percentiles, notNullValue()); + assertThat(percentiles.getName(), equalTo("percentiles")); + assertThat(percentiles.percentile(10), equalTo(Double.NaN)); + assertThat(percentiles.percentile(15), equalTo(Double.NaN)); + } + + @Override + @Test + public void testUnmapped() throws Exception { + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx_unmapped") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("value") + .percentiles(0, 10, 15, 100)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(0l)); + + Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertThat(percentiles, notNullValue()); + assertThat(percentiles.getName(), equalTo("percentiles")); + assertThat(percentiles.percentile(0), equalTo(Double.NaN)); + assertThat(percentiles.percentile(10), equalTo(Double.NaN)); + assertThat(percentiles.percentile(15), equalTo(Double.NaN)); + assertThat(percentiles.percentile(100), equalTo(Double.NaN)); + } + + @Override + @Test + public void testSingleValuedField() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomIntBetween(1, 5); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("value") + .percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_getProperty() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + global("global").subAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .field("value") + .percentiles(pcts))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + Global global = searchResponse.getAggregations().get("global"); + assertThat(global, notNullValue()); + assertThat(global.getName(), equalTo("global")); + assertThat(global.getDocCount(), equalTo(10l)); + assertThat(global.getAggregations(), notNullValue()); + assertThat(global.getAggregations().asMap().size(), equalTo(1)); + + Percentiles percentiles = global.getAggregations().get("percentiles"); + assertThat(percentiles, notNullValue()); + assertThat(percentiles.getName(), equalTo("percentiles")); + assertThat((Percentiles) global.getProperty("percentiles"), sameInstance(percentiles)); + + } + + @Override + @Test + public void testSingleValuedField_PartiallyUnmapped() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx", "idx_unmapped") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("value") + .percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_WithValueScript() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("value") + .script(new Script("_value - 1")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testSingleValuedField_WithValueScript_WithParams() throws Exception { + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("value") + .script(new Script("_value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)).execute() + .actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testMultiValuedField() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("values") + .percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testMultiValuedField_WithValueScript() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("values") + .script(new Script("_value - 1")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Test + public void testMultiValuedField_WithValueScript_Reverse() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("values") + .script(new Script("20 - _value")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, 20 - maxValues, 20 - minValues, sigDigits); + } + + @Override + @Test + public void testMultiValuedField_WithValueScript_WithParams() throws Exception { + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR).field("values") + .script(new Script("_value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)).execute() + .actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Override + @Test + public void testScript_SingleValued() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .script(new Script("doc['value'].value")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue, maxValue, sigDigits); + } + + @Override + @Test + public void testScript_SingleValued_WithParams() throws Exception { + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .script(new Script("doc['value'].value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testScript_ExplicitSingleValued_WithParams() throws Exception { + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .script(new Script("doc['value'].value - dec", ScriptType.INLINE, null, params)).percentiles(pcts)) + .execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValue - 1, maxValue - 1, sigDigits); + } + + @Override + @Test + public void testScript_MultiValued() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .script(new Script("doc['values'].values")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testScript_ExplicitMultiValued() throws Exception { + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles").numberOfSignificantValueDigits(sigDigits).method(PercentilesMethod.HDR) + .script(new Script("doc['values'].values")).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues, maxValues, sigDigits); + } + + @Override + @Test + public void testScript_MultiValued_WithParams() throws Exception { + Map params = new HashMap<>(); + params.put("dec", 1); + final double[] pcts = randomPercentiles(); + int sigDigits = randomSignificantDigits(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + percentiles("percentiles") + .numberOfSignificantValueDigits(sigDigits) + .method(PercentilesMethod.HDR) + .script(new Script( + "List values = doc['values'].values; double[] res = new double[values.size()]; for (int i = 0; i < res.length; i++) { res[i] = values.get(i) - dec; }; return res;", + ScriptType.INLINE, null, params)).percentiles(pcts)).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + final Percentiles percentiles = searchResponse.getAggregations().get("percentiles"); + assertConsistent(pcts, percentiles, minValues - 1, maxValues - 1, sigDigits); + } + + @Test + public void testOrderBySubAggregation() { + int sigDigits = randomSignificantDigits(); + boolean asc = randomBoolean(); + SearchResponse searchResponse = client() + .prepareSearch("idx") + .setQuery(matchAllQuery()) + .addAggregation( + histogram("histo").field("value").interval(2l) + .subAggregation( + percentiles("percentiles").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(sigDigits) + .percentiles(99)) + .order(Order.aggregation("percentiles", "99", asc))).execute().actionGet(); + + assertThat(searchResponse.getHits().getTotalHits(), equalTo(10l)); + + Histogram histo = searchResponse.getAggregations().get("histo"); + double previous = asc ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; + for (Histogram.Bucket bucket : histo.getBuckets()) { + Percentiles percentiles = bucket.getAggregations().get("percentiles"); + double p99 = percentiles.percentile(99); + if (asc) { + assertThat(p99, greaterThanOrEqualTo(previous)); + } else { + assertThat(p99, lessThanOrEqualTo(previous)); + } + previous = p99; + } + } + +} \ No newline at end of file diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksTests.java similarity index 99% rename from core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksTests.java rename to core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksTests.java index b59c11ac18e..7ef236aece6 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksTests.java @@ -50,7 +50,7 @@ import static org.hamcrest.Matchers.sameInstance; /** * */ -public class PercentileRanksTests extends AbstractNumericTests { +public class TDigestPercentileRanksTests extends AbstractNumericTests { private static double[] randomPercents(long minValue, long maxValue) { @@ -70,7 +70,7 @@ public class PercentileRanksTests extends AbstractNumericTests { } } Arrays.sort(percents); - Loggers.getLogger(PercentileRanksTests.class).info("Using percentiles={}", Arrays.toString(percents)); + Loggers.getLogger(TDigestPercentileRanksTests.class).info("Using percentiles={}", Arrays.toString(percents)); return percents; } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentilesTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesTests.java similarity index 99% rename from core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentilesTests.java rename to core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesTests.java index ed9c3c33b82..aeeb40b1382 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/PercentilesTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesTests.java @@ -50,7 +50,7 @@ import static org.hamcrest.Matchers.sameInstance; /** * */ -public class PercentilesTests extends AbstractNumericTests { +public class TDigestPercentilesTests extends AbstractNumericTests { private static double[] randomPercentiles() { final int length = randomIntBetween(1, 20); @@ -69,7 +69,7 @@ public class PercentilesTests extends AbstractNumericTests { } } Arrays.sort(percentiles); - Loggers.getLogger(PercentilesTests.class).info("Using percentiles={}", Arrays.toString(percentiles)); + Loggers.getLogger(TDigestPercentilesTests.class).info("Using percentiles={}", Arrays.toString(percentiles)); return percentiles; } diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index ecad363886d..5357d00461e 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -213,6 +213,40 @@ of data which arrives sorted and in-order) the default settings will produce a TDigest roughly 64KB in size. In practice data tends to be more random and the TDigest will use less memory. +==== HDR Histogram + +experimental[] + +https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) is an alternative implementation +that can be useful when calculating percentiles for latency measurements as it can be faster than the t-digest implementation +with the trade-off of a larger memory footprint. This implementation maintains a fixed worse-case percentage error (specified +as a number of significant digits). This means that if data is recorded with values from 1 microsecond up to 1 hour +(3,600,000,000 microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond +for values up to 1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). + +The HDR Histogram can be used by specifying the `method` parameter in the request: + +[source,js] +-------------------------------------------------- +{ + "aggs" : { + "load_time_outlier" : { + "percentiles" : { + "field" : "load_time", + "percents" : [95, 99, 99.9], + "method" : "hdr", <1> + "number_of_significant_value_digits" : 3 <2> + } + } + } +} +-------------------------------------------------- +<1> The `method` parameter is set to `hdr` to indicate that HDR Histogram should be used to calculate the percentiles +<2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits + +The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use +the HDRHistogram if the range of values is unknown as this could lead to high memory usage. + ==== Missing value The `missing` parameter defines how documents that are missing a value should be treated. diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index 5da59061e0b..759abb22be5 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -110,6 +110,40 @@ This will interpret the `script` parameter as an `inline` script with the defaul TIP: for indexed scripts replace the `file` parameter with an `id` parameter. +==== HDR Histogram + +experimental[] + +https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) is an alternative implementation +that can be useful when calculating percentile ranks for latency measurements as it can be faster than the t-digest implementation +with the trade-off of a larger memory footprint. This implementation maintains a fixed worse-case percentage error (specified as a +number of significant digits). This means that if data is recorded with values from 1 microsecond up to 1 hour (3,600,000,000 +microseconds) in a histogram set to 3 significant digits, it will maintain a value resolution of 1 microsecond for values up to +1 millisecond and 3.6 seconds (or better) for the maximum tracked value (1 hour). + +The HDR Histogram can be used by specifying the `method` parameter in the request: + +[source,js] +-------------------------------------------------- +{ + "aggs" : { + "load_time_outlier" : { + "percentile_ranks" : { + "field" : "load_time", + "values" : [15, 30], + "method" : "hdr", <1> + "number_of_significant_value_digits" : 3 <2> + } + } + } +} +-------------------------------------------------- +<1> The `method` parameter is set to `hdr` to indicate that HDR Histogram should be used to calculate the percentile_ranks +<2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits + +The HDRHistogram only supports positive values and will error if it is passed a negative value. It is also not a good idea to use +the HDRHistogram if the range of values is unknown as this could lead to high memory usage. + ==== Missing value The `missing` parameter defines how documents that are missing a value should be treated. diff --git a/pom.xml b/pom.xml index 5df506568e6..e4e9150bbcd 100644 --- a/pom.xml +++ b/pom.xml @@ -394,6 +394,12 @@ 3.0 + + org.hdrhistogram + HdrHistogram + 2.1.5 + + org.apache.commons commons-lang3