From 40850a780d86ca687c9d3d4e3ca27b34238274e1 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 30 Jun 2020 18:26:45 -0400 Subject: [PATCH] Fail variable_width_histogram that collects from many (#58619) (#58780) Adds an explicit check to `variable_width_histogram` to stop it from trying to collect from many buckets because it can't. I tried to make it do so but that is more than an afternoon's project, sadly. So for now we just disallow it. Relates to #42035 --- ...ariablewidthhistogram-aggregation.asciidoc | 2 + ...riableWidthHistogramAggregatorFactory.java | 7 +++ ...VariableWidthHistogramAggregatorTests.java | 56 ++++++++++++++++++- 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc index b3892e9e584..1f5435891b9 100644 --- a/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/variablewidthhistogram-aggregation.asciidoc @@ -57,6 +57,8 @@ Response: -------------------------------------------------- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] +IMPORTANT: This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket. + ==== Clustering Algorithm Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents are sorted and linearly separated into `3/4 * shard_size buckets`. diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorFactory.java index 2b60fac8c68..cefda9ffc0f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorFactory.java @@ -65,6 +65,13 @@ public class VariableWidthHistogramAggregatorFactory extends ValuesSourceAggrega Aggregator parent, boolean collectsFromSingleBucket, Map metadata) throws IOException{ + if (collectsFromSingleBucket == false) { + throw new IllegalArgumentException( + "[" + + VariableWidthHistogramAggregationBuilder.NAME + + "] cannot be nested inside an aggregation that collects more than a single bucket." + ); + } AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config, VariableWidthHistogramAggregationBuilder.NAME); if (aggregatorSupplier instanceof VariableWidthHistogramAggregatorSupplier == false) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorTests.java index 761e5abaac3..48aceb9326a 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregatorTests.java @@ -31,13 +31,16 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; +import org.elasticsearch.search.aggregations.bucket.terms.LongTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.InternalStats; import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder; @@ -51,12 +54,15 @@ import java.util.List; import java.util.Map; import java.util.function.Consumer; +import static java.util.stream.Collectors.toList; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase { private static final String NUMERIC_FIELD = "numeric"; private static final Query DEFAULT_QUERY = new MatchAllDocsQuery(); - private VariableWidthHistogramAggregationBuilder aggregationBuilder; public void testNoDocs() throws Exception{ final List dataset = Arrays.asList(); @@ -424,6 +430,54 @@ public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase { } + public void testAsSubAggregation() throws IOException { + AggregationBuilder builder = new TermsAggregationBuilder("t").field("t") + .subAggregation(new VariableWidthHistogramAggregationBuilder("v").field("v").setNumBuckets(2)); + CheckedConsumer buildIndex = iw -> { + iw.addDocument( + org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 1)) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 10)) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 11)) + ); + + iw.addDocument( + org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 20)) + ); + iw.addDocument( + org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 30)) + ); + }; + Consumer verify = terms -> { + /* + * This is what the result should be but it never gets called because of + * the explicit check. We do expect to remove the check in the future, + * thus, this stays. + */ + LongTerms.Bucket t1 = terms.getBucketByKey("1"); + InternalVariableWidthHistogram v1 = t1.getAggregations().get("v"); + assertThat( + v1.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of(1.0, 10.5)) + ); + + LongTerms.Bucket t2 = terms.getBucketByKey("1"); + InternalVariableWidthHistogram v2 = t2.getAggregations().get("v"); + assertThat( + v2.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of(20.0, 30)) + ); + }; + Exception e = expectThrows( + IllegalArgumentException.class, + () -> testCase(builder, DEFAULT_QUERY, buildIndex, verify, longField("t"), longField("v")) + ); + assertThat(e.getMessage(), containsString("cannot be nested")); + } + private void testSearchCase(final Query query, final List dataset, boolean multipleSegments, final Consumer configure,