Fail variable_width_histogram that collects from many (#58619) (#58780)

Adds an explicit check to `variable_width_histogram` to stop it from
trying to collect from many buckets because it can't. I tried to make it
do so but that is more than an afternoon's project, sadly. So for now we
just disallow it.

Relates to #42035
This commit is contained in:
Nik Everett 2020-06-30 18:26:45 -04:00 committed by GitHub
parent 3aa08fbcde
commit 40850a780d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 1 deletions

View File

@ -57,6 +57,8 @@ Response:
-------------------------------------------------- --------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
IMPORTANT: This aggregation cannot currently be nested under any aggregation that collects from more than a single bucket.
==== Clustering Algorithm ==== Clustering Algorithm
Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents Each shard fetches the first `initial_buffer` documents and stores them in memory. Once the buffer is full, these documents
are sorted and linearly separated into `3/4 * shard_size buckets`. are sorted and linearly separated into `3/4 * shard_size buckets`.

View File

@ -65,6 +65,13 @@ public class VariableWidthHistogramAggregatorFactory extends ValuesSourceAggrega
Aggregator parent, Aggregator parent,
boolean collectsFromSingleBucket, boolean collectsFromSingleBucket,
Map<String, Object> metadata) throws IOException{ Map<String, Object> metadata) throws IOException{
if (collectsFromSingleBucket == false) {
throw new IllegalArgumentException(
"["
+ VariableWidthHistogramAggregationBuilder.NAME
+ "] cannot be nested inside an aggregation that collects more than a single bucket."
);
}
AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config, AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config,
VariableWidthHistogramAggregationBuilder.NAME); VariableWidthHistogramAggregationBuilder.NAME);
if (aggregatorSupplier instanceof VariableWidthHistogramAggregatorSupplier == false) { if (aggregatorSupplier instanceof VariableWidthHistogramAggregatorSupplier == false) {

View File

@ -31,13 +31,16 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms;
import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.InternalStats; import org.elasticsearch.search.aggregations.metrics.InternalStats;
import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder;
@ -51,12 +54,15 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.Consumer; import java.util.function.Consumer;
import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase { public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase {
private static final String NUMERIC_FIELD = "numeric"; private static final String NUMERIC_FIELD = "numeric";
private static final Query DEFAULT_QUERY = new MatchAllDocsQuery(); private static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
private VariableWidthHistogramAggregationBuilder aggregationBuilder;
public void testNoDocs() throws Exception{ public void testNoDocs() throws Exception{
final List<Number> dataset = Arrays.asList(); final List<Number> dataset = Arrays.asList();
@ -424,6 +430,54 @@ public class VariableWidthHistogramAggregatorTests extends AggregatorTestCase {
} }
public void testAsSubAggregation() throws IOException {
AggregationBuilder builder = new TermsAggregationBuilder("t").field("t")
.subAggregation(new VariableWidthHistogramAggregationBuilder("v").field("v").setNumBuckets(2));
CheckedConsumer<RandomIndexWriter, IOException> buildIndex = iw -> {
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 1))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 10))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 1), new SortedNumericDocValuesField("v", 11))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 20))
);
iw.addDocument(
org.elasticsearch.common.collect.List.of(new SortedNumericDocValuesField("t", 2), new SortedNumericDocValuesField("v", 30))
);
};
Consumer<LongTerms> verify = terms -> {
/*
* This is what the result should be but it never gets called because of
* the explicit check. We do expect to remove the check in the future,
* thus, this stays.
*/
LongTerms.Bucket t1 = terms.getBucketByKey("1");
InternalVariableWidthHistogram v1 = t1.getAggregations().get("v");
assertThat(
v1.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
equalTo(org.elasticsearch.common.collect.List.of(1.0, 10.5))
);
LongTerms.Bucket t2 = terms.getBucketByKey("1");
InternalVariableWidthHistogram v2 = t2.getAggregations().get("v");
assertThat(
v2.getBuckets().stream().map(InternalVariableWidthHistogram.Bucket::centroid).collect(toList()),
equalTo(org.elasticsearch.common.collect.List.of(20.0, 30))
);
};
Exception e = expectThrows(
IllegalArgumentException.class,
() -> testCase(builder, DEFAULT_QUERY, buildIndex, verify, longField("t"), longField("v"))
);
assertThat(e.getMessage(), containsString("cannot be nested"));
}
private void testSearchCase(final Query query, final List<Number> dataset, boolean multipleSegments, private void testSearchCase(final Query query, final List<Number> dataset, boolean multipleSegments,
final Consumer<VariableWidthHistogramAggregationBuilder> configure, final Consumer<VariableWidthHistogramAggregationBuilder> configure,