From 891fdda68ee6b2c7437457d386c1c2626cc3ffe8 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 8 Nov 2018 09:30:52 +0100 Subject: [PATCH] Allow unmapped fields in composite aggregations (#35331) Today the `composite` aggregation throws an error if a source targets an unmapped field and `missing_bucket` is set to false. Documents without a value for a source cannot produce any bucket if `missing_bucket` is not activated so the error is a shortcut to say that the response will be empty. However this is not consistent with the `terms` aggregation which accepts unmapped field by default even if the response is also guaranteed to be empty. This commit removes this restriction, if a source contains an unmapped field we now return an empty response (no buckets). Closes #35317 --- .../CompositeValuesSourceBuilder.java | 8 -- .../composite/CompositeAggregatorTests.java | 87 ++++++++++++++++--- 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSourceBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSourceBuilder.java index 01c592a5e01..927534afc85 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSourceBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSourceBuilder.java @@ -25,7 +25,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.script.Script; import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -304,13 +303,6 @@ public abstract class CompositeValuesSourceBuilder config = ValuesSourceConfig.resolve(context.getQueryShardContext(), valueType, field, script, null,null, format); - - if (config.unmapped() && field != null && missingBucket == false) { - // this source cannot produce any values so we refuse to build - // since composite buckets are not created on null values by default. - throw new QueryShardException(context.getQueryShardContext(), - "failed to find field [" + field + "] and [missing_bucket] is not set"); - } return innerBuild(context, config); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java index 52f6e4227e7..f2cb91673ce 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregatorTests.java @@ -30,7 +30,6 @@ import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexSearcher; @@ -47,7 +46,6 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.NumberFieldMapper; -import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; @@ -130,16 +128,81 @@ public class CompositeAggregatorTests extends AggregatorTestCase { } public void testUnmappedField() throws Exception { - TermsValuesSourceBuilder terms = new TermsValuesSourceBuilder(randomAlphaOfLengthBetween(5, 10)) - .field("unknown"); - CompositeAggregationBuilder builder = new CompositeAggregationBuilder("test", Collections.singletonList(terms)); - IndexSearcher searcher = new IndexSearcher(new MultiReader()); - QueryShardException exc = - expectThrows(QueryShardException.class, () -> createAggregatorFactory(builder, searcher)); - assertThat(exc.getMessage(), containsString("failed to find field [unknown] and [missing_bucket] is not set")); - // should work when missing_bucket is set - terms.missingBucket(true); - createAggregatorFactory(builder, searcher); + final List>> dataset = new ArrayList<>(); + dataset.addAll( + Arrays.asList( + createDocument("keyword", "a"), + createDocument("keyword", "c"), + createDocument("keyword", "a"), + createDocument("keyword", "d"), + createDocument("keyword", "c") + ) + ); + testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset, + () -> new CompositeAggregationBuilder("name", + Arrays.asList( + new TermsValuesSourceBuilder("unmapped").field("unmapped") + ) + ), + (result) -> { + assertEquals(0, result.getBuckets().size()); + } + ); + + testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset, + () -> new CompositeAggregationBuilder("name", + Arrays.asList( + new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true) + ) + ), + (result) -> { + assertEquals(1, result.getBuckets().size()); + assertEquals("{unmapped=null}", result.afterKey().toString()); + assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString()); + assertEquals(5L, result.getBuckets().get(0).getDocCount()); + } + ); + + testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset, + () -> new CompositeAggregationBuilder("name", + Arrays.asList( + new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true) + )).aggregateAfter(Collections.singletonMap("unmapped", null)), + (result) -> { + assertEquals(0, result.getBuckets().size()); + } + ); + + testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset, + () -> new CompositeAggregationBuilder("name", + Arrays.asList( + new TermsValuesSourceBuilder("keyword").field("keyword"), + new TermsValuesSourceBuilder("unmapped").field("unmapped") + ) + ), + (result) -> { + assertEquals(0, result.getBuckets().size()); + } + ); + + testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset, + () -> new CompositeAggregationBuilder("name", + Arrays.asList( + new TermsValuesSourceBuilder("keyword").field("keyword"), + new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true) + ) + ), + (result) -> { + assertEquals(3, result.getBuckets().size()); + assertEquals("{keyword=d, unmapped=null}", result.afterKey().toString()); + assertEquals("{keyword=a, unmapped=null}", result.getBuckets().get(0).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(0).getDocCount()); + assertEquals("{keyword=c, unmapped=null}", result.getBuckets().get(1).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(1).getDocCount()); + assertEquals("{keyword=d, unmapped=null}", result.getBuckets().get(2).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(2).getDocCount()); + } + ); } public void testWithKeyword() throws Exception {