Allow unmapped fields in composite aggregations (#35331)

Today the `composite` aggregation throws an error if a source targets an
unmapped field and `missing_bucket` is set to false. Documents without a
value for a source cannot produce any bucket if `missing_bucket` is not
activated so the error is a shortcut to say that the response will be empty.
However this is not consistent with the `terms` aggregation which accepts
unmapped field by default even if the response is also guaranteed to be empty.
This commit removes this restriction, if a source contains an unmapped field
we now return an empty response (no buckets).

Closes #35317
This commit is contained in:
Jim Ferenczi 2018-11-08 09:30:52 +01:00 committed by GitHub
parent 1703a61fec
commit 891fdda68e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 20 deletions

View File

@ -25,7 +25,6 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.ToXContentFragment;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.script.Script; import org.elasticsearch.script.Script;
import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
@ -304,13 +303,6 @@ public abstract class CompositeValuesSourceBuilder<AB extends CompositeValuesSou
public final CompositeValuesSourceConfig build(SearchContext context) throws IOException { public final CompositeValuesSourceConfig build(SearchContext context) throws IOException {
ValuesSourceConfig<?> config = ValuesSourceConfig.resolve(context.getQueryShardContext(), ValuesSourceConfig<?> config = ValuesSourceConfig.resolve(context.getQueryShardContext(),
valueType, field, script, null,null, format); valueType, field, script, null,null, format);
if (config.unmapped() && field != null && missingBucket == false) {
// this source cannot produce any values so we refuse to build
// since composite buckets are not created on null values by default.
throw new QueryShardException(context.getQueryShardContext(),
"failed to find field [" + field + "] and [missing_bucket] is not set");
}
return innerBuild(context, config); return innerBuild(context, config);
} }
} }

View File

@ -30,7 +30,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -47,7 +46,6 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
@ -130,16 +128,81 @@ public class CompositeAggregatorTests extends AggregatorTestCase {
} }
public void testUnmappedField() throws Exception { public void testUnmappedField() throws Exception {
TermsValuesSourceBuilder terms = new TermsValuesSourceBuilder(randomAlphaOfLengthBetween(5, 10)) final List<Map<String, List<Object>>> dataset = new ArrayList<>();
.field("unknown"); dataset.addAll(
CompositeAggregationBuilder builder = new CompositeAggregationBuilder("test", Collections.singletonList(terms)); Arrays.asList(
IndexSearcher searcher = new IndexSearcher(new MultiReader()); createDocument("keyword", "a"),
QueryShardException exc = createDocument("keyword", "c"),
expectThrows(QueryShardException.class, () -> createAggregatorFactory(builder, searcher)); createDocument("keyword", "a"),
assertThat(exc.getMessage(), containsString("failed to find field [unknown] and [missing_bucket] is not set")); createDocument("keyword", "d"),
// should work when missing_bucket is set createDocument("keyword", "c")
terms.missingBucket(true); )
createAggregatorFactory(builder, searcher); );
testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset,
() -> new CompositeAggregationBuilder("name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped")
)
),
(result) -> {
assertEquals(0, result.getBuckets().size());
}
);
testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset,
() -> new CompositeAggregationBuilder("name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true)
)
),
(result) -> {
assertEquals(1, result.getBuckets().size());
assertEquals("{unmapped=null}", result.afterKey().toString());
assertEquals("{unmapped=null}", result.getBuckets().get(0).getKeyAsString());
assertEquals(5L, result.getBuckets().get(0).getDocCount());
}
);
testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset,
() -> new CompositeAggregationBuilder("name",
Arrays.asList(
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true)
)).aggregateAfter(Collections.singletonMap("unmapped", null)),
(result) -> {
assertEquals(0, result.getBuckets().size());
}
);
testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset,
() -> new CompositeAggregationBuilder("name",
Arrays.asList(
new TermsValuesSourceBuilder("keyword").field("keyword"),
new TermsValuesSourceBuilder("unmapped").field("unmapped")
)
),
(result) -> {
assertEquals(0, result.getBuckets().size());
}
);
testSearchCase(Arrays.asList(new MatchAllDocsQuery(), new DocValuesFieldExistsQuery("keyword")), dataset,
() -> new CompositeAggregationBuilder("name",
Arrays.asList(
new TermsValuesSourceBuilder("keyword").field("keyword"),
new TermsValuesSourceBuilder("unmapped").field("unmapped").missingBucket(true)
)
),
(result) -> {
assertEquals(3, result.getBuckets().size());
assertEquals("{keyword=d, unmapped=null}", result.afterKey().toString());
assertEquals("{keyword=a, unmapped=null}", result.getBuckets().get(0).getKeyAsString());
assertEquals(2L, result.getBuckets().get(0).getDocCount());
assertEquals("{keyword=c, unmapped=null}", result.getBuckets().get(1).getKeyAsString());
assertEquals(2L, result.getBuckets().get(1).getDocCount());
assertEquals("{keyword=d, unmapped=null}", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}
);
} }
public void testWithKeyword() throws Exception { public void testWithKeyword() throws Exception {