From b7308aa03cdf87d12a0ff781b2bbb7b6222fcdad Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 1 Feb 2019 09:35:46 +0100 Subject: [PATCH] Don't load global ordinals with the `map` execution_hint (#37833) The terms aggregator loads the global ordinals to retrieve the cardinality of the field to aggregate on. This information is then used to select the strategy to use for the aggregation (breadth_first or depth_first). However, this should be avoided if the execution_hint is explicitly set to map, since this mode doesn't really need the global ordinals. Since we still need the cardinality of the field, this change picks the maximum cardinality in the segments as an estimate of the total cardinality to select the strategy to use (breadth_first or depth_first). This estimate is only used if the execution hint is set to map; otherwise the global ordinals are still used to retrieve the accurate cardinality. Closes #37705 --- .../test/search.aggregation/20_terms.yml | 71 +++++++++++++++++++ .../bucket/terms/TermsAggregatorFactory.java | 21 ++++-- 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index d442672bf8b..88e0ecff296 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -700,3 +700,74 @@ setup: - is_false: aggregations.str_terms.buckets.1.key_as_string - match: { aggregations.str_terms.buckets.1.doc_count: 2 } + +--- +"Global ordinals are not loaded with the map execution hint": + + - skip: + version: " - 6.99.99" + reason: bug fixed in 7.0 + + - do: + index: + refresh: true + index: test_1 + id: 1 + routing: 1 + body: { "str": "abc" } + + - do: + index: + refresh: true + index: test_1 + id: 2 + routing: 1 + body: { "str": "abc" } + + - do: + 
index: + refresh: true + index: test_1 + id: 3 + routing: 1 + body: { "str": "bcd" } + + - do: + indices.refresh: {} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - match: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + + + + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 1ff0efd3e83..346da32763b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.logging.log4j.LogManager; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.logging.DeprecationLogger; @@ -133,7 +134,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory pipelineAggregators, Map metaData) throws IOException { - final long maxOrd = getMaxOrd(valuesSource, context.searcher()); + final long maxOrd = getMaxOrd(context.searcher(), valuesSource, 
ExecutionMode.GLOBAL_ORDINALS); assert maxOrd != -1; final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());