Don't load global ordinals with the `map` execution_hint (#37833)
The terms aggregator loads the global ordinals to retrieve the cardinality of the field to aggregate on. This information is then used to select the strategy to use for the aggregation (breadth_first or depth_first). However, this should be avoided if the execution_hint is explicitly set to `map`, since this mode doesn't really need the global ordinals. Since we still need the cardinality of the field, this change picks the maximum cardinality among the segments as an estimate of the total cardinality to select the strategy to use (breadth_first or depth_first). This estimate is only used if the execution hint is set to `map`; otherwise, the global ordinals are still used to retrieve the accurate cardinality. Closes #37705
This commit is contained in:
parent
23f00e3676
commit
b7308aa03c
|
@ -700,3 +700,74 @@ setup:
|
|||
- is_false: aggregations.str_terms.buckets.1.key_as_string
|
||||
|
||||
- match: { aggregations.str_terms.buckets.1.doc_count: 2 }
|
||||
|
||||
---
|
||||
"Global ordinals are not loaded with the map execution hint":
|
||||
|
||||
- skip:
|
||||
version: " - 6.99.99"
|
||||
reason: bug fixed in 7.0
|
||||
|
||||
- do:
|
||||
index:
|
||||
refresh: true
|
||||
index: test_1
|
||||
id: 1
|
||||
routing: 1
|
||||
body: { "str": "abc" }
|
||||
|
||||
- do:
|
||||
index:
|
||||
refresh: true
|
||||
index: test_1
|
||||
id: 2
|
||||
routing: 1
|
||||
body: { "str": "abc" }
|
||||
|
||||
- do:
|
||||
index:
|
||||
refresh: true
|
||||
index: test_1
|
||||
id: 3
|
||||
routing: 1
|
||||
body: { "str": "bcd" }
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_1
|
||||
body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } }
|
||||
|
||||
- match: { hits.total.value: 3}
|
||||
- length: { aggregations.str_terms.buckets: 2 }
|
||||
|
||||
- do:
|
||||
indices.stats:
|
||||
index: test_1
|
||||
metric: fielddata
|
||||
fielddata_fields: str
|
||||
|
||||
- match: { indices.test_1.total.fielddata.memory_size_in_bytes: 0}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_1
|
||||
body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } }
|
||||
|
||||
- match: { hits.total.value: 3}
|
||||
- length: { aggregations.str_terms.buckets: 2 }
|
||||
|
||||
- do:
|
||||
indices.stats:
|
||||
index: test_1
|
||||
metric: fielddata
|
||||
fielddata_fields: str
|
||||
|
||||
- gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
|
@ -133,7 +134,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) {
|
||||
execution = ExecutionMode.MAP;
|
||||
}
|
||||
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
||||
final long maxOrd = getMaxOrd(context.searcher(), valuesSource, execution);
|
||||
if (execution == null) {
|
||||
execution = ExecutionMode.GLOBAL_ORDINALS;
|
||||
}
|
||||
|
@ -207,13 +208,23 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the maximum global ordinal value for the provided {@link ValuesSource} or -1
|
||||
* Get the maximum ordinal value for the provided {@link ValuesSource} or -1
|
||||
* if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}.
|
||||
*/
|
||||
static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException {
|
||||
static long getMaxOrd(IndexSearcher searcher, ValuesSource source, ExecutionMode executionMode) throws IOException {
|
||||
if (source instanceof ValuesSource.Bytes.WithOrdinals) {
|
||||
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source;
|
||||
if (executionMode == ExecutionMode.MAP) {
|
||||
// global ordinals are not requested so we don't load them
|
||||
// and return the biggest cardinality per segment instead.
|
||||
long maxOrd = -1;
|
||||
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
|
||||
maxOrd = Math.max(maxOrd, valueSourceWithOrdinals.ordinalsValues(leaf).getValueCount());
|
||||
}
|
||||
return maxOrd;
|
||||
} else {
|
||||
return valueSourceWithOrdinals.globalMaxOrd(searcher);
|
||||
}
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
@ -258,7 +269,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
List<PipelineAggregator> pipelineAggregators,
|
||||
Map<String, Object> metaData) throws IOException {
|
||||
|
||||
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
||||
final long maxOrd = getMaxOrd(context.searcher(), valuesSource, ExecutionMode.GLOBAL_ORDINALS);
|
||||
assert maxOrd != -1;
|
||||
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
||||
|
||||
|
|
Loading…
Reference in New Issue