Use `global_ordinals_hash` execution mode when sorting by sub aggregations. (#26014)
This is a safer default since sorting by sub aggregations prevents these aggregations from being deferred. `global_ordinals_hash` will at least make sure that we do not use memory for buckets that are not collected. Closes #24359
This commit is contained in:
parent
4e27edd7a1
commit
0bf8a354a0
|
@ -30,6 +30,7 @@ import org.elasticsearch.search.aggregations.AggregatorFactory;
|
|||
import org.elasticsearch.search.aggregations.BucketOrder;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.InternalOrder;
|
||||
import org.elasticsearch.search.aggregations.InternalOrder.CompoundOrder;
|
||||
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds;
|
||||
|
@ -93,6 +94,17 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
};
|
||||
}
|
||||
|
||||
private static boolean isAggregationSort(BucketOrder order) {
|
||||
if (order instanceof InternalOrder.Aggregation) {
|
||||
return true;
|
||||
} else if (order instanceof InternalOrder.CompoundOrder) {
|
||||
InternalOrder.CompoundOrder compoundOrder = (CompoundOrder) order;
|
||||
return compoundOrder.orderElements().stream().anyMatch(TermsAggregatorFactory::isAggregationSort);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket,
|
||||
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
|
||||
|
@ -139,10 +151,17 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
// to be unbounded and most instances may only aggregate few
|
||||
// documents, so use hashed based
|
||||
// global ordinals to keep the bucket ords dense.
|
||||
|
||||
// Additionally, if using partitioned terms the regular global
|
||||
// ordinals would be sparse so we opt for hash
|
||||
|
||||
// Finally if we are sorting by sub aggregations, then these
|
||||
// aggregations cannot be deferred, so global_ordinals_hash is
|
||||
// a safer choice as we won't use memory for sub aggregations
|
||||
// for buckets that are not collected.
|
||||
if (Aggregator.descendsFromBucketAggregator(parent) ||
|
||||
(includeExclude != null && includeExclude.isPartitionBased())) {
|
||||
(includeExclude != null && includeExclude.isPartitionBased()) ||
|
||||
isAggregationSort(order)) {
|
||||
execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
|
||||
} else {
|
||||
if (factories == AggregatorFactories.EMPTY) {
|
||||
|
|
Loading…
Reference in New Issue