Terms aggs: only use ordinals on low-cardinality fields by default.

Close #5303
This commit is contained in:
Adrien Grand 2014-02-28 22:32:55 +01:00
parent 612e95a321
commit 5008b04cf4
1 changed file with 27 additions and 1 deletion

View File

@ -18,6 +18,7 @@
*/ */
package org.elasticsearch.search.aggregations.bucket.terms; package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
@ -70,6 +71,31 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
} }
} }
/**
 * Decides whether ordinals-based execution should be picked by default for this terms aggregation.
 *
 * Ordinals are rejected when any of the following holds:
 * <ul>
 *   <li>the aggregator sits under a parent bucket aggregator;</li>
 *   <li>the values source reports an unknown ({@code -1}) maximum per-reader unique values count;</li>
 *   <li>that count exceeds one sixteenth of the largest {@code maxDoc} among the leaf readers.</li>
 * </ul>
 *
 * @param parent       the parent aggregator, passed to {@code hasParentBucketAggregator}
 * @param valuesSource the values source whose metadata provides the unique values count
 * @param context      the aggregation context giving access to the searcher's leaf readers
 * @return {@code true} if none of the conditions above rule ordinals out
 */
private boolean shouldUseOrdinals(Aggregator parent, ValuesSource valuesSource, AggregationContext context) {
    // Under a parent bucket aggregator the number of instances of this aggregator is unbounded, and most
    // of them are likely to see only a handful of documents: ordinals would not pay off.
    if (hasParentBucketAggregator(parent)) {
        return false;
    }

    // Stay on the safe side when the cardinality is unknown.
    final long uniqueValuesBound = valuesSource.metaData().maxAtomicUniqueValuesCount();
    if (uniqueValuesBound == -1) {
        return false;
    }

    // Find the largest maxDoc across all leaf readers of the top-level reader.
    int largestLeafMaxDoc = 0;
    for (AtomicReaderContext leaf : context.searchContext().searcher().getTopReaderContext().reader().leaves()) {
        final int leafMaxDoc = leaf.reader().maxDoc();
        if (leafMaxDoc > largestLeafMaxDoc) {
            largestLeafMaxDoc = leafMaxDoc;
        }
    }

    // Ordinals only help when unique values are few relative to the document count; the cut-off is
    // maxDoc / 16 (maxDoc >>> 4).
    return uniqueValuesBound <= (largestLeafMaxDoc >>> 4);
}
@Override @Override
protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) { protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
long estimatedBucketCount = valuesSource.metaData().maxAtomicUniqueValuesCount(); long estimatedBucketCount = valuesSource.metaData().maxAtomicUniqueValuesCount();
@ -98,7 +124,7 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
} }
if (execution == null) { if (execution == null) {
if ((valuesSource instanceof BytesValuesSource.WithOrdinals) if ((valuesSource instanceof BytesValuesSource.WithOrdinals)
&& !hasParentBucketAggregator(parent)) { && shouldUseOrdinals(parent, valuesSource, aggregationContext)) {
execution = EXECUTION_HINT_VALUE_ORDINALS; execution = EXECUTION_HINT_VALUE_ORDINALS;
} else { } else {
execution = EXECUTION_HINT_VALUE_MAP; execution = EXECUTION_HINT_VALUE_MAP;