From ecdcc2df9283446a6e8a876845835e5040247500 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 18 Mar 2014 14:44:12 +0100 Subject: [PATCH] Fix cardinality memory-usage considerations. Default precision was computed based on the number of MULTI_BUCKET parents instead of PER_BUCKET. The ordinals-based execution mode was almost always used although ordinals might have non-negligible memory usage compared to the counters. Close #5452 --- .../cardinality/CardinalityAggregator.java | 15 ++++++++++++++- .../cardinality/CardinalityAggregatorFactory.java | 4 ++-- .../metrics/cardinality/HyperLogLogPlusPlus.java | 7 +++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregator.java index 7323e255334..1bbea3637ed 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregator.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lease.Releasable; @@ -93,7 +94,10 @@ public class CardinalityAggregator extends MetricsAggregator.SingleValue { if (bytesValues instanceof BytesValues.WithOrdinals) { BytesValues.WithOrdinals values = (BytesValues.WithOrdinals) bytesValues; final long maxOrd = values.ordinals().getMaxOrd(); - if (maxOrd <= reader.reader().maxDoc()) { + final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd); + final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision); + // only use ordinals if they don't increase memory usage by more than 25% + if (ordinalsMemoryUsage < countsMemoryUsage / 4) { return new OrdinalsCollector(counts, values, bigArrays); } } @@ -195,6 +199,15 @@ public class CardinalityAggregator extends MetricsAggregator.SingleValue { private static class OrdinalsCollector implements Collector { + private static final long SHALLOW_FIXEDBITSET_SIZE = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class); + + /** + * Return an approximate memory overhead per bucket for this collector. + */ + public static long memoryOverhead(long maxOrd) { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + SHALLOW_FIXEDBITSET_SIZE + (maxOrd + 7) / 8; // 1 bit per ord + } + private final BigArrays bigArrays; private final BytesValues.WithOrdinals values; private final Ordinals.Docs ordinals; diff --git a/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java index 096b12eaae2..9c26083fdc6 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/CardinalityAggregatorFactory.java @@ -64,8 +64,8 @@ final class CardinalityAggregatorFactory extends ValueSourceAggregatorFactory