Fix cardinality memory-usage considerations.

Default precision was computed based on the number of MULTI_BUCKETS parents
instead of the number of PER_BUCKET parents.

The ordinals-based execution mode was almost always used although ordinals
might have non-negligible memory usage compared to the counters.

Close #5452
This commit is contained in:
Adrien Grand 2014-03-18 14:44:12 +01:00
parent 85c3c6fe62
commit ecdcc2df92
3 changed files with 23 additions and 3 deletions

View File

@ -23,6 +23,7 @@ import com.google.common.base.Preconditions;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasable;
@ -93,7 +94,10 @@ public class CardinalityAggregator extends MetricsAggregator.SingleValue {
if (bytesValues instanceof BytesValues.WithOrdinals) { if (bytesValues instanceof BytesValues.WithOrdinals) {
BytesValues.WithOrdinals values = (BytesValues.WithOrdinals) bytesValues; BytesValues.WithOrdinals values = (BytesValues.WithOrdinals) bytesValues;
final long maxOrd = values.ordinals().getMaxOrd(); final long maxOrd = values.ordinals().getMaxOrd();
if (maxOrd <= reader.reader().maxDoc()) { final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd);
final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision);
// only use ordinals if they don't increase memory usage by more than 25%
if (ordinalsMemoryUsage < countsMemoryUsage / 4) {
return new OrdinalsCollector(counts, values, bigArrays); return new OrdinalsCollector(counts, values, bigArrays);
} }
} }
@ -195,6 +199,15 @@ public class CardinalityAggregator extends MetricsAggregator.SingleValue {
private static class OrdinalsCollector implements Collector { private static class OrdinalsCollector implements Collector {
private static final long SHALLOW_FIXEDBITSET_SIZE = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
/**
 * Estimate, in bytes, the extra memory this collector needs for each bucket:
 * one object reference, the shallow size of a {@link FixedBitSet} instance and
 * a bit set large enough to hold one bit per ordinal.
 *
 * @param maxOrd the maximum ordinal, used as the number of bits to account for
 * @return the approximate per-bucket overhead
 */
public static long memoryOverhead(long maxOrd) {
    // one bit per ord, rounded up to a whole number of bytes
    final long bitSetBytes = (maxOrd + 7) / 8;
    final long fixedCostPerBucket = RamUsageEstimator.NUM_BYTES_OBJECT_REF + SHALLOW_FIXEDBITSET_SIZE;
    return fixedCostPerBucket + bitSetBytes;
}
private final BigArrays bigArrays; private final BigArrays bigArrays;
private final BytesValues.WithOrdinals values; private final BytesValues.WithOrdinals values;
private final Ordinals.Docs ordinals; private final Ordinals.Docs ordinals;

View File

@ -64,8 +64,8 @@ final class CardinalityAggregatorFactory extends ValueSourceAggregatorFactory<Va
private int defaultPrecision(Aggregator parent) { private int defaultPrecision(Aggregator parent) {
int precision = HyperLogLogPlusPlus.DEFAULT_PRECISION; int precision = HyperLogLogPlusPlus.DEFAULT_PRECISION;
while (parent != null) { while (parent != null) {
if (parent.bucketAggregationMode() == BucketAggregationMode.MULTI_BUCKETS) { if (parent.bucketAggregationMode() == BucketAggregationMode.PER_BUCKET) {
// if the parent is a multi-bucket aggregator, we subtract 5 from the precision, // if the parent is a per-bucket aggregator, we subtract 5 from the precision,
// which will effectively divide the memory usage of each counter by 32 // which will effectively divide the memory usage of each counter by 32
precision -= 5; precision -= 5;
} }

View File

@ -75,6 +75,13 @@ public final class HyperLogLogPlusPlus implements Releasable {
return precision; return precision;
} }
/**
 * Estimate how much memory a single bucket's counter is expected to use at
 * the given precision, computed as two to the power of {@code precision}.
 * NOTE(review): the unit is presumably bytes -- confirm against the counter storage.
 *
 * @param precision the precision of the counter
 * @return the expected per-bucket memory usage
 */
public static long memoryUsage(int precision) {
    final long expectedUsage = 1L << precision;
    return expectedUsage;
}
// these static tables come from the appendix of the paper // these static tables come from the appendix of the paper
private static final double[][] RAW_ESTIMATE_DATA = { private static final double[][] RAW_ESTIMATE_DATA = {
// precision 4 // precision 4