Calculate precise cardinality upper bounds (#61529) (#61754)

This reworks `CardinalityUpperBound` to support precise estimates while
maintaining most of the public API. This will allow us to make more
informed choices about the data structures that we use in aggregations.
None of those interesting choices are made in this change, but it makes
them possible.
This commit is contained in:
Nik Everett 2020-08-31 15:10:02 -04:00 committed by GitHub
parent f39a9bbe19
commit fb84c1f73e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 108 additions and 34 deletions

View File

@ -23,22 +23,29 @@ import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregator;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
import java.util.function.IntFunction;
/**
* Upper bound of how many {@code owningBucketOrds} that an {@link Aggregator}
* will have to collect into. Just "none", "one", and "many".
* will have to collect into.
*/
public enum CardinalityUpperBound {
public abstract class CardinalityUpperBound {
/**
* {@link Aggregator}s with this cardinality won't collect any data at
* all. For the most part this happens when an aggregation is inside of a
* {@link BucketsAggregator} that is pointing to an unmapped field.
*/
NONE {
public static final CardinalityUpperBound NONE = new CardinalityUpperBound() {
@Override
public CardinalityUpperBound multiply(int bucketCount) {
return NONE;
}
},
@Override
public <R> R map(IntFunction<R> mapper) {
return mapper.apply(0);
}
};
/**
* {@link Aggregator}s with this cardinality will be collected
@ -47,26 +54,14 @@ public enum CardinalityUpperBound {
* aggregations like {@link FilterAggregator} or a {@link RangeAggregator}
* configured to collect only a single range.
*/
ONE {
@Override
public CardinalityUpperBound multiply(int bucketCount) {
switch (bucketCount) {
case 0:
return NONE;
case 1:
return ONE;
default:
return MANY;
}
}
},
public static final CardinalityUpperBound ONE = new KnownCardinalityUpperBound(1);
/**
* {@link Aggregator}s with this cardinality may be collected many times.
* Most sub-aggregators of {@link BucketsAggregator}s will have
* this cardinality.
*/
MANY {
public static final CardinalityUpperBound MANY = new CardinalityUpperBound() {
@Override
public CardinalityUpperBound multiply(int bucketCount) {
if (bucketCount == 0) {
@ -74,8 +69,17 @@ public enum CardinalityUpperBound {
}
return MANY;
}
@Override
public <R> R map(IntFunction<R> mapper) {
return mapper.apply(Integer.MAX_VALUE);
}
};
private CardinalityUpperBound() {
// Sealed class
}
/**
* Get the rough measure of the number of buckets a fixed-bucket
* {@link Aggregator} will collect.
@ -84,4 +88,46 @@ public enum CardinalityUpperBound {
* will collect per owning ordinal
*/
public abstract CardinalityUpperBound multiply(int bucketCount);
/**
* Map the cardinality to a value. The argument to the {@code mapper}
* is the estimated cardinality, or {@code Integer.MAX_VALUE} if the
* cardinality is unknown.
*/
public abstract <R> R map(IntFunction<R> mapper);
/**
 * Cardinality estimate with a known upper bound. The bound is stored as an
 * {@code int} and is handed unchanged to the mapper in
 * {@link #map(IntFunction)}.
 */
private static class KnownCardinalityUpperBound extends CardinalityUpperBound {
    private final int estimate;

    KnownCardinalityUpperBound(int estimate) {
        this.estimate = estimate;
    }

    /**
     * Scale this bound by the number of buckets collected per owning ordinal.
     *
     * @param bucketCount number of buckets, must not be negative
     * @return {@code NONE} when {@code bucketCount} is 0, this instance when
     *         it is 1, {@code MANY} when the product reaches
     *         {@code Integer.MAX_VALUE}, and otherwise a new bound holding
     *         the exact product
     * @throws IllegalArgumentException if {@code bucketCount} is negative
     */
    @Override
    public CardinalityUpperBound multiply(int bucketCount) {
        if (bucketCount < 0) {
            // Zero is a legal argument (it yields NONE below), so the
            // message says "non-negative" rather than "positive".
            throw new IllegalArgumentException("bucketCount must be non-negative but was [" + bucketCount + "]");
        }
        switch (bucketCount) {
            case 0:
                return NONE;
            case 1:
                return this;
            default:
                // Multiply as longs so the overflow check itself cannot overflow.
                long newEstimate = (long) estimate * (long) bucketCount;
                // Integer.MAX_VALUE is the value MANY passes to map, so a
                // product that reaches it can no longer be reported precisely.
                if (newEstimate >= Integer.MAX_VALUE) {
                    return MANY;
                }
                return new KnownCardinalityUpperBound((int) newEstimate);
        }
    }

    /**
     * Apply {@code mapper} to the known estimate and return its result.
     */
    @Override
    public <R> R map(IntFunction<R> mapper) {
        return mapper.apply(estimate);
    }
}
}

View File

@ -67,7 +67,7 @@ public class NestedAggregator extends BucketsAggregator implements SingleBucketA
: Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
this.childFilter = childObjectMapper.nestedTypeFilter();
this.collectsFromSingleBucket = cardinality != CardinalityUpperBound.MANY;
this.collectsFromSingleBucket = cardinality.map(estimate -> estimate < 2);
}
@Override

View File

@ -34,7 +34,7 @@ public abstract class BytesKeyedBucketOrds implements Releasable {
* Build a {@link BytesKeyedBucketOrds}.
*/
public static BytesKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
return cardinality == CardinalityUpperBound.ONE ? new FromSingle(bigArrays) : new FromMany(bigArrays);
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
}
private BytesKeyedBucketOrds() {}

View File

@ -106,10 +106,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
if (remapGlobalOrds) {
this.collectionStrategy = new RemapGlobalOrds(cardinality);
} else {
if (cardinality == CardinalityUpperBound.MANY) {
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
}
this.collectionStrategy = new DenseGlobalOrds();
this.collectionStrategy = cardinality.map(estimate -> {
if (estimate > 1) {
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
}
return new DenseGlobalOrds();
});
}
}

View File

@ -33,8 +33,7 @@ public abstract class LongKeyedBucketOrds implements Releasable {
* Build a {@link LongKeyedBucketOrds}.
*/
public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
// TODO nothing NONE?
return cardinality != CardinalityUpperBound.MANY ? new FromSingle(bigArrays) : new FromMany(bigArrays);
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
}
private LongKeyedBucketOrds() {}

View File

@ -22,20 +22,47 @@ package org.elasticsearch.search.aggregations;
import org.elasticsearch.test.ESTestCase;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.sameInstance;
public class CardinalityUpperBoundTests extends ESTestCase {
public void testNoneMultiply() {
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), sameInstance(CardinalityUpperBound.NONE));
}
public void testNoneMap() {
assertThat(CardinalityUpperBound.NONE.map(i -> i), equalTo(0));
}
public void testOneMultiply() {
assertThat(CardinalityUpperBound.ONE.multiply(0), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.ONE.multiply(1), equalTo(CardinalityUpperBound.ONE));
assertThat(CardinalityUpperBound.ONE.multiply(between(2, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
assertThat(CardinalityUpperBound.ONE.multiply(0), sameInstance(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.ONE.multiply(1), sameInstance(CardinalityUpperBound.ONE));
assertThat(CardinalityUpperBound.ONE.multiply(Integer.MAX_VALUE), sameInstance(CardinalityUpperBound.MANY));
}
public void testOneMap() {
assertThat(CardinalityUpperBound.ONE.map(i -> i), equalTo(1));
}
/**
* Checks bounds built by multiplying {@link CardinalityUpperBound#ONE} by a
* bucket count larger than one: the estimate is reported exactly, multiplying
* by 0 or 1 returns existing instances, and a product that reaches
* {@code Integer.MAX_VALUE} turns into {@link CardinalityUpperBound#MANY}.
*/
public void testLargerKnownValues() {
// Capped at Short.MAX_VALUE so the product asserted on the last line fits in an int
// (assuming between() never returns a value above its second argument).
int estimate = between(2, Short.MAX_VALUE);
CardinalityUpperBound known = CardinalityUpperBound.ONE.multiply(estimate);
assertThat(known.map(i -> i), equalTo(estimate));
// Multiplying by 0 must return the shared NONE constant; multiplying by 1 must return the receiver itself.
assertThat(known.multiply(0), sameInstance(CardinalityUpperBound.NONE));
assertThat(known.multiply(1), sameInstance(known));
// Smallest multiplier for which estimate * multiplier >= Integer.MAX_VALUE.
int minOverflow = (int) Math.ceil((double) Integer.MAX_VALUE / estimate);
assertThat(known.multiply(between(minOverflow, Integer.MAX_VALUE)), sameInstance(CardinalityUpperBound.MANY));
// At most Short.MAX_VALUE * (Short.MAX_VALUE - 1), which is below Integer.MAX_VALUE,
// so the int multiplication in the expected value does not overflow.
int multiplier = between(2, Short.MAX_VALUE - 1);
assertThat(known.multiply(multiplier).map(i -> i), equalTo(estimate * multiplier));
}
public void testManyMultiply() {
assertThat(CardinalityUpperBound.MANY.multiply(0), equalTo(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
assertThat(CardinalityUpperBound.MANY.multiply(0), sameInstance(CardinalityUpperBound.NONE));
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), sameInstance(CardinalityUpperBound.MANY));
}
public void testManyMap() {
assertThat(CardinalityUpperBound.MANY.map(i -> i), equalTo(Integer.MAX_VALUE));
}
}

View File

@ -289,9 +289,9 @@ public class RangeAggregatorTests extends AggregatorTestCase {
simpleTestCase(aggregationBuilder, new MatchAllDocsQuery(), range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
InternalAggCardinality pc = ranges.get(0).getAggregations().get("c");
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
assertThat(pc.cardinality().map(i -> i), equalTo(2));
pc = ranges.get(1).getAggregations().get("c");
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
assertThat(pc.cardinality().map(i -> i), equalTo(2));
});
}