This reworks `CardinalityUpperBound` to support precise estimates while maintaining most of the public API. This will allow us to make more informed choices about the data structures that we use in aggregations. None of those interesting choices are part of this change, but this change makes them possible.
This commit is contained in:
parent
f39a9bbe19
commit
fb84c1f73e
|
@ -23,22 +23,29 @@ import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
|
|||
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
|
||||
|
||||
import java.util.function.IntFunction;
|
||||
|
||||
/**
|
||||
* Upper bound of how many {@code owningBucketOrds} that an {@link Aggregator}
|
||||
* will have to collect into. Just "none", "one", and "many".
|
||||
* will have to collect into.
|
||||
*/
|
||||
public enum CardinalityUpperBound {
|
||||
public abstract class CardinalityUpperBound {
|
||||
/**
|
||||
* {@link Aggregator}s with this cardinality won't collect any data at
|
||||
* all. For the most part this happens when an aggregation is inside of a
|
||||
* {@link BucketsAggregator} that is pointing to an unmapped field.
|
||||
*/
|
||||
NONE {
|
||||
public static final CardinalityUpperBound NONE = new CardinalityUpperBound() {
|
||||
@Override
|
||||
public CardinalityUpperBound multiply(int bucketCount) {
|
||||
return NONE;
|
||||
}
|
||||
},
|
||||
|
||||
@Override
|
||||
public <R> R map(IntFunction<R> mapper) {
|
||||
return mapper.apply(0);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* {@link Aggregator}s with this cardinality will be collected
|
||||
|
@ -47,26 +54,14 @@ public enum CardinalityUpperBound {
|
|||
* aggregations like {@link FilterAggregator} or a {@link RangeAggregator}
|
||||
* configured to collect only a single range.
|
||||
*/
|
||||
ONE {
|
||||
@Override
|
||||
public CardinalityUpperBound multiply(int bucketCount) {
|
||||
switch (bucketCount) {
|
||||
case 0:
|
||||
return NONE;
|
||||
case 1:
|
||||
return ONE;
|
||||
default:
|
||||
return MANY;
|
||||
}
|
||||
}
|
||||
},
|
||||
public static final CardinalityUpperBound ONE = new KnownCardinalityUpperBound(1);
|
||||
|
||||
/**
|
||||
* {@link Aggregator}s with this cardinality may be collected many times.
|
||||
* Most sub-aggregators of {@link BucketsAggregator}s will have
|
||||
* this cardinality.
|
||||
*/
|
||||
MANY {
|
||||
public static final CardinalityUpperBound MANY = new CardinalityUpperBound() {
|
||||
@Override
|
||||
public CardinalityUpperBound multiply(int bucketCount) {
|
||||
if (bucketCount == 0) {
|
||||
|
@ -74,8 +69,17 @@ public enum CardinalityUpperBound {
|
|||
}
|
||||
return MANY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <R> R map(IntFunction<R> mapper) {
|
||||
return mapper.apply(Integer.MAX_VALUE);
|
||||
}
|
||||
};
|
||||
|
||||
private CardinalityUpperBound() {
|
||||
// Sealed class
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the rough measure of the number of buckets a fixed-bucket
|
||||
* {@link Aggregator} will collect.
|
||||
|
@ -84,4 +88,46 @@ public enum CardinalityUpperBound {
|
|||
* will collect per owning ordinal
|
||||
*/
|
||||
public abstract CardinalityUpperBound multiply(int bucketCount);
|
||||
|
||||
/**
|
||||
* Map the cardinality to a value. The argument to the {@code mapper}
|
||||
* is the estimated cardinality, or {@code Integer.MAX_VALUE} if the
|
||||
* cardinality is unknown.
|
||||
*/
|
||||
public abstract <R> R map(IntFunction<R> mapper);
|
||||
|
||||
/**
|
||||
* Cardinality estimate with a known upper bound.
|
||||
*/
|
||||
private static class KnownCardinalityUpperBound extends CardinalityUpperBound {
|
||||
private final int estimate;
|
||||
|
||||
KnownCardinalityUpperBound(int estimate) {
|
||||
this.estimate = estimate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CardinalityUpperBound multiply(int bucketCount) {
|
||||
if (bucketCount < 0) {
|
||||
throw new IllegalArgumentException("bucketCount must be positive but was [" + bucketCount + "]");
|
||||
}
|
||||
switch (bucketCount) {
|
||||
case 0:
|
||||
return NONE;
|
||||
case 1:
|
||||
return this;
|
||||
default:
|
||||
long newEstimate = (long) estimate * (long) bucketCount;
|
||||
if (newEstimate >= Integer.MAX_VALUE) {
|
||||
return MANY;
|
||||
}
|
||||
return new KnownCardinalityUpperBound((int) newEstimate);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public <R> R map(IntFunction<R> mapper) {
|
||||
return mapper.apply(estimate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ public class NestedAggregator extends BucketsAggregator implements SingleBucketA
|
|||
: Queries.newNonNestedFilter(context.mapperService().getIndexSettings().getIndexVersionCreated());
|
||||
this.parentFilter = context.bitsetFilterCache().getBitSetProducer(parentFilter);
|
||||
this.childFilter = childObjectMapper.nestedTypeFilter();
|
||||
this.collectsFromSingleBucket = cardinality != CardinalityUpperBound.MANY;
|
||||
this.collectsFromSingleBucket = cardinality.map(estimate -> estimate < 2);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -34,7 +34,7 @@ public abstract class BytesKeyedBucketOrds implements Releasable {
|
|||
* Build a {@link BytesKeyedBucketOrds}.
|
||||
*/
|
||||
public static BytesKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
|
||||
return cardinality == CardinalityUpperBound.ONE ? new FromSingle(bigArrays) : new FromMany(bigArrays);
|
||||
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
|
||||
}
|
||||
|
||||
private BytesKeyedBucketOrds() {}
|
||||
|
|
|
@ -106,10 +106,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
if (remapGlobalOrds) {
|
||||
this.collectionStrategy = new RemapGlobalOrds(cardinality);
|
||||
} else {
|
||||
if (cardinality == CardinalityUpperBound.MANY) {
|
||||
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
|
||||
}
|
||||
this.collectionStrategy = new DenseGlobalOrds();
|
||||
this.collectionStrategy = cardinality.map(estimate -> {
|
||||
if (estimate > 1) {
|
||||
throw new AggregationExecutionException("Dense ords don't know how to collect from many buckets");
|
||||
}
|
||||
return new DenseGlobalOrds();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,8 +33,7 @@ public abstract class LongKeyedBucketOrds implements Releasable {
|
|||
* Build a {@link LongKeyedBucketOrds}.
|
||||
*/
|
||||
public static LongKeyedBucketOrds build(BigArrays bigArrays, CardinalityUpperBound cardinality) {
|
||||
// TODO nothing NONE?
|
||||
return cardinality != CardinalityUpperBound.MANY ? new FromSingle(bigArrays) : new FromMany(bigArrays);
|
||||
return cardinality.map(estimate -> estimate < 2 ? new FromSingle(bigArrays) : new FromMany(bigArrays));
|
||||
}
|
||||
|
||||
private LongKeyedBucketOrds() {}
|
||||
|
|
|
@ -22,20 +22,47 @@ package org.elasticsearch.search.aggregations;
|
|||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.sameInstance;
|
||||
|
||||
public class CardinalityUpperBoundTests extends ESTestCase {
|
||||
public void testNoneMultiply() {
|
||||
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), equalTo(CardinalityUpperBound.NONE));
|
||||
assertThat(CardinalityUpperBound.NONE.multiply(randomInt()), sameInstance(CardinalityUpperBound.NONE));
|
||||
}
|
||||
|
||||
public void testNoneMap() {
|
||||
assertThat(CardinalityUpperBound.NONE.map(i -> i), equalTo(0));
|
||||
}
|
||||
|
||||
public void testOneMultiply() {
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(0), equalTo(CardinalityUpperBound.NONE));
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(1), equalTo(CardinalityUpperBound.ONE));
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(between(2, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(0), sameInstance(CardinalityUpperBound.NONE));
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(1), sameInstance(CardinalityUpperBound.ONE));
|
||||
assertThat(CardinalityUpperBound.ONE.multiply(Integer.MAX_VALUE), sameInstance(CardinalityUpperBound.MANY));
|
||||
}
|
||||
|
||||
public void testOneMap() {
|
||||
assertThat(CardinalityUpperBound.ONE.map(i -> i), equalTo(1));
|
||||
}
|
||||
|
||||
public void testLargerKnownValues() {
|
||||
int estimate = between(2, Short.MAX_VALUE);
|
||||
CardinalityUpperBound known = CardinalityUpperBound.ONE.multiply(estimate);
|
||||
assertThat(known.map(i -> i), equalTo(estimate));
|
||||
|
||||
assertThat(known.multiply(0), sameInstance(CardinalityUpperBound.NONE));
|
||||
assertThat(known.multiply(1), sameInstance(known));
|
||||
int minOverflow = (int) Math.ceil((double) Integer.MAX_VALUE / estimate);
|
||||
assertThat(known.multiply(between(minOverflow, Integer.MAX_VALUE)), sameInstance(CardinalityUpperBound.MANY));
|
||||
|
||||
int multiplier = between(2, Short.MAX_VALUE - 1);
|
||||
assertThat(known.multiply(multiplier).map(i -> i), equalTo(estimate * multiplier));
|
||||
}
|
||||
|
||||
public void testManyMultiply() {
|
||||
assertThat(CardinalityUpperBound.MANY.multiply(0), equalTo(CardinalityUpperBound.NONE));
|
||||
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), equalTo(CardinalityUpperBound.MANY));
|
||||
assertThat(CardinalityUpperBound.MANY.multiply(0), sameInstance(CardinalityUpperBound.NONE));
|
||||
assertThat(CardinalityUpperBound.MANY.multiply(between(1, Integer.MAX_VALUE)), sameInstance(CardinalityUpperBound.MANY));
|
||||
}
|
||||
|
||||
public void testManyMap() {
|
||||
assertThat(CardinalityUpperBound.MANY.map(i -> i), equalTo(Integer.MAX_VALUE));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -289,9 +289,9 @@ public class RangeAggregatorTests extends AggregatorTestCase {
|
|||
simpleTestCase(aggregationBuilder, new MatchAllDocsQuery(), range -> {
|
||||
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
|
||||
InternalAggCardinality pc = ranges.get(0).getAggregations().get("c");
|
||||
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
|
||||
assertThat(pc.cardinality().map(i -> i), equalTo(2));
|
||||
pc = ranges.get(1).getAggregations().get("c");
|
||||
assertThat(pc.cardinality(), equalTo(CardinalityUpperBound.MANY));
|
||||
assertThat(pc.cardinality().map(i -> i), equalTo(2));
|
||||
});
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue