Small cleanups for terms aggregator (#57315) (#57381)

This includes a few small cleanups for the `TermsAggregatorFactory`:

1. Removes an unused `DeprecationLogger`
2. Moves the members to right above the ctor.
3. Merges some all of the heuristics for picking `SubAggCollectionMode`
   into a single method.
This commit is contained in:
Nik Everett 2020-05-29 16:59:35 -04:00 committed by GitHub
parent ebe433343f
commit d5e86d7c4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 38 deletions

View File

@ -19,10 +19,8 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
@ -52,17 +50,8 @@ import java.util.Map;
import java.util.function.Function;
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(TermsAggregatorFactory.class));
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
private final BucketOrder order;
private final IncludeExclude includeExclude;
private final String executionHint;
private final SubAggCollectionMode collectMode;
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final boolean showTermDocCountError;
static void registerAggregators(ValuesSourceRegistry.Builder builder) {
builder.register(TermsAggregationBuilder.NAME,
Arrays.asList(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP),
@ -98,7 +87,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
ExecutionMode execution = null;
if (executionHint != null) {
execution = ExecutionMode.fromString(executionHint, deprecationLogger);
execution = ExecutionMode.fromString(executionHint);
}
// In some cases, using ordinals is just not supported: override it
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) {
@ -109,11 +98,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
}
final long maxOrd = execution == ExecutionMode.GLOBAL_ORDINALS ? getMaxOrd(valuesSource, context.searcher()) : -1;
if (subAggCollectMode == null) {
subAggCollectMode = SubAggCollectionMode.DEPTH_FIRST;
// TODO can we remove concept of AggregatorFactories.EMPTY?
if (factories != AggregatorFactories.EMPTY) {
subAggCollectMode = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
}
subAggCollectMode = pickSubAggColectMode(factories, bucketCountThresholds.getShardSize(), maxOrd);
}
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
@ -165,12 +150,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
}
if (subAggCollectMode == null) {
// TODO can we remove concept of AggregatorFactories.EMPTY?
if (factories != AggregatorFactories.EMPTY) {
subAggCollectMode = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
} else {
subAggCollectMode = SubAggCollectionMode.DEPTH_FIRST;
}
subAggCollectMode = pickSubAggColectMode(factories, bucketCountThresholds.getShardSize(), -1);
}
ValuesSource.Numeric numericValuesSource = (ValuesSource.Numeric) valuesSource;
@ -198,6 +178,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
};
}
private final BucketOrder order;
private final IncludeExclude includeExclude;
private final String executionHint;
private final SubAggCollectionMode collectMode;
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final boolean showTermDocCountError;
TermsAggregatorFactory(String name,
ValuesSourceConfig config,
BucketOrder order,
@ -280,17 +267,29 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
showTermDocCountError, collectsFromSingleBucket, metadata);
}
// return the SubAggCollectionMode that this aggregation should use based on the expected size
// and the cardinality of the field
static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) {
/**
* Pick a {@link SubAggCollectionMode} based on heuristics about what
* we're collecting.
*/
static SubAggCollectionMode pickSubAggColectMode(AggregatorFactories factories, int expectedSize, long maxOrd) {
if (factories.countAggregators() == 0) {
// Without sub-aggregations we pretty much ignore this field value so just pick something
return SubAggCollectionMode.DEPTH_FIRST;
}
if (expectedSize == Integer.MAX_VALUE) {
// return all buckets
// We expect to return all buckets so delaying them won't save any time
return SubAggCollectionMode.DEPTH_FIRST;
}
if (maxOrd == -1 || maxOrd > expectedSize) {
// use breadth_first if the cardinality is bigger than the expected size or unknown (-1)
/*
* We either don't know how many buckets we expect there to be
* (maxOrd == -1) or we expect there to be more buckets than
* we will collect from this shard. So delaying collection of
* the sub-buckets *should* save time.
*/
return SubAggCollectionMode.BREADTH_FIRST;
}
// We expect to collect so many buckets that we may as well collect them all.
return SubAggCollectionMode.DEPTH_FIRST;
}
@ -398,7 +397,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
}
};
public static ExecutionMode fromString(String value, final DeprecationLogger deprecationLogger) {
public static ExecutionMode fromString(String value) {
switch (value) {
case "global_ordinals":
return GLOBAL_ORDINALS;

View File

@ -20,25 +20,37 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.test.ESTestCase;
import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class TermsAggregatorFactoryTests extends ESTestCase {
public void testSubAggCollectMode() throws Exception {
assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1),
public void testPickEmpty() throws Exception {
AggregatorFactories empty = mock(AggregatorFactories.class);
when(empty.countAggregators()).thenReturn(0);
assertThat(TermsAggregatorFactory.pickSubAggColectMode(empty, randomInt(), randomInt()),
equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(10, -1),
equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 5),
}
public void testPickNonEempty() {
AggregatorFactories nonEmpty = mock(AggregatorFactories.class);
when(nonEmpty.countAggregators()).thenReturn(1);
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, Integer.MAX_VALUE, -1),
equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 10),
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, -1),
equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 5),
equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 100),
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 10),
equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 100),
equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 2),
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 1, 2),
equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 100),
assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 1, 100),
equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST));
}
}