Aggregations: change to default shard_size in terms aggregation
The terms aggregation now uses BucketUtils.suggestShardSideQueueSize() to set the default shard size when the user does not specify one as a parameter.

Closes #6857
This commit is contained in:
parent 5487c56c70
commit dc9e9cb4cc
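For background on the heuristic named above, here is a minimal sketch of the kind of shard-side over-request suggestShardSideQueueSize() performs. The exact formula and body are assumptions for illustration; only the method name and its two inputs (the required size and the number of shards) come from this commit.

    // Illustrative sketch only -- not the verbatim BucketUtils implementation.
    // Idea: when results are reduced across several shards, ask each shard for
    // more than the final `size` so that terms that are frequent overall but
    // missing from some shard's local top-N are less likely to be dropped.
    static int suggestShardSideQueueSize(int finalSize, int numberOfShards) {
        if (numberOfShards == 1) {
            // A single shard sees all documents, so no head room is needed.
            return finalSize;
        }
        // Over-request: 50% head room plus a small constant for tiny sizes.
        long shardSampleSize = (long) (finalSize * 1.5 + 10);
        return (int) Math.min(Integer.MAX_VALUE, shardSampleSize);
    }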
TermsParser.java
@@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactory;
+import org.elasticsearch.search.aggregations.bucket.BucketUtils;
 import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
 import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
 import org.elasticsearch.search.internal.SearchContext;
@@ -32,7 +33,6 @@ import java.io.IOException;
  */
 public class TermsParser implements Aggregator.Parser {
-

     @Override
     public String type() {
         return StringTerms.TYPE.name();
@@ -41,19 +41,22 @@ public class TermsParser implements Aggregator.Parser {
     @Override
     public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
         TermsParametersParser aggParser = new TermsParametersParser();
-        ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context)
-                .scriptable(true)
-                .formattable(true)
-                .requiresSortedValues(true)
-                .requiresUniqueValues(true)
-                .build();
+        ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true)
+                .requiresSortedValues(true).requiresUniqueValues(true).build();
         IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context);
         aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);

-        TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
-        bucketCountThresholds.ensureValidity();
         InternalOrder order = resolveOrder(aggParser.getOrderKey(), aggParser.isOrderAsc());
-        return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, bucketCountThresholds, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getCollectionMode());
+        TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
+        if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC)
+                && bucketCountThresholds.getShardSize() == aggParser.getDefaultBucketCountThresholds().getShardSize()) {
+            // The user has not made a shardSize selection. Use default heuristic to avoid any wrong-ranking caused by distributed counting
+            bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(),
+                    context.numberOfShards()));
+        }
+        bucketCountThresholds.ensureValidity();
+        return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, bucketCountThresholds, aggParser.getIncludeExclude(),
+                aggParser.getExecutionHint(), aggParser.getCollectionMode());
     }

     static InternalOrder resolveOrder(String key, boolean asc) {
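As a usage note, callers that want to keep the shard-side queue pinned to the requested size (the behaviour the heuristic now replaces by default) can still pass shard_size explicitly, which is exactly what the new shardSizeEqualsSize_* tests below do. A minimal sketch using the same test-client builders as ShardSizeTermsTests; the index name, type, field, and counts are placeholders:

    // Explicitly setting shardSize(3) alongside size(3) opts out of the
    // suggestShardSideQueueSize() default and keeps the old per-shard queue size.
    SearchResponse response = client().prepareSearch("idx").setTypes("type")
            .setQuery(matchAllQuery())
            .addAggregation(terms("keys").field("key")
                    .size(3)
                    .shardSize(3)
                    .order(Terms.Order.count(false)))
            .execute().actionGet();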
ShardSizeTermsTests.java
@@ -45,6 +45,31 @@ public class ShardSizeTermsTests extends ShardSizeTests {
                         .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
                 .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<String, Long> expected = ImmutableMap.<String, Long>builder()
+                .put("1", 8l)
+                .put("3", 8l)
+                .put("2", 5l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsText().string())));
+        }
+    }
+
+    @Test
+    public void shardSizeEqualsSize_string() throws Exception {
+        createIdx("type=string,index=not_analyzed");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3).shardSize(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
+                .execute().actionGet();

         Terms terms = response.getAggregations().get("keys");
         Collection<Terms.Bucket> buckets = terms.getBuckets();
         assertThat(buckets.size(), equalTo(3));
@@ -110,6 +135,31 @@ public class ShardSizeTermsTests extends ShardSizeTests {
         }
     }
+
+    @Test
+    public void noShardSizeTermOrder_string() throws Exception {
+        createIdx("type=string,index=not_analyzed");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.term(true)))
+                .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<String, Long> expected = ImmutableMap.<String, Long>builder()
+                .put("1", 8l)
+                .put("2", 5l)
+                .put("3", 8l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsText().string())));
+        }
+    }

     @Test
     public void noShardSize_long() throws Exception {
@@ -123,6 +173,32 @@ public class ShardSizeTermsTests extends ShardSizeTests {
                         .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
                 .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder()
+                .put(1, 8l)
+                .put(3, 8l)
+                .put(2, 5l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+        }
+    }
+
+    @Test
+    public void shardSizeEqualsSize_long() throws Exception {
+
+        createIdx("type=long");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3).shardSize(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
+                .execute().actionGet();

         Terms terms = response.getAggregations().get("keys");
         Collection<Terms.Bucket> buckets = terms.getBuckets();
         assertThat(buckets.size(), equalTo(3));
@@ -188,6 +264,32 @@ public class ShardSizeTermsTests extends ShardSizeTests {
         }
     }
+
+    @Test
+    public void noShardSizeTermOrder_long() throws Exception {
+
+        createIdx("type=long");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.term(true)))
+                .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder()
+                .put(1, 8l)
+                .put(2, 5l)
+                .put(3, 8l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+        }
+    }

     @Test
     public void noShardSize_double() throws Exception {
@@ -201,6 +303,32 @@ public class ShardSizeTermsTests extends ShardSizeTests {
                         .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
                 .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder()
+                .put(1, 8l)
+                .put(3, 8l)
+                .put(2, 5l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+        }
+    }
+
+    @Test
+    public void shardSizeEqualsSize_double() throws Exception {
+
+        createIdx("type=double");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3).shardSize(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.count(false)))
+                .execute().actionGet();

         Terms terms = response.getAggregations().get("keys");
         Collection<Terms.Bucket> buckets = terms.getBuckets();
         assertThat(buckets.size(), equalTo(3));
@@ -265,4 +393,30 @@ public class ShardSizeTermsTests extends ShardSizeTests {
             assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
         }
     }
+
+    @Test
+    public void noShardSizeTermOrder_double() throws Exception {
+
+        createIdx("type=double");
+
+        indexData();
+
+        SearchResponse response = client().prepareSearch("idx").setTypes("type")
+                .setQuery(matchAllQuery())
+                .addAggregation(terms("keys").field("key").size(3)
+                        .collectMode(randomFrom(SubAggCollectionMode.values())).order(Terms.Order.term(true)))
+                .execute().actionGet();
+
+        Terms terms = response.getAggregations().get("keys");
+        Collection<Terms.Bucket> buckets = terms.getBuckets();
+        assertThat(buckets.size(), equalTo(3));
+        Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder()
+                .put(1, 8l)
+                .put(2, 5l)
+                .put(3, 8l)
+                .build();
+        for (Terms.Bucket bucket : buckets) {
+            assertThat(bucket.getDocCount(), equalTo(expected.get(bucket.getKeyAsNumber().intValue())));
+        }
+    }
 }