Remove size 0 options in aggregations
This removes the ability to set `size: 0` in the `terms`, `significant_terms` and `geohash_grid` aggregations, for the reasons described in issue #18838 (https://github.com/elastic/elasticsearch/issues/18838). Closes #18838.
This commit is contained in:
parent
f8738c853b
commit
cfd3356ee3
|
@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.geogrid;
|
|||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.geo.GeoHashUtils;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
|
@ -84,9 +85,9 @@ public class GeoGridAggregationBuilder extends ValuesSourceAggregationBuilder<Va
|
|||
}
|
||||
|
||||
public GeoGridAggregationBuilder size(int size) {
|
||||
if (size < -1) {
|
||||
if (size <= 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"[size] must be greater than or equal to 0. Found [" + shardSize + "] in [" + name + "]");
|
||||
"[size] must be greater than 0. Found [" + size + "] in [" + name + "]");
|
||||
}
|
||||
this.requiredSize = size;
|
||||
return this;
|
||||
|
@ -97,9 +98,9 @@ public class GeoGridAggregationBuilder extends ValuesSourceAggregationBuilder<Va
|
|||
}
|
||||
|
||||
public GeoGridAggregationBuilder shardSize(int shardSize) {
|
||||
if (shardSize < -1) {
|
||||
if (shardSize < -1 || shardSize == 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"[shardSize] must be greater than or equal to 0. Found [" + shardSize + "] in [" + name + "]");
|
||||
"[shardSize] must be greater than 0. Found [" + shardSize + "] in [" + name + "]");
|
||||
}
|
||||
this.shardSize = shardSize;
|
||||
return this;
|
||||
|
@ -114,20 +115,20 @@ public class GeoGridAggregationBuilder extends ValuesSourceAggregationBuilder<Va
|
|||
ValuesSourceConfig<ValuesSource.GeoPoint> config, AggregatorFactory<?> parent, Builder subFactoriesBuilder)
|
||||
throws IOException {
|
||||
int shardSize = this.shardSize;
|
||||
if (shardSize == 0) {
|
||||
shardSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
int requiredSize = this.requiredSize;
|
||||
if (requiredSize == 0) {
|
||||
requiredSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (shardSize < 0) {
|
||||
// Use default heuristic to avoid any wrong-ranking caused by distributed counting
|
||||
// Use default heuristic to avoid any wrong-ranking caused by
|
||||
// distributed counting
|
||||
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.searchContext().numberOfShards());
|
||||
}
|
||||
|
||||
if (requiredSize <= 0 || shardSize <= 0) {
|
||||
throw new ElasticsearchException(
|
||||
"parameters [required_size] and [shard_size] must be >0 in geohash_grid aggregation [" + name + "].");
|
||||
}
|
||||
|
||||
if (shardSize < requiredSize) {
|
||||
shardSize = requiredSize;
|
||||
}
|
||||
|
|
|
@ -83,13 +83,6 @@ public abstract class TermsAggregator extends BucketsAggregator {
|
|||
|
||||
public void ensureValidity() {
|
||||
|
||||
if (shardSize == 0) {
|
||||
setShardSize(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
if (requiredSize == 0) {
|
||||
setRequiredSize(Integer.MAX_VALUE);
|
||||
}
|
||||
// shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shards in order to return <size>
|
||||
if (shardSize < requiredSize) {
|
||||
setShardSize(requiredSize);
|
||||
|
@ -100,8 +93,12 @@ public abstract class TermsAggregator extends BucketsAggregator {
|
|||
setShardMinDocCount(minDocCount);
|
||||
}
|
||||
|
||||
if (requiredSize < 0 || minDocCount < 0) {
|
||||
throw new ElasticsearchException("parameters [requiredSize] and [minDocCount] must be >=0 in terms aggregation.");
|
||||
if (requiredSize <= 0 || shardSize <= 0) {
|
||||
throw new ElasticsearchException("parameters [required_size] and [shard_size] must be >0 in terms aggregation.");
|
||||
}
|
||||
|
||||
if (minDocCount < 0 || shardMinDocCount < 0) {
|
||||
throw new ElasticsearchException("parameter [min_doc_count] and [shardMinDocCount] must be >=0 in terms aggregation.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -134,10 +134,9 @@ public class ChildrenIT extends ESIntegTestCase {
|
|||
SearchResponse searchResponse = client().prepareSearch("test")
|
||||
.setQuery(matchQuery("randomized", true))
|
||||
.addAggregation(
|
||||
terms("category").field("category").size(0).subAggregation(
|
||||
children("to_comment", "comment")
|
||||
terms("category").field("category").size(10000).subAggregation(children("to_comment", "comment")
|
||||
.subAggregation(
|
||||
terms("commenters").field("commenter").size(0).subAggregation(
|
||||
terms("commenters").field("commenter").size(10000).subAggregation(
|
||||
topHits("top_comments")
|
||||
))
|
||||
)
|
||||
|
@ -176,7 +175,7 @@ children("to_comment", "comment")
|
|||
SearchResponse searchResponse = client().prepareSearch("test")
|
||||
.setQuery(matchQuery("randomized", false))
|
||||
.addAggregation(
|
||||
terms("category").field("category").size(0).subAggregation(
|
||||
terms("category").field("category").size(10000).subAggregation(
|
||||
children("to_comment", "comment").subAggregation(topHits("top_comments").sort("_uid", SortOrder.ASC))
|
||||
)
|
||||
).get();
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket;
|
|||
import com.carrotsearch.hppc.ObjectIntHashMap;
|
||||
import com.carrotsearch.hppc.ObjectIntMap;
|
||||
import com.carrotsearch.hppc.cursors.ObjectIntCursor;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
|
@ -52,8 +53,8 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
|||
import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
|
||||
@ESIntegTestCase.SuiteScopeTestCase
|
||||
public class GeoHashGridIT extends ESIntegTestCase {
|
||||
|
@ -305,24 +306,24 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// making sure this doesn't runs into an OOME
|
||||
public void testSizeIsZero() {
|
||||
for (int precision = 1; precision <= PRECISION; precision++) {
|
||||
final int size = randomBoolean() ? 0 : randomIntBetween(1, Integer.MAX_VALUE);
|
||||
final int shardSize = randomBoolean() ? -1 : 0;
|
||||
SearchResponse response = client().prepareSearch("idx")
|
||||
.addAggregation(geohashGrid("geohashgrid")
|
||||
.field("location")
|
||||
.size(size)
|
||||
.shardSize(shardSize)
|
||||
.precision(precision)
|
||||
)
|
||||
.execute().actionGet();
|
||||
final int size = 0;
|
||||
final int shardSize = 10000;
|
||||
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class,
|
||||
() -> client().prepareSearch("idx")
|
||||
.addAggregation(geohashGrid("geohashgrid").field("location").size(size).shardSize(shardSize)).execute()
|
||||
.actionGet());
|
||||
assertThat(exception.getMessage(), containsString("[size] must be greater than 0. Found [0] in [geohashgrid]"));
|
||||
}
|
||||
|
||||
assertSearchResponse(response);
|
||||
GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
|
||||
assertThat(geoGrid.getBuckets().size(), greaterThanOrEqualTo(1));
|
||||
}
|
||||
public void testShardSizeIsZero() {
|
||||
final int size = 100;
|
||||
final int shardSize = 0;
|
||||
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class,
|
||||
() -> client().prepareSearch("idx")
|
||||
.addAggregation(geohashGrid("geohashgrid").field("location").size(size).shardSize(shardSize))
|
||||
.execute().actionGet());
|
||||
assertThat(exception.getMessage(), containsString("[shardSize] must be greater than 0. Found [0] in [geohashgrid]"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -33,33 +33,10 @@ public class GeoHashGridTests extends BaseAggregationTestCase<GeoGridAggregation
|
|||
factory.precision(precision);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int size = randomInt(5);
|
||||
switch (size) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
size = randomIntBetween(0, Integer.MAX_VALUE);
|
||||
break;
|
||||
}
|
||||
factory.size(size);
|
||||
|
||||
factory.size(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int shardSize = randomInt(5);
|
||||
switch (shardSize) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
shardSize = randomIntBetween(0, Integer.MAX_VALUE);
|
||||
break;
|
||||
}
|
||||
factory.shardSize(shardSize);
|
||||
factory.shardSize(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
}
|
||||
return factory;
|
||||
}
|
||||
|
|
|
@ -353,7 +353,7 @@ public class ReverseNestedIT extends ESIntegTestCase {
|
|||
.subAggregation(
|
||||
terms("field2").field("nested1.nested2.field2").order(Terms.Order.term(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.size(0)
|
||||
.size(10000)
|
||||
.subAggregation(
|
||||
reverseNested("nested1_to_field1").path("nested1")
|
||||
.subAggregation(
|
||||
|
|
|
@ -73,37 +73,11 @@ public class SignificantTermsTests extends BaseAggregationTestCase<SignificantTe
|
|||
factory.missing("MISSING");
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int size = randomInt(4);
|
||||
switch (size) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
size = randomInt();
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
factory.bucketCountThresholds().setRequiredSize(size);
|
||||
factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int shardSize = randomInt(4);
|
||||
switch (shardSize) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
shardSize = randomInt();
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
factory.bucketCountThresholds().setShardSize(shardSize);
|
||||
factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int minDocCount = randomInt(4);
|
||||
|
|
|
@ -202,8 +202,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -232,8 +231,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -281,8 +279,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.count(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -313,8 +310,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -345,8 +341,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -377,8 +372,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(LONG_FIELD_NAME)))
|
||||
|
@ -411,8 +405,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(STRING_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(LONG_FIELD_NAME)))
|
||||
|
@ -445,8 +438,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -475,8 +467,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -524,8 +515,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.count(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -556,8 +546,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -588,8 +577,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -620,8 +608,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(LONG_FIELD_NAME)))
|
||||
|
@ -654,8 +641,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(LONG_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(DOUBLE_FIELD_NAME)))
|
||||
|
@ -688,8 +674,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -718,8 +703,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
|
@ -767,8 +751,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.count(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -799,8 +782,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -831,8 +813,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.term(false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
@ -863,8 +844,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", true))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(LONG_FIELD_NAME)))
|
||||
|
@ -897,8 +877,7 @@ public class TermsDocCountErrorIT extends ESIntegTestCase {
|
|||
.executionHint(randomExecutionHint())
|
||||
.field(DOUBLE_FIELD_NAME)
|
||||
.showTermDocCountError(true)
|
||||
.size(0)
|
||||
.shardSize(0)
|
||||
.size(10000).shardSize(10000)
|
||||
.order(Order.aggregation("sortAgg", false))
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.subAggregation(sum("sortAgg").field(LONG_FIELD_NAME)))
|
||||
|
|
|
@ -69,37 +69,10 @@ public class TermsTests extends BaseAggregationTestCase<TermsAggregationBuilder>
|
|||
factory.missing("MISSING");
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int size = randomInt(4);
|
||||
switch (size) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
size = randomInt();
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
factory.bucketCountThresholds().setRequiredSize(size);
|
||||
|
||||
factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int shardSize = randomInt(4);
|
||||
switch (shardSize) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
shardSize = randomInt();
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
factory.bucketCountThresholds().setShardSize(shardSize);
|
||||
factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int minDocCount = randomInt(4);
|
||||
|
|
|
@ -117,15 +117,9 @@ precision:: Optional. The string length of the geohashes used to define
|
|||
size:: Optional. The maximum number of geohash buckets to return
|
||||
(defaults to 10,000). When results are trimmed, buckets are
|
||||
prioritised based on the volumes of documents they contain.
|
||||
A value of `0` will return all buckets that
|
||||
contain a hit, use with caution as this could use a lot of CPU
|
||||
and network bandwidth if there are many buckets.
|
||||
|
||||
shard_size:: Optional. To allow for more accurate counting of the top cells
|
||||
returned in the final result the aggregation defaults to
|
||||
returning `max(10,(size x number-of-shards))` buckets from each
|
||||
shard. If this heuristic is undesirable, the number considered
|
||||
from each shard can be over-ridden using this parameter.
|
||||
A value of `0` makes the shard size unlimited.
|
||||
|
||||
|
||||
|
|
|
@ -376,8 +376,6 @@ If the number of unique terms is greater than `size`, the returned list can be s
|
|||
(it could be that the term counts are slightly off and it could even be that a term that should have been in the top
|
||||
size buckets was not returned).
|
||||
|
||||
If set to `0`, the `size` will be set to `Integer.MAX_VALUE`.
|
||||
|
||||
To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard
|
||||
using a heuristic based on the number of shards. To take manual control of this setting the `shard_size` parameter
|
||||
can be used to control the volumes of candidate terms produced by each shard.
|
||||
|
@ -389,9 +387,6 @@ a consolidated review by the reducing node before the final selection. Obviously
|
|||
will cause extra network traffic and RAM usage so this is a quality/cost trade-off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter.
|
||||
|
||||
|
||||
If set to `0`, the `shard_size` will be set to `Integer.MAX_VALUE`.
|
||||
|
||||
|
||||
NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, Elasticsearch will
|
||||
override it and reset it to be equal to `size`.
|
||||
|
||||
|
@ -514,4 +509,3 @@ in inner aggregations.
|
|||
<1> the possible values are `map`, `global_ordinals` and `global_ordinals_hash`
|
||||
|
||||
Please note that Elasticsearch will ignore this execution hint if it is not applicable.
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ default, the node coordinating the search process will request each shard to pro
|
|||
and once all shards respond, it will reduce the results to the final list that will then be returned to the client.
|
||||
This means that if the number of unique terms is greater than `size`, the returned list is slightly off and not accurate
|
||||
(it could be that the term counts are slightly off and it could even be that a term that should have been in the top
|
||||
size buckets was not returned). If set to `0`, the `size` will be set to `Integer.MAX_VALUE`.
|
||||
size buckets was not returned).
|
||||
|
||||
[[search-aggregations-bucket-terms-aggregation-approximate-counts]]
|
||||
==== Document counts are approximate
|
||||
|
@ -149,14 +149,12 @@ The `shard_size` parameter can be used to minimize the extra work that comes wi
|
|||
it will determine how many terms the coordinating node will request from each shard. Once all the shards responded, the
|
||||
coordinating node will then reduce them to a final result which will be based on the `size` parameter - this way,
|
||||
one can increase the accuracy of the returned terms and avoid the overhead of streaming a big list of buckets back to
|
||||
the client. If set to `0`, the `shard_size` will be set to `Integer.MAX_VALUE`.
|
||||
the client.
|
||||
|
||||
|
||||
NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, Elasticsearch will
|
||||
override it and reset it to be equal to `size`.
|
||||
|
||||
It is possible to not limit the number of terms that are returned by setting `size` to `0`. Don't use this
|
||||
on high-cardinality fields as this will kill both your CPU, since terms need to be returned sorted, and your network.
|
||||
|
||||
The default `shard_size` is a multiple of the `size` parameter which is dependent on the number of shards.
|
||||
|
||||
|
|
|
@ -20,3 +20,9 @@ Now that Elasticsearch supports `ipv6`, `ip` addresses are encoded in the index
|
|||
using a binary representation rather than a numeric representation. As a
|
||||
consequence, the output of `ip_range` aggregations does not give numeric values
|
||||
for `from` and `to` anymore.
|
||||
|
||||
==== `size: 0` on Terms, Significant Terms and Geohash Grid Aggregations
|
||||
|
||||
`size: 0` is no longer valid for the terms, significant terms and geohash grid
|
||||
aggregations. Instead a size should be explicitly specified with a number greater
|
||||
than zero.
|
||||
|
|
|
@ -63,6 +63,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
|
|||
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.core.IsNull.notNullValue;
|
||||
|
@ -235,20 +236,13 @@ public class DoubleTermsTests extends AbstractTermsTestCase {
|
|||
|
||||
// the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
|
||||
public void testSizeIsZero() {
|
||||
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
||||
.addAggregation(terms("terms")
|
||||
.field(SINGLE_VALUED_FIELD_NAME)
|
||||
.minDocCount(randomInt(1))
|
||||
.size(0)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet();
|
||||
|
||||
assertSearchResponse(response);
|
||||
|
||||
Terms terms = response.getAggregations().get("terms");
|
||||
assertThat(terms, notNullValue());
|
||||
assertThat(terms.getName(), equalTo("terms"));
|
||||
assertThat(terms.getBuckets().size(), equalTo(100));
|
||||
ElasticsearchException exception = expectThrows(ElasticsearchException.class,
|
||||
() -> client()
|
||||
.prepareSearch("idx").setTypes("high_card_type").addAggregation(terms("terms").field(SINGLE_VALUED_FIELD_NAME)
|
||||
.minDocCount(randomInt(1)).size(0).collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.execute().actionGet());
|
||||
assertThat(exception.getDetailedMessage(),
|
||||
containsString("parameters [required_size] and [shard_size] must be >0 in terms aggregation."));
|
||||
}
|
||||
|
||||
public void testSingleValueField() throws Exception {
|
||||
|
|
|
@ -61,6 +61,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
|
|||
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.core.IsNull.notNullValue;
|
||||
|
@ -237,20 +238,16 @@ public class LongTermsTests extends AbstractTermsTestCase {
|
|||
|
||||
// the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
|
||||
public void testSizeIsZero() {
|
||||
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
||||
ElasticsearchException exception = expectThrows(ElasticsearchException.class,
|
||||
() -> client().prepareSearch("idx").setTypes("high_card_type")
|
||||
.addAggregation(terms("terms")
|
||||
.field(SINGLE_VALUED_FIELD_NAME)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
||||
.minDocCount(randomInt(1))
|
||||
.size(0))
|
||||
.execute().actionGet();
|
||||
|
||||
assertSearchResponse(response);
|
||||
|
||||
Terms terms = response.getAggregations().get("terms");
|
||||
assertThat(terms, notNullValue());
|
||||
assertThat(terms.getName(), equalTo("terms"));
|
||||
assertThat(terms.getBuckets().size(), equalTo(100));
|
||||
.execute().actionGet());
|
||||
assertThat(exception.getDetailedMessage(),
|
||||
containsString("parameters [required_size] and [shard_size] must be >0 in terms aggregation."));
|
||||
}
|
||||
|
||||
public void testSingleValueField() throws Exception {
|
||||
|
|
|
@ -70,6 +70,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
|
|||
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.core.IsNull.notNullValue;
|
||||
|
@ -202,20 +203,15 @@ public class StringTermsTests extends AbstractTermsTestCase {
|
|||
// the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
|
||||
public void testSizeIsZero() {
|
||||
final int minDocCount = randomInt(1);
|
||||
SearchResponse response = client()
|
||||
ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> client()
|
||||
.prepareSearch("idx")
|
||||
.setTypes("high_card_type")
|
||||
.addAggregation(
|
||||
terms("terms").executionHint(randomExecutionHint()).field(SINGLE_VALUED_FIELD_NAME)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())).minDocCount(minDocCount).size(0)).execute()
|
||||
.actionGet();
|
||||
|
||||
assertSearchResponse(response);
|
||||
|
||||
Terms terms = response.getAggregations().get("terms");
|
||||
assertThat(terms, notNullValue());
|
||||
assertThat(terms.getName(), equalTo("terms"));
|
||||
assertThat(terms.getBuckets().size(), equalTo(minDocCount == 0 ? 105 : 100)); // 105 because of the other type
|
||||
.actionGet());
|
||||
assertThat(exception.getDetailedMessage(),
|
||||
containsString("parameters [required_size] and [shard_size] must be >0 in terms aggregation."));
|
||||
}
|
||||
|
||||
public void testSingleValueField() throws Exception {
|
||||
|
|
|
@ -48,7 +48,7 @@ public abstract class AbstractTermsTestCase extends ESIntegTestCase {
|
|||
.addAggregation(terms("terms")
|
||||
.executionHint(randomExecutionHint())
|
||||
.field(fieldName)
|
||||
.size(0)
|
||||
.size(10000)
|
||||
.collectMode(randomFrom(SubAggCollectionMode.values())))
|
||||
.get();
|
||||
assertSearchResponse(allTerms);
|
||||
|
|
Loading…
Reference in New Issue