Make size=0 return all buckets for the geohash_grid aggregation.
Close #4875
This commit is contained in:
parent
e1c1120949
commit
9cb17408cb
docs/reference/search/aggregations/bucket
src
main/java/org/elasticsearch/search/aggregations
InternalAggregation.java
bucket
test/java/org/elasticsearch/search/aggregations/bucket
|
@ -117,11 +117,15 @@ precision:: Optional. The string length of the geohashes used to define
|
|||
size:: Optional. The maximum number of geohash buckets to return
|
||||
(defaults to 10,000). When results are trimmed, buckets are
|
||||
prioritised based on the volumes of documents they contain.
|
||||
added[1.1.0] A value of `0` will return all buckets that
|
||||
contain a hit; use with caution, as this could use a lot of CPU
|
||||
and network bandwidth if there are many buckets.
|
||||
|
||||
shard_size:: Optional. To allow for more accurate counting of the top cells
|
||||
returned in the final result the aggregation defaults to
|
||||
returning `max(10,(size x number-of-shards))` buckets from each
|
||||
shard. If this heuristic is undesirable, the number considered
|
||||
from each shard can be over-ridden using this parameter.
|
||||
added[1.1.0] A value of `0` makes the shard size unlimited.
|
||||
|
||||
|
||||
|
|
|
@ -21,10 +21,13 @@ package org.elasticsearch.search.aggregations;
|
|||
import org.elasticsearch.cache.recycler.CacheRecycler;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilderString;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -125,6 +128,23 @@ public abstract class InternalAggregation implements Aggregation, ToXContent, St
|
|||
*/
|
||||
public abstract InternalAggregation reduce(ReduceContext reduceContext);
|
||||
|
||||
/**
|
||||
* Read a size under the assumption that a value of 0 means unlimited.
|
||||
*/
|
||||
protected static int readSize(StreamInput in) throws IOException {
|
||||
final int size = in.readVInt();
|
||||
return size == 0 ? Integer.MAX_VALUE : size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a size under the assumption that a value of 0 means unlimited.
|
||||
*/
|
||||
protected static void writeSize(int size, StreamOutput out) throws IOException {
|
||||
if (size == Integer.MAX_VALUE) {
|
||||
size = 0;
|
||||
}
|
||||
out.writeVInt(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Common xcontent fields that are shared among addAggregation
|
||||
|
|
|
@ -39,11 +39,11 @@ public class BucketUtils {
|
|||
return finalSize;
|
||||
}
|
||||
//Cap the multiplier used for shards to avoid excessive data transfer
|
||||
final int shardSampleSize = finalSize * Math.min(10, numberOfShards);
|
||||
final long shardSampleSize = (long) finalSize * Math.min(10, numberOfShards);
|
||||
// When finalSize is very small e.g. 1 and there is a low number of
|
||||
// shards then we need to ensure we still gather a reasonable sample of statistics from each
|
||||
// shard (at low cost) to improve the chances of the final result being accurate.
|
||||
return Math.max(10, shardSampleSize);
|
||||
return (int) Math.min(Integer.MAX_VALUE, Math.max(10, shardSampleSize));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
|||
String field = null;
|
||||
int precision = DEFAULT_PRECISION;
|
||||
int requiredSize = DEFAULT_MAX_NUM_CELLS;
|
||||
int shardSize = 0;
|
||||
int shardSize = -1;
|
||||
|
||||
|
||||
XContentParser.Token token;
|
||||
|
@ -78,11 +78,24 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
if (shardSize == 0) {
|
||||
shardSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (requiredSize == 0) {
|
||||
requiredSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (shardSize < 0) {
|
||||
//Use default heuristic to avoid any wrong-ranking caused by distributed counting
|
||||
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards());
|
||||
}
|
||||
|
||||
if (shardSize < requiredSize) {
|
||||
shardSize = requiredSize;
|
||||
}
|
||||
|
||||
ValuesSourceConfig<GeoPointValuesSource> config = new ValuesSourceConfig<GeoPointValuesSource>(GeoPointValuesSource.class);
|
||||
if (field == null) {
|
||||
return new GeoGridFactory(aggregationName, config, precision, requiredSize, shardSize);
|
||||
|
|
|
@ -256,7 +256,7 @@ public class InternalGeoHashGrid extends InternalAggregation implements GeoHashG
|
|||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
this.name = in.readString();
|
||||
this.requiredSize = in.readVInt();
|
||||
this.requiredSize = readSize(in);
|
||||
int size = in.readVInt();
|
||||
List<Bucket> buckets = new ArrayList<Bucket>(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
@ -269,7 +269,7 @@ public class InternalGeoHashGrid extends InternalAggregation implements GeoHashG
|
|||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(name);
|
||||
out.writeVInt(requiredSize);
|
||||
writeSize(requiredSize, out);
|
||||
out.writeVInt(buckets.size());
|
||||
for (Bucket bucket : buckets) {
|
||||
out.writeLong(bucket.geohashAsLong);
|
||||
|
|
|
@ -21,8 +21,6 @@ package org.elasticsearch.search.aggregations.bucket.terms;
|
|||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import org.elasticsearch.cache.recycler.CacheRecycler;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
|
@ -31,7 +29,6 @@ import org.elasticsearch.search.aggregations.InternalAggregation;
|
|||
import org.elasticsearch.search.aggregations.InternalAggregations;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
@ -184,17 +181,4 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
|
|||
buckets = newBuckets;
|
||||
}
|
||||
|
||||
// 0 actually means unlimited
|
||||
protected static int readSize(StreamInput in) throws IOException {
|
||||
final int size = in.readVInt();
|
||||
return size == 0 ? Integer.MAX_VALUE : size;
|
||||
}
|
||||
|
||||
protected static void writeSize(int size, StreamOutput out) throws IOException {
|
||||
if (size == Integer.MAX_VALUE) {
|
||||
size = 0;
|
||||
}
|
||||
out.writeVInt(size);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ import java.util.Random;
|
|||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -241,4 +242,25 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
// making sure this doesn't runs into an OOME
|
||||
public void sizeIsZero() {
|
||||
for (int precision = 1; precision <= highestPrecisionGeohash; precision++) {
|
||||
final int size = randomBoolean() ? 0 : randomIntBetween(1, Integer.MAX_VALUE);
|
||||
final int shardSize = randomBoolean() ? -1 : 0;
|
||||
SearchResponse response = client().prepareSearch("idx")
|
||||
.addAggregation(geohashGrid("geohashgrid")
|
||||
.field("location")
|
||||
.size(size)
|
||||
.shardSize(shardSize)
|
||||
.precision(precision)
|
||||
)
|
||||
.execute().actionGet();
|
||||
|
||||
assertThat(response.getFailedShards(), equalTo(0));
|
||||
GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
|
||||
assertThat(geoGrid.getBuckets().size(), greaterThanOrEqualTo(1));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue