Aggregations Refactor: Refactor Geohash Grid Aggregation
This commit is contained in:
parent
ac772c70e9
commit
2772e88447
|
@ -21,108 +21,118 @@ package org.elasticsearch.search.aggregations.bucket.geogrid;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.util.GeoHashUtils;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentParser.Token;
|
||||
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
|
||||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||
import org.elasticsearch.index.fielddata.SortingNumericDocValues;
|
||||
import org.elasticsearch.index.query.GeoBoundingBoxQueryBuilder;
|
||||
import org.elasticsearch.search.SearchParseException;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactory;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.GeoPointValuesSourceParser;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationContext;
|
||||
import org.elasticsearch.search.aggregations.support.ValueType;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Aggregates Geo information into cells determined by geohashes of a given precision.
|
||||
* WARNING - for high-precision geohashes it may prove necessary to use a {@link GeoBoundingBoxQueryBuilder}
|
||||
* aggregation to focus in on a smaller area to avoid generating too many buckets and using too much RAM
|
||||
*/
|
||||
public class GeoHashGridParser implements Aggregator.Parser {
|
||||
public class GeoHashGridParser extends GeoPointValuesSourceParser {
|
||||
|
||||
public static final int DEFAULT_PRECISION = 5;
|
||||
public static final int DEFAULT_MAX_NUM_CELLS = 10000;
|
||||
|
||||
public GeoHashGridParser() {
|
||||
super(false, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String type() {
|
||||
return InternalGeoHashGrid.TYPE.name();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
|
||||
|
||||
ValuesSourceParser<ValuesSource.GeoPoint> vsParser = ValuesSourceParser
|
||||
.geoPoint(aggregationName, InternalGeoHashGrid.TYPE, context).build();
|
||||
|
||||
int precision = GeoHashGridParams.DEFAULT_PRECISION;
|
||||
int requiredSize = GeoHashGridParams.DEFAULT_MAX_NUM_CELLS;
|
||||
int shardSize = -1;
|
||||
|
||||
XContentParser.Token token;
|
||||
String currentFieldName = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (vsParser.token(currentFieldName, token, parser)) {
|
||||
continue;
|
||||
} else if (token == XContentParser.Token.VALUE_NUMBER ||
|
||||
token == XContentParser.Token.VALUE_STRING) { //Be lenient and also allow numbers enclosed in quotes
|
||||
if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
|
||||
precision = GeoHashGridParams.checkPrecision(parser.intValue());
|
||||
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
|
||||
requiredSize = parser.intValue();
|
||||
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
|
||||
shardSize = parser.intValue();
|
||||
}
|
||||
} else if (token != XContentParser.Token.START_OBJECT) {
|
||||
throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].",
|
||||
parser.getTokenLocation());
|
||||
}
|
||||
}
|
||||
|
||||
if (shardSize == 0) {
|
||||
shardSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (requiredSize == 0) {
|
||||
requiredSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (shardSize < 0) {
|
||||
//Use default heuristic to avoid any wrong-ranking caused by distributed counting
|
||||
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards());
|
||||
}
|
||||
|
||||
if (shardSize < requiredSize) {
|
||||
shardSize = requiredSize;
|
||||
}
|
||||
|
||||
return new GeoGridFactory(aggregationName, vsParser.input(), precision, requiredSize, shardSize);
|
||||
|
||||
public AggregatorFactory getFactoryPrototype() {
|
||||
return new GeoGridFactory(null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> createFactory(
|
||||
String aggregationName, ValuesSourceType valuesSourceType,
|
||||
ValueType targetValueType, Map<ParseField, Object> otherOptions) {
|
||||
GeoGridFactory factory = new GeoGridFactory(aggregationName);
|
||||
Integer precision = (Integer) otherOptions.get(GeoHashGridParams.FIELD_PRECISION);
|
||||
if (precision != null) {
|
||||
factory.precision(precision);
|
||||
}
|
||||
Integer size = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SIZE);
|
||||
if (size != null) {
|
||||
factory.size(size);
|
||||
}
|
||||
Integer shardSize = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SHARD_SIZE);
|
||||
if (shardSize != null) {
|
||||
factory.shardSize(shardSize);
|
||||
}
|
||||
return factory;
|
||||
}
|
||||
|
||||
static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
|
||||
@Override
|
||||
protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser,
|
||||
ParseFieldMatcher parseFieldMatcher, Map<ParseField, Object> otherOptions) throws IOException {
|
||||
if (token == XContentParser.Token.VALUE_NUMBER || token == XContentParser.Token.VALUE_STRING) {
|
||||
if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
|
||||
otherOptions.put(GeoHashGridParams.FIELD_PRECISION, parser.intValue());
|
||||
return true;
|
||||
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
|
||||
otherOptions.put(GeoHashGridParams.FIELD_SIZE, parser.intValue());
|
||||
return true;
|
||||
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
|
||||
otherOptions.put(GeoHashGridParams.FIELD_SHARD_SIZE, parser.intValue());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private final int precision;
|
||||
private final int requiredSize;
|
||||
private final int shardSize;
|
||||
public static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
|
||||
|
||||
public GeoGridFactory(String name, ValuesSourceParser.Input<ValuesSource.GeoPoint> input, int precision, int requiredSize,
|
||||
int shardSize) {
|
||||
super(name, InternalGeoHashGrid.TYPE.name(), input);
|
||||
this.precision = precision;
|
||||
this.requiredSize = requiredSize;
|
||||
private int precision = DEFAULT_PRECISION;
|
||||
private int requiredSize = DEFAULT_MAX_NUM_CELLS;
|
||||
private int shardSize = -1;
|
||||
|
||||
public GeoGridFactory(String name) {
|
||||
super(name, InternalGeoHashGrid.TYPE.name(), ValuesSourceType.GEOPOINT, ValueType.GEOPOINT);
|
||||
}
|
||||
|
||||
public void precision(int precision) {
|
||||
this.precision = GeoHashGridParams.checkPrecision(precision);
|
||||
}
|
||||
|
||||
public void size(int size) {
|
||||
this.requiredSize = size;
|
||||
}
|
||||
|
||||
public void shardSize(int shardSize) {
|
||||
this.shardSize = shardSize;
|
||||
}
|
||||
|
||||
|
@ -143,6 +153,23 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
|||
protected Aggregator doCreateInternal(final ValuesSource.GeoPoint valuesSource, AggregationContext aggregationContext,
|
||||
Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
|
||||
throws IOException {
|
||||
if (shardSize == 0) {
|
||||
shardSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (requiredSize == 0) {
|
||||
requiredSize = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (shardSize < 0) {
|
||||
// Use default heuristic to avoid any wrong-ranking caused by
|
||||
// distributed counting
|
||||
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, aggregationContext.searchContext().numberOfShards());
|
||||
}
|
||||
|
||||
if (shardSize < requiredSize) {
|
||||
shardSize = requiredSize;
|
||||
}
|
||||
if (collectsFromSingleBucket == false) {
|
||||
return asMultiBucketAggregator(this, aggregationContext, parent);
|
||||
}
|
||||
|
@ -152,6 +179,52 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
|||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> innerReadFrom(
|
||||
String name, ValuesSourceType valuesSourceType,
|
||||
ValueType targetValueType, StreamInput in) throws IOException {
|
||||
GeoGridFactory factory = new GeoGridFactory(name);
|
||||
factory.precision = in.readVInt();
|
||||
factory.requiredSize = in.readVInt();
|
||||
factory.shardSize = in.readVInt();
|
||||
return factory;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void innerWriteTo(StreamOutput out) throws IOException {
|
||||
out.writeVInt(precision);
|
||||
out.writeVInt(requiredSize);
|
||||
out.writeVInt(shardSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(GeoHashGridParams.FIELD_PRECISION.getPreferredName(), precision);
|
||||
builder.field(GeoHashGridParams.FIELD_SIZE.getPreferredName(), requiredSize);
|
||||
builder.field(GeoHashGridParams.FIELD_SHARD_SIZE.getPreferredName(), shardSize);
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean innerEquals(Object obj) {
|
||||
GeoGridFactory other = (GeoGridFactory) obj;
|
||||
if (precision != other.precision) {
|
||||
return false;
|
||||
}
|
||||
if (requiredSize != other.requiredSize) {
|
||||
return false;
|
||||
}
|
||||
if (shardSize != other.shardSize) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int innerHashCode() {
|
||||
return Objects.hash(precision, requiredSize, shardSize);
|
||||
}
|
||||
|
||||
private static class CellValues extends SortingNumericDocValues {
|
||||
private MultiGeoPointValues geoValues;
|
||||
private int precision;
|
||||
|
@ -209,10 +282,4 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
|||
|
||||
}
|
||||
}
|
||||
// NORELEASE implement this method when refactoring this aggregation
|
||||
@Override
|
||||
public AggregatorFactory getFactoryPrototype() {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.aggregations.bucket;
|
||||
|
||||
import org.elasticsearch.search.aggregations.BaseAggregationTestCase;
|
||||
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridParser.GeoGridFactory;
|
||||
|
||||
public class GeoHashGridTests extends BaseAggregationTestCase<GeoGridFactory> {
|
||||
|
||||
@Override
|
||||
protected GeoGridFactory createTestAggregatorFactory() {
|
||||
String name = randomAsciiOfLengthBetween(3, 20);
|
||||
GeoGridFactory factory = new GeoGridFactory(name);
|
||||
if (randomBoolean()) {
|
||||
int precision = randomIntBetween(1, 12);
|
||||
factory.precision(precision);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int size = randomInt(5);
|
||||
switch (size) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
size = randomInt();
|
||||
break;
|
||||
}
|
||||
factory.size(size);
|
||||
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
int shardSize = randomInt(5);
|
||||
switch (shardSize) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
shardSize = randomInt();
|
||||
break;
|
||||
}
|
||||
factory.shardSize(shardSize);
|
||||
}
|
||||
return factory;
|
||||
}
|
||||
|
||||
}
|
|
@ -31,6 +31,8 @@ public class GeoHashGridParserTests extends ESTestCase {
|
|||
int precision = randomIntBetween(1, 12);
|
||||
XContentParser stParser = JsonXContent.jsonXContent.createParser(
|
||||
"{\"field\":\"my_loc\", \"precision\":" + precision + ", \"size\": 500, \"shard_size\": 550}");
|
||||
XContentParser.Token token = stParser.nextToken();
|
||||
assertSame(XContentParser.Token.START_OBJECT, token);
|
||||
GeoHashGridParser parser = new GeoHashGridParser();
|
||||
// can create a factory
|
||||
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
|
||||
|
@ -41,6 +43,8 @@ public class GeoHashGridParserTests extends ESTestCase {
|
|||
int precision = randomIntBetween(1, 12);
|
||||
XContentParser stParser = JsonXContent.jsonXContent.createParser(
|
||||
"{\"field\":\"my_loc\", \"precision\":\"" + precision + "\", \"size\": \"500\", \"shard_size\": \"550\"}");
|
||||
XContentParser.Token token = stParser.nextToken();
|
||||
assertSame(XContentParser.Token.START_OBJECT, token);
|
||||
GeoHashGridParser parser = new GeoHashGridParser();
|
||||
// can create a factory
|
||||
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
|
||||
|
@ -49,6 +53,8 @@ public class GeoHashGridParserTests extends ESTestCase {
|
|||
public void testParseErrorOnNonIntPrecision() throws Exception {
|
||||
SearchContext searchContext = new TestSearchContext();
|
||||
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"2.0\"}");
|
||||
XContentParser.Token token = stParser.nextToken();
|
||||
assertSame(XContentParser.Token.START_OBJECT, token);
|
||||
GeoHashGridParser parser = new GeoHashGridParser();
|
||||
try {
|
||||
parser.parse("geohash_grid", stParser, searchContext);
|
||||
|
@ -61,18 +67,22 @@ public class GeoHashGridParserTests extends ESTestCase {
|
|||
public void testParseErrorOnBooleanPrecision() throws Exception {
|
||||
SearchContext searchContext = new TestSearchContext();
|
||||
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":false}");
|
||||
XContentParser.Token token = stParser.nextToken();
|
||||
assertSame(XContentParser.Token.START_OBJECT, token);
|
||||
GeoHashGridParser parser = new GeoHashGridParser();
|
||||
try {
|
||||
parser.parse("geohash_grid", stParser, searchContext);
|
||||
fail();
|
||||
} catch (SearchParseException ex) {
|
||||
assertEquals("Unexpected token VALUE_BOOLEAN in [geohash_grid].", ex.getMessage());
|
||||
assertEquals("Unexpected token VALUE_BOOLEAN [precision] in [geohash_grid].", ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testParseErrorOnPrecisionOutOfRange() throws Exception {
|
||||
SearchContext searchContext = new TestSearchContext();
|
||||
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"13\"}");
|
||||
XContentParser.Token token = stParser.nextToken();
|
||||
assertSame(XContentParser.Token.START_OBJECT, token);
|
||||
GeoHashGridParser parser = new GeoHashGridParser();
|
||||
try {
|
||||
parser.parse("geohash_grid", stParser, searchContext);
|
||||
|
|
Loading…
Reference in New Issue