Aggregations Refactor: Refactor Geohash Grid Aggregation

This commit is contained in:
Colin Goodheart-Smithe 2015-08-25 13:32:19 +02:00
parent ac772c70e9
commit 2772e88447
3 changed files with 215 additions and 71 deletions

View File

@ -21,108 +21,118 @@ package org.elasticsearch.search.aggregations.bucket.geogrid;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.GeoHashUtils;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.SortingNumericDocValues;
import org.elasticsearch.index.query.GeoBoundingBoxQueryBuilder;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.GeoPointValuesSourceParser;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
* Aggregates Geo information into cells determined by geohashes of a given precision.
* WARNING - for high-precision geohashes it may prove necessary to use a {@link GeoBoundingBoxQueryBuilder}
* aggregation to focus in on a smaller area to avoid generating too many buckets and using too much RAM
*/
public class GeoHashGridParser implements Aggregator.Parser {
public class GeoHashGridParser extends GeoPointValuesSourceParser {
public static final int DEFAULT_PRECISION = 5;
public static final int DEFAULT_MAX_NUM_CELLS = 10000;
public GeoHashGridParser() {
super(false, false);
}
@Override
public String type() {
return InternalGeoHashGrid.TYPE.name();
}
@Override
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
ValuesSourceParser<ValuesSource.GeoPoint> vsParser = ValuesSourceParser
.geoPoint(aggregationName, InternalGeoHashGrid.TYPE, context).build();
int precision = GeoHashGridParams.DEFAULT_PRECISION;
int requiredSize = GeoHashGridParams.DEFAULT_MAX_NUM_CELLS;
int shardSize = -1;
XContentParser.Token token;
String currentFieldName = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (vsParser.token(currentFieldName, token, parser)) {
continue;
} else if (token == XContentParser.Token.VALUE_NUMBER ||
token == XContentParser.Token.VALUE_STRING) { //Be lenient and also allow numbers enclosed in quotes
if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
precision = GeoHashGridParams.checkPrecision(parser.intValue());
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
requiredSize = parser.intValue();
} else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
shardSize = parser.intValue();
}
} else if (token != XContentParser.Token.START_OBJECT) {
throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].",
parser.getTokenLocation());
}
}
if (shardSize == 0) {
shardSize = Integer.MAX_VALUE;
}
if (requiredSize == 0) {
requiredSize = Integer.MAX_VALUE;
}
if (shardSize < 0) {
//Use default heuristic to avoid any wrong-ranking caused by distributed counting
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards());
}
if (shardSize < requiredSize) {
shardSize = requiredSize;
}
return new GeoGridFactory(aggregationName, vsParser.input(), precision, requiredSize, shardSize);
public AggregatorFactory getFactoryPrototype() {
return new GeoGridFactory(null);
}
@Override
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> createFactory(
String aggregationName, ValuesSourceType valuesSourceType,
ValueType targetValueType, Map<ParseField, Object> otherOptions) {
GeoGridFactory factory = new GeoGridFactory(aggregationName);
Integer precision = (Integer) otherOptions.get(GeoHashGridParams.FIELD_PRECISION);
if (precision != null) {
factory.precision(precision);
}
Integer size = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SIZE);
if (size != null) {
factory.size(size);
}
Integer shardSize = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SHARD_SIZE);
if (shardSize != null) {
factory.shardSize(shardSize);
}
return factory;
}
static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
@Override
protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser,
ParseFieldMatcher parseFieldMatcher, Map<ParseField, Object> otherOptions) throws IOException {
if (token == XContentParser.Token.VALUE_NUMBER || token == XContentParser.Token.VALUE_STRING) {
if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) {
otherOptions.put(GeoHashGridParams.FIELD_PRECISION, parser.intValue());
return true;
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) {
otherOptions.put(GeoHashGridParams.FIELD_SIZE, parser.intValue());
return true;
} else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) {
otherOptions.put(GeoHashGridParams.FIELD_SHARD_SIZE, parser.intValue());
return true;
}
}
return false;
}
private final int precision;
private final int requiredSize;
private final int shardSize;
public static class GeoGridFactory extends ValuesSourceAggregatorFactory<ValuesSource.GeoPoint> {
public GeoGridFactory(String name, ValuesSourceParser.Input<ValuesSource.GeoPoint> input, int precision, int requiredSize,
int shardSize) {
super(name, InternalGeoHashGrid.TYPE.name(), input);
this.precision = precision;
this.requiredSize = requiredSize;
private int precision = DEFAULT_PRECISION;
private int requiredSize = DEFAULT_MAX_NUM_CELLS;
private int shardSize = -1;
public GeoGridFactory(String name) {
super(name, InternalGeoHashGrid.TYPE.name(), ValuesSourceType.GEOPOINT, ValueType.GEOPOINT);
}
public void precision(int precision) {
this.precision = GeoHashGridParams.checkPrecision(precision);
}
public void size(int size) {
this.requiredSize = size;
}
public void shardSize(int shardSize) {
this.shardSize = shardSize;
}
@ -143,6 +153,23 @@ public class GeoHashGridParser implements Aggregator.Parser {
protected Aggregator doCreateInternal(final ValuesSource.GeoPoint valuesSource, AggregationContext aggregationContext,
Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
if (shardSize == 0) {
shardSize = Integer.MAX_VALUE;
}
if (requiredSize == 0) {
requiredSize = Integer.MAX_VALUE;
}
if (shardSize < 0) {
// Use default heuristic to avoid any wrong-ranking caused by
// distributed counting
shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, aggregationContext.searchContext().numberOfShards());
}
if (shardSize < requiredSize) {
shardSize = requiredSize;
}
if (collectsFromSingleBucket == false) {
return asMultiBucketAggregator(this, aggregationContext, parent);
}
@ -152,6 +179,52 @@ public class GeoHashGridParser implements Aggregator.Parser {
}
@Override
protected ValuesSourceAggregatorFactory<org.elasticsearch.search.aggregations.support.ValuesSource.GeoPoint> innerReadFrom(
String name, ValuesSourceType valuesSourceType,
ValueType targetValueType, StreamInput in) throws IOException {
GeoGridFactory factory = new GeoGridFactory(name);
factory.precision = in.readVInt();
factory.requiredSize = in.readVInt();
factory.shardSize = in.readVInt();
return factory;
}
@Override
protected void innerWriteTo(StreamOutput out) throws IOException {
out.writeVInt(precision);
out.writeVInt(requiredSize);
out.writeVInt(shardSize);
}
@Override
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.field(GeoHashGridParams.FIELD_PRECISION.getPreferredName(), precision);
builder.field(GeoHashGridParams.FIELD_SIZE.getPreferredName(), requiredSize);
builder.field(GeoHashGridParams.FIELD_SHARD_SIZE.getPreferredName(), shardSize);
return builder;
}
@Override
protected boolean innerEquals(Object obj) {
GeoGridFactory other = (GeoGridFactory) obj;
if (precision != other.precision) {
return false;
}
if (requiredSize != other.requiredSize) {
return false;
}
if (shardSize != other.shardSize) {
return false;
}
return true;
}
@Override
protected int innerHashCode() {
return Objects.hash(precision, requiredSize, shardSize);
}
private static class CellValues extends SortingNumericDocValues {
private MultiGeoPointValues geoValues;
private int precision;
@ -209,10 +282,4 @@ public class GeoHashGridParser implements Aggregator.Parser {
}
}
// NORELEASE implement this method when refactoring this aggregation
@Override
public AggregatorFactory getFactoryPrototype() {
return null;
}
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;
import org.elasticsearch.search.aggregations.BaseAggregationTestCase;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridParser.GeoGridFactory;
public class GeoHashGridTests extends BaseAggregationTestCase<GeoGridFactory> {
@Override
protected GeoGridFactory createTestAggregatorFactory() {
String name = randomAsciiOfLengthBetween(3, 20);
GeoGridFactory factory = new GeoGridFactory(name);
if (randomBoolean()) {
int precision = randomIntBetween(1, 12);
factory.precision(precision);
}
if (randomBoolean()) {
int size = randomInt(5);
switch (size) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
size = randomInt();
break;
}
factory.size(size);
}
if (randomBoolean()) {
int shardSize = randomInt(5);
switch (shardSize) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
shardSize = randomInt();
break;
}
factory.shardSize(shardSize);
}
return factory;
}
}

View File

@ -31,6 +31,8 @@ public class GeoHashGridParserTests extends ESTestCase {
int precision = randomIntBetween(1, 12);
XContentParser stParser = JsonXContent.jsonXContent.createParser(
"{\"field\":\"my_loc\", \"precision\":" + precision + ", \"size\": 500, \"shard_size\": 550}");
XContentParser.Token token = stParser.nextToken();
assertSame(XContentParser.Token.START_OBJECT, token);
GeoHashGridParser parser = new GeoHashGridParser();
// can create a factory
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
@ -41,6 +43,8 @@ public class GeoHashGridParserTests extends ESTestCase {
int precision = randomIntBetween(1, 12);
XContentParser stParser = JsonXContent.jsonXContent.createParser(
"{\"field\":\"my_loc\", \"precision\":\"" + precision + "\", \"size\": \"500\", \"shard_size\": \"550\"}");
XContentParser.Token token = stParser.nextToken();
assertSame(XContentParser.Token.START_OBJECT, token);
GeoHashGridParser parser = new GeoHashGridParser();
// can create a factory
assertNotNull(parser.parse("geohash_grid", stParser, searchContext));
@ -49,6 +53,8 @@ public class GeoHashGridParserTests extends ESTestCase {
public void testParseErrorOnNonIntPrecision() throws Exception {
SearchContext searchContext = new TestSearchContext();
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"2.0\"}");
XContentParser.Token token = stParser.nextToken();
assertSame(XContentParser.Token.START_OBJECT, token);
GeoHashGridParser parser = new GeoHashGridParser();
try {
parser.parse("geohash_grid", stParser, searchContext);
@ -61,18 +67,22 @@ public class GeoHashGridParserTests extends ESTestCase {
public void testParseErrorOnBooleanPrecision() throws Exception {
SearchContext searchContext = new TestSearchContext();
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":false}");
XContentParser.Token token = stParser.nextToken();
assertSame(XContentParser.Token.START_OBJECT, token);
GeoHashGridParser parser = new GeoHashGridParser();
try {
parser.parse("geohash_grid", stParser, searchContext);
fail();
} catch (SearchParseException ex) {
assertEquals("Unexpected token VALUE_BOOLEAN in [geohash_grid].", ex.getMessage());
assertEquals("Unexpected token VALUE_BOOLEAN [precision] in [geohash_grid].", ex.getMessage());
}
}
public void testParseErrorOnPrecisionOutOfRange() throws Exception {
SearchContext searchContext = new TestSearchContext();
XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"13\"}");
XContentParser.Token token = stParser.nextToken();
assertSame(XContentParser.Token.START_OBJECT, token);
GeoHashGridParser parser = new GeoHashGridParser();
try {
parser.parse("geohash_grid", stParser, searchContext);