From 2772e88447b955798d44358f777f894e2e6d3e98 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Tue, 25 Aug 2015 13:32:19 +0200 Subject: [PATCH] Aggregations Refactor: Refactor Geohash Grid Aggregation --- .../bucket/geogrid/GeoHashGridParser.java | 207 ++++++++++++------ .../aggregations/bucket/GeoHashGridTests.java | 67 ++++++ .../geogrid/GeoHashGridParserTests.java | 12 +- 3 files changed, 215 insertions(+), 71 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java index 8951b7715c9..7369bdac53e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParser.java @@ -21,108 +21,118 @@ package org.elasticsearch.search.aggregations.bucket.geogrid; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.util.GeoHashUtils; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.fielddata.MultiGeoPointValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.index.fielddata.SortingNumericDocValues; import org.elasticsearch.index.query.GeoBoundingBoxQueryBuilder; -import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.bucket.BucketUtils; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.AbstractValuesSourceParser.GeoPointValuesSourceParser; import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; -import org.elasticsearch.search.aggregations.support.ValuesSourceParser; -import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; /** * Aggregates Geo information into cells determined by geohashes of a given precision. * WARNING - for high-precision geohashes it may prove necessary to use a {@link GeoBoundingBoxQueryBuilder} * aggregation to focus in on a smaller area to avoid generating too many buckets and using too much RAM */ -public class GeoHashGridParser implements Aggregator.Parser { +public class GeoHashGridParser extends GeoPointValuesSourceParser { + + public static final int DEFAULT_PRECISION = 5; + public static final int DEFAULT_MAX_NUM_CELLS = 10000; + + public GeoHashGridParser() { + super(false, false); + } @Override public String type() { return InternalGeoHashGrid.TYPE.name(); } - @Override - public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - - ValuesSourceParser vsParser = ValuesSourceParser - .geoPoint(aggregationName, InternalGeoHashGrid.TYPE, context).build(); - - int precision = GeoHashGridParams.DEFAULT_PRECISION; - int requiredSize = GeoHashGridParams.DEFAULT_MAX_NUM_CELLS; - int shardSize = -1; - - XContentParser.Token token; - String currentFieldName = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (vsParser.token(currentFieldName, token, parser)) { - continue; - } else if (token == XContentParser.Token.VALUE_NUMBER || - token == XContentParser.Token.VALUE_STRING) { //Be lenient and also allow numbers enclosed in quotes - if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) { - precision = GeoHashGridParams.checkPrecision(parser.intValue()); - } else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) { - requiredSize = parser.intValue(); - } else if (context.parseFieldMatcher().match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) { - shardSize = parser.intValue(); - } - } else if (token != XContentParser.Token.START_OBJECT) { - throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "].", - parser.getTokenLocation()); - } - } - - if (shardSize == 0) { - shardSize = Integer.MAX_VALUE; - } - - if (requiredSize == 0) { - requiredSize = Integer.MAX_VALUE; - } - - if (shardSize < 0) { - //Use default heuristic to avoid any wrong-ranking caused by distributed counting - shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, context.numberOfShards()); - } - - if (shardSize < requiredSize) { - shardSize = requiredSize; - } - - return new GeoGridFactory(aggregationName, vsParser.input(), precision, requiredSize, shardSize); - + public AggregatorFactory getFactoryPrototype() { + return new GeoGridFactory(null); } + @Override + protected ValuesSourceAggregatorFactory createFactory( + String aggregationName, ValuesSourceType valuesSourceType, + ValueType targetValueType, Map otherOptions) { + GeoGridFactory factory = new GeoGridFactory(aggregationName); + Integer precision = (Integer) otherOptions.get(GeoHashGridParams.FIELD_PRECISION); + if (precision != null) { + factory.precision(precision); + } + Integer size = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SIZE); + if (size != null) { + factory.size(size); + } + Integer shardSize = (Integer) otherOptions.get(GeoHashGridParams.FIELD_SHARD_SIZE); + if (shardSize != null) { + factory.shardSize(shardSize); + } + return factory; + } - static class GeoGridFactory extends ValuesSourceAggregatorFactory { + @Override + protected boolean token(String aggregationName, String currentFieldName, Token token, XContentParser parser, + ParseFieldMatcher parseFieldMatcher, Map otherOptions) throws IOException { + if (token == XContentParser.Token.VALUE_NUMBER || token == XContentParser.Token.VALUE_STRING) { + if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_PRECISION)) { + otherOptions.put(GeoHashGridParams.FIELD_PRECISION, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SIZE)) { + otherOptions.put(GeoHashGridParams.FIELD_SIZE, parser.intValue()); + return true; + } else if (parseFieldMatcher.match(currentFieldName, GeoHashGridParams.FIELD_SHARD_SIZE)) { + otherOptions.put(GeoHashGridParams.FIELD_SHARD_SIZE, parser.intValue()); + return true; + } + } + return false; + } - private final int precision; - private final int requiredSize; - private final int shardSize; + public static class GeoGridFactory extends ValuesSourceAggregatorFactory { - public GeoGridFactory(String name, ValuesSourceParser.Input input, int precision, int requiredSize, - int shardSize) { - super(name, InternalGeoHashGrid.TYPE.name(), input); - this.precision = precision; - this.requiredSize = requiredSize; + private int precision = DEFAULT_PRECISION; + private int requiredSize = DEFAULT_MAX_NUM_CELLS; + private int shardSize = -1; + + public GeoGridFactory(String name) { + super(name, InternalGeoHashGrid.TYPE.name(), ValuesSourceType.GEOPOINT, ValueType.GEOPOINT); + } + + public void precision(int precision) { + this.precision = GeoHashGridParams.checkPrecision(precision); + } + + public void size(int size) { + this.requiredSize = size; + } + + public void shardSize(int shardSize) { this.shardSize = shardSize; } @@ -143,6 +153,23 @@ public class GeoHashGridParser implements Aggregator.Parser { protected Aggregator doCreateInternal(final ValuesSource.GeoPoint valuesSource, AggregationContext aggregationContext, Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { + if (shardSize == 0) { + shardSize = Integer.MAX_VALUE; + } + + if (requiredSize == 0) { + requiredSize = Integer.MAX_VALUE; + } + + if (shardSize < 0) { + // Use default heuristic to avoid any wrong-ranking caused by + // distributed counting + shardSize = BucketUtils.suggestShardSideQueueSize(requiredSize, aggregationContext.searchContext().numberOfShards()); + } + + if (shardSize < requiredSize) { + shardSize = requiredSize; + } if (collectsFromSingleBucket == false) { return asMultiBucketAggregator(this, aggregationContext, parent); } @@ -152,6 +179,52 @@ public class GeoHashGridParser implements Aggregator.Parser { } + @Override + protected ValuesSourceAggregatorFactory innerReadFrom( + String name, ValuesSourceType valuesSourceType, + ValueType targetValueType, StreamInput in) throws IOException { + GeoGridFactory factory = new GeoGridFactory(name); + factory.precision = in.readVInt(); + factory.requiredSize = in.readVInt(); + factory.shardSize = in.readVInt(); + return factory; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + out.writeVInt(precision); + out.writeVInt(requiredSize); + out.writeVInt(shardSize); + } + + @Override + protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + builder.field(GeoHashGridParams.FIELD_PRECISION.getPreferredName(), precision); + builder.field(GeoHashGridParams.FIELD_SIZE.getPreferredName(), requiredSize); + builder.field(GeoHashGridParams.FIELD_SHARD_SIZE.getPreferredName(), shardSize); + return builder; + } + + @Override + protected boolean innerEquals(Object obj) { + GeoGridFactory other = (GeoGridFactory) obj; + if (precision != other.precision) { + return false; + } + if (requiredSize != other.requiredSize) { + return false; + } + if (shardSize != other.shardSize) { + return false; + } + return true; + } + + @Override + protected int innerHashCode() { + return Objects.hash(precision, requiredSize, shardSize); + } + private static class CellValues extends SortingNumericDocValues { private MultiGeoPointValues geoValues; private int precision; @@ -209,10 +282,4 @@ public class GeoHashGridParser implements Aggregator.Parser { } } - // NORELEASE implement this method when refactoring this aggregation - @Override - public AggregatorFactory getFactoryPrototype() { - return null; - } - } \ No newline at end of file diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java new file mode 100644 index 00000000000..8836ece2e47 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoHashGridTests.java @@ -0,0 +1,67 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.geogrid.GeoHashGridParser.GeoGridFactory; + +public class GeoHashGridTests extends BaseAggregationTestCase { + + @Override + protected GeoGridFactory createTestAggregatorFactory() { + String name = randomAsciiOfLengthBetween(3, 20); + GeoGridFactory factory = new GeoGridFactory(name); + if (randomBoolean()) { + int precision = randomIntBetween(1, 12); + factory.precision(precision); + } + if (randomBoolean()) { + int size = randomInt(5); + switch (size) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + size = randomInt(); + break; + } + factory.size(size); + + } + if (randomBoolean()) { + int shardSize = randomInt(5); + switch (shardSize) { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + shardSize = randomInt(); + break; + } + factory.shardSize(shardSize); + } + return factory; + } + +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParserTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParserTests.java index cd7dadd7eeb..6dd485beddb 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParserTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoHashGridParserTests.java @@ -31,6 +31,8 @@ public class GeoHashGridParserTests extends ESTestCase { int precision = randomIntBetween(1, 12); XContentParser stParser = JsonXContent.jsonXContent.createParser( "{\"field\":\"my_loc\", \"precision\":" + precision + ", \"size\": 500, \"shard_size\": 550}"); + XContentParser.Token token = stParser.nextToken(); + assertSame(XContentParser.Token.START_OBJECT, token); GeoHashGridParser parser = new GeoHashGridParser(); // can create a factory assertNotNull(parser.parse("geohash_grid", stParser, searchContext)); @@ -41,6 +43,8 @@ public class GeoHashGridParserTests extends ESTestCase { int precision = randomIntBetween(1, 12); XContentParser stParser = JsonXContent.jsonXContent.createParser( "{\"field\":\"my_loc\", \"precision\":\"" + precision + "\", \"size\": \"500\", \"shard_size\": \"550\"}"); + XContentParser.Token token = stParser.nextToken(); + assertSame(XContentParser.Token.START_OBJECT, token); GeoHashGridParser parser = new GeoHashGridParser(); // can create a factory assertNotNull(parser.parse("geohash_grid", stParser, searchContext)); @@ -49,6 +53,8 @@ public class GeoHashGridParserTests extends ESTestCase { public void testParseErrorOnNonIntPrecision() throws Exception { SearchContext searchContext = new TestSearchContext(); XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"2.0\"}"); + XContentParser.Token token = stParser.nextToken(); + assertSame(XContentParser.Token.START_OBJECT, token); GeoHashGridParser parser = new GeoHashGridParser(); try { parser.parse("geohash_grid", stParser, searchContext); @@ -61,18 +67,22 @@ public class GeoHashGridParserTests extends ESTestCase { public void testParseErrorOnBooleanPrecision() throws Exception { SearchContext searchContext = new TestSearchContext(); XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":false}"); + XContentParser.Token token = stParser.nextToken(); + assertSame(XContentParser.Token.START_OBJECT, token); GeoHashGridParser parser = new GeoHashGridParser(); try { parser.parse("geohash_grid", stParser, searchContext); fail(); } catch (SearchParseException ex) { - assertEquals("Unexpected token VALUE_BOOLEAN in [geohash_grid].", ex.getMessage()); + assertEquals("Unexpected token VALUE_BOOLEAN [precision] in [geohash_grid].", ex.getMessage()); } } public void testParseErrorOnPrecisionOutOfRange() throws Exception { SearchContext searchContext = new TestSearchContext(); XContentParser stParser = JsonXContent.jsonXContent.createParser("{\"field\":\"my_loc\", \"precision\":\"13\"}"); + XContentParser.Token token = stParser.nextToken(); + assertSame(XContentParser.Token.START_OBJECT, token); GeoHashGridParser parser = new GeoHashGridParser(); try { parser.parse("geohash_grid", stParser, searchContext);