[TEST] Added unit tests for GeoHashGridAggregator and InternalGeoHashGrid

Part of #22278
This commit is contained in:
Martijn van Groningen 2017-02-28 22:24:07 +01:00
parent b388389ada
commit 524d7f592d
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
7 changed files with 261 additions and 25 deletions

View File

@ -24,7 +24,6 @@ import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
@ -39,9 +38,7 @@ import java.util.Map;
/**
* Aggregates data expressed as GeoHash longs (for efficiency's sake) but formats results as Geohash strings.
*
*/
public class GeoHashGridAggregator extends BucketsAggregator {
private final int requiredSize;
@ -49,7 +46,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
private final GeoGridAggregationBuilder.CellIdSource valuesSource;
private final LongHash bucketOrds;
public GeoHashGridAggregator(String name, AggregatorFactories factories, GeoGridAggregationBuilder.CellIdSource valuesSource,
GeoHashGridAggregator(String name, AggregatorFactories factories, GeoGridAggregationBuilder.CellIdSource valuesSource,
int requiredSize, int shardSize, SearchContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
@ -99,7 +96,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
long bucketOrd;
OrdinalBucket() {
super(0, 0, (InternalAggregations) null);
super(0, 0, null);
}
}
@ -133,7 +130,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
@Override
public InternalGeoHashGrid buildEmptyAggregation() {
return new InternalGeoHashGrid(name, requiredSize, Collections.<InternalGeoHashGrid.Bucket> emptyList(), pipelineAggregators(), metaData());
return new InternalGeoHashGrid(name, requiredSize, Collections.emptyList(), pipelineAggregators(), metaData());
}

View File

@ -43,7 +43,7 @@ public class GeoHashGridAggregatorFactory extends ValuesSourceAggregatorFactory<
private final int requiredSize;
private final int shardSize;
public GeoHashGridAggregatorFactory(String name, ValuesSourceConfig<GeoPoint> config, int precision, int requiredSize,
GeoHashGridAggregatorFactory(String name, ValuesSourceConfig<GeoPoint> config, int precision, int requiredSize,
int shardSize, SearchContext context, AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData);

View File

@ -24,17 +24,14 @@ import org.elasticsearch.common.ParseField;
* Encapsulates relevant parameter defaults and validations for the geo hash grid aggregation.
*/
final class GeoHashGridParams {
/* default values */
public static final int DEFAULT_PRECISION = 5;
public static final int DEFAULT_MAX_NUM_CELLS = 10000;
/* recognized field names in JSON */
public static final ParseField FIELD_PRECISION = new ParseField("precision");
public static final ParseField FIELD_SIZE = new ParseField("size");
public static final ParseField FIELD_SHARD_SIZE = new ParseField("shard_size");
static final ParseField FIELD_PRECISION = new ParseField("precision");
static final ParseField FIELD_SIZE = new ParseField("size");
static final ParseField FIELD_SHARD_SIZE = new ParseField("shard_size");
public static int checkPrecision(int precision) {
static int checkPrecision(int precision) {
if ((precision < 1) || (precision > 12)) {
throw new IllegalArgumentException("Invalid geohash aggregation precision of " + precision
+ ". Must be between 1 and 12.");

View File

@ -36,6 +36,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static java.util.Collections.unmodifiableList;
@ -74,7 +75,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
aggregations.writeTo(out);
}
@Override
public String getKeyAsString() {
return GeoHashUtils.stringEncode(geohashAsLong);
@ -126,12 +126,28 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
builder.endObject();
return builder;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Bucket bucket = (Bucket) o;
return geohashAsLong == bucket.geohashAsLong &&
docCount == bucket.docCount &&
Objects.equals(aggregations, bucket.aggregations);
}
@Override
public int hashCode() {
return Objects.hash(geohashAsLong, docCount, aggregations);
}
}
private final int requiredSize;
private final List<Bucket> buckets;
public InternalGeoHashGrid(String name, int requiredSize, List<Bucket> buckets, List<PipelineAggregator> pipelineAggregators,
InternalGeoHashGrid(String name, int requiredSize, List<Bucket> buckets, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(name, pipelineAggregators, metaData);
this.requiredSize = requiredSize;
@ -175,7 +191,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
@Override
public InternalGeoHashGrid doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
LongObjectPagedHashMap<List<Bucket>> buckets = null;
for (InternalAggregation aggregation : aggregations) {
InternalGeoHashGrid grid = (InternalGeoHashGrid) aggregation;
@ -216,6 +231,23 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
return builder;
}
// package protected for testing
int getRequiredSize() {
return requiredSize;
}
@Override
protected int doHashCode() {
return Objects.hash(requiredSize, buckets);
}
@Override
protected boolean doEquals(Object obj) {
InternalGeoHashGrid other = (InternalGeoHashGrid) obj;
return Objects.equals(requiredSize, other.requiredSize) &&
Objects.equals(buckets, other.buckets);
}
static class BucketPriorityQueue extends PriorityQueue<Bucket> {
BucketPriorityQueue(int size) {
@ -224,14 +256,14 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
@Override
protected boolean lessThan(Bucket o1, Bucket o2) {
long i = o2.getDocCount() - o1.getDocCount();
if (i == 0) {
i = o2.compareTo(o1);
if (i == 0) {
i = System.identityHashCode(o2) - System.identityHashCode(o1);
int cmp = Long.compare(o2.getDocCount(), o1.getDocCount());
if (cmp == 0) {
cmp = o2.compareTo(o1);
if (cmp == 0) {
cmp = System.identityHashCode(o2) - System.identityHashCode(o1);
}
}
return i > 0;
return cmp > 0;
}
}
}

View File

@ -21,6 +21,8 @@ package org.elasticsearch.search.aggregations;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
@ -57,17 +59,18 @@ public abstract class InternalAggregationTestCase<T extends InternalAggregation>
toReduce.add(t);
}
ScriptService mockScriptService = mockScriptService();
MockBigArrays bigArrays = new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService());
if (randomBoolean() && toReduce.size() > 1) {
// we leave at least the first element in the list
List<InternalAggregation> internalAggregations = randomSubsetOf(randomIntBetween(1, toReduceSize - 1),
toReduce.subList(1, toReduceSize));
InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(null, mockScriptService, false);
InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(bigArrays, mockScriptService, false);
@SuppressWarnings("unchecked")
T reduced = (T) inputs.get(0).reduce(internalAggregations, context);
toReduce.removeAll(internalAggregations);
toReduce.add(reduced);
}
InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(null, mockScriptService, true);
InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(bigArrays, mockScriptService, true);
@SuppressWarnings("unchecked")
T reduced = (T) inputs.get(0).reduce(toReduce, context);
assertReduced(reduced, inputs);

View File

@ -0,0 +1,117 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import static org.elasticsearch.common.geo.GeoHashUtils.stringEncode;
public class GeoHashGridAggregatorTests extends AggregatorTestCase {
private static final String FIELD_NAME = "location";
public void testNoDocs() throws IOException {
testCase(new MatchAllDocsQuery(), FIELD_NAME, 1, iw -> {
// Intentionally not writing any docs
}, geoHashGrid -> {
assertEquals(0, geoHashGrid.getBuckets().size());
});
}
public void testFieldMissing() throws IOException {
testCase(new MatchAllDocsQuery(), "wrong_field", 1, iw -> {
iw.addDocument(Collections.singleton(new LatLonDocValuesField(FIELD_NAME, 10D, 10D)));
}, geoHashGrid -> {
assertEquals(0, geoHashGrid.getBuckets().size());
});
}
public void testWithSeveralDocs() throws IOException {
int precision = randomIntBetween(1, 12);
int numPoints = randomIntBetween(8, 128);
Map<String, Integer> expectedCountPerGeoHash = new HashMap<>();
testCase(new MatchAllDocsQuery(), FIELD_NAME, precision, iw -> {
List<LatLonDocValuesField> points = new ArrayList<>();
for (int pointId = 0; pointId < numPoints; pointId++) {
double lat = (180d * randomDouble()) - 90d;
double lng = (360d * randomDouble()) - 180d;
points.add(new LatLonDocValuesField(FIELD_NAME, lat, lng));
String hash = stringEncode(lng, lat, precision);
expectedCountPerGeoHash.put(hash, expectedCountPerGeoHash.getOrDefault(hash, 0) + 1);
if (usually()) {
iw.addDocument(points);
points.clear();
}
}
if (points.size() != 0) {
iw.addDocument(points);
}
}, geoHashGrid -> {
assertEquals(expectedCountPerGeoHash.size(), geoHashGrid.getBuckets().size());
for (GeoHashGrid.Bucket bucket : geoHashGrid.getBuckets()) {
assertEquals((long) expectedCountPerGeoHash.get(bucket.getKeyAsString()), bucket.getDocCount());
}
});
}
private void testCase(Query query, String field, int precision, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<InternalGeoHashGrid> verify) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
buildIndex.accept(indexWriter);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
GeoGridAggregationBuilder aggregationBuilder = new GeoGridAggregationBuilder("_name").field(field);
aggregationBuilder.precision(precision);
MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
fieldType.setHasDocValues(true);
fieldType.setName(FIELD_NAME);
try (Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)) {
aggregator.preCollection();
indexSearcher.search(query, aggregator);
aggregator.postCollection();
verify.accept((InternalGeoHashGrid) aggregator.buildAggregation(0L));
}
indexReader.close();
directory.close();
}
}

View File

@ -0,0 +1,90 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;
import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class InternalGeoHashGridTests extends InternalAggregationTestCase<InternalGeoHashGrid> {
@Override
protected InternalGeoHashGrid createTestInstance(String name, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
int size = randomIntBetween(1, 100);
List<InternalGeoHashGrid.Bucket> buckets = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
long geoHashAsLong = GeoHashUtils.longEncode(randomInt(90), randomInt(90), 4);
buckets.add(new InternalGeoHashGrid.Bucket(geoHashAsLong, randomInt(IndexWriter.MAX_DOCS), InternalAggregations.EMPTY));
}
return new InternalGeoHashGrid(name, size, buckets, pipelineAggregators, metaData);
}
@Override
protected Writeable.Reader<InternalGeoHashGrid> instanceReader() {
return InternalGeoHashGrid::new;
}
@Override
protected void assertReduced(InternalGeoHashGrid reduced, List<InternalGeoHashGrid> inputs) {
Map<Long, List<InternalGeoHashGrid.Bucket>> map = new HashMap<>();
for (InternalGeoHashGrid input : inputs) {
for (GeoHashGrid.Bucket bucket : input.getBuckets()) {
InternalGeoHashGrid.Bucket internalBucket = (InternalGeoHashGrid.Bucket) bucket;
List<InternalGeoHashGrid.Bucket> buckets = map.get(internalBucket.geohashAsLong);
if (buckets == null) {
map.put(internalBucket.geohashAsLong, buckets = new ArrayList<>());
}
buckets.add(internalBucket);
}
}
List<InternalGeoHashGrid.Bucket> expectedBuckets = new ArrayList<>();
for (Map.Entry<Long, List<InternalGeoHashGrid.Bucket>> entry : map.entrySet()) {
long docCount = 0;
for (InternalGeoHashGrid.Bucket bucket : entry.getValue()) {
docCount += bucket.docCount;
}
expectedBuckets.add(new InternalGeoHashGrid.Bucket(entry.getKey(), docCount, InternalAggregations.EMPTY));
}
expectedBuckets.sort((first, second) -> {
int cmp = Long.compare(second.docCount, first.docCount);
if (cmp == 0) {
return second.compareTo(first);
}
return cmp;
});
int requestedSize = inputs.get(0).getRequiredSize();
expectedBuckets = expectedBuckets.subList(0, Math.min(requestedSize, expectedBuckets.size()));
assertEquals(expectedBuckets.size(), reduced.getBuckets().size());
for (int i = 0; i < reduced.getBuckets().size(); i++) {
GeoHashGrid.Bucket expected = expectedBuckets.get(i);
GeoHashGrid.Bucket actual = reduced.getBuckets().get(i);
assertEquals(expected.getDocCount(), actual.getDocCount());
assertEquals(expected.getKey(), actual.getKey());
}
}
}