[TEST] Added unit tests for GeoHashGridAggregator and InternalGeoHashGrid
Part of #22278
This commit is contained in:
parent
b388389ada
commit
524d7f592d
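For context, the classes touched below implement the geohash_grid bucket aggregation. A minimal sketch of building that aggregation through the Java API (illustrative only, not part of this commit; the field name is assumed):

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.geogrid.GeoGridAggregationBuilder;

class GeoHashGridExample {
    // Build a geohash_grid aggregation over a geo_point field at precision 5,
    // keeping at most 1000 cells in the response. "location" is an assumed field name.
    static GeoGridAggregationBuilder cellsByGeohash() {
        return AggregationBuilders.geohashGrid("cells")
                .field("location")
                .precision(5)
                .size(1000);
    }
}
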
@@ -24,7 +24,6 @@ import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
-import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
@@ -39,9 +38,7 @@ import java.util.Map;

/**
 * Aggregates data expressed as GeoHash longs (for efficiency's sake) but formats results as Geohash strings.
 *
 */

public class GeoHashGridAggregator extends BucketsAggregator {

    private final int requiredSize;
@@ -49,7 +46,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
    private final GeoGridAggregationBuilder.CellIdSource valuesSource;
    private final LongHash bucketOrds;

-    public GeoHashGridAggregator(String name, AggregatorFactories factories, GeoGridAggregationBuilder.CellIdSource valuesSource,
+    GeoHashGridAggregator(String name, AggregatorFactories factories, GeoGridAggregationBuilder.CellIdSource valuesSource,
            int requiredSize, int shardSize, SearchContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) throws IOException {
        super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
@@ -99,7 +96,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
        long bucketOrd;

        OrdinalBucket() {
-            super(0, 0, (InternalAggregations) null);
+            super(0, 0, null);
        }

    }
@@ -133,7 +130,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {

    @Override
    public InternalGeoHashGrid buildEmptyAggregation() {
-        return new InternalGeoHashGrid(name, requiredSize, Collections.<InternalGeoHashGrid.Bucket> emptyList(), pipelineAggregators(), metaData());
+        return new InternalGeoHashGrid(name, requiredSize, Collections.emptyList(), pipelineAggregators(), metaData());
    }

@@ -43,7 +43,7 @@ public class GeoHashGridAggregatorFactory extends ValuesSourceAggregatorFactory<
    private final int requiredSize;
    private final int shardSize;

-    public GeoHashGridAggregatorFactory(String name, ValuesSourceConfig<GeoPoint> config, int precision, int requiredSize,
+    GeoHashGridAggregatorFactory(String name, ValuesSourceConfig<GeoPoint> config, int precision, int requiredSize,
            int shardSize, SearchContext context, AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder,
            Map<String, Object> metaData) throws IOException {
        super(name, config, context, parent, subFactoriesBuilder, metaData);

@@ -24,17 +24,14 @@ import org.elasticsearch.common.ParseField;
 * Encapsulates relevant parameter defaults and validations for the geo hash grid aggregation.
 */
final class GeoHashGridParams {
    /* default values */
    public static final int DEFAULT_PRECISION = 5;
    public static final int DEFAULT_MAX_NUM_CELLS = 10000;

    /* recognized field names in JSON */
-    public static final ParseField FIELD_PRECISION = new ParseField("precision");
-    public static final ParseField FIELD_SIZE = new ParseField("size");
-    public static final ParseField FIELD_SHARD_SIZE = new ParseField("shard_size");
+    static final ParseField FIELD_PRECISION = new ParseField("precision");
+    static final ParseField FIELD_SIZE = new ParseField("size");
+    static final ParseField FIELD_SHARD_SIZE = new ParseField("shard_size");


-    public static int checkPrecision(int precision) {
+    static int checkPrecision(int precision) {
        if ((precision < 1) || (precision > 12)) {
            throw new IllegalArgumentException("Invalid geohash aggregation precision of " + precision
                + ". Must be between 1 and 12.");

@@ -36,6 +36,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.Objects;

import static java.util.Collections.unmodifiableList;

@@ -74,7 +75,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
            aggregations.writeTo(out);
        }


        @Override
        public String getKeyAsString() {
            return GeoHashUtils.stringEncode(geohashAsLong);
@@ -126,12 +126,28 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
            builder.endObject();
            return builder;
        }

+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            Bucket bucket = (Bucket) o;
+            return geohashAsLong == bucket.geohashAsLong &&
+                docCount == bucket.docCount &&
+                Objects.equals(aggregations, bucket.aggregations);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(geohashAsLong, docCount, aggregations);
+        }
+
    }

    private final int requiredSize;
    private final List<Bucket> buckets;

-    public InternalGeoHashGrid(String name, int requiredSize, List<Bucket> buckets, List<PipelineAggregator> pipelineAggregators,
+    InternalGeoHashGrid(String name, int requiredSize, List<Bucket> buckets, List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) {
        super(name, pipelineAggregators, metaData);
        this.requiredSize = requiredSize;
@@ -175,7 +191,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal

    @Override
    public InternalGeoHashGrid doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {

        LongObjectPagedHashMap<List<Bucket>> buckets = null;
        for (InternalAggregation aggregation : aggregations) {
            InternalGeoHashGrid grid = (InternalGeoHashGrid) aggregation;
@@ -216,6 +231,23 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
        return builder;
    }

+    // package protected for testing
+    int getRequiredSize() {
+        return requiredSize;
+    }
+
+    @Override
+    protected int doHashCode() {
+        return Objects.hash(requiredSize, buckets);
+    }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        InternalGeoHashGrid other = (InternalGeoHashGrid) obj;
+        return Objects.equals(requiredSize, other.requiredSize) &&
+            Objects.equals(buckets, other.buckets);
+    }
+
    static class BucketPriorityQueue extends PriorityQueue<Bucket> {

        BucketPriorityQueue(int size) {
@@ -224,14 +256,14 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal

        @Override
        protected boolean lessThan(Bucket o1, Bucket o2) {
-            long i = o2.getDocCount() - o1.getDocCount();
-            if (i == 0) {
-                i = o2.compareTo(o1);
-                if (i == 0) {
-                    i = System.identityHashCode(o2) - System.identityHashCode(o1);
+            int cmp = Long.compare(o2.getDocCount(), o1.getDocCount());
+            if (cmp == 0) {
+                cmp = o2.compareTo(o1);
+                if (cmp == 0) {
+                    cmp = System.identityHashCode(o2) - System.identityHashCode(o1);
                }
            }
-            return i > 0;
+            return cmp > 0;
        }
    }
}

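A side note on the comparator change above: the old code ordered buckets by subtracting doc counts, a pattern that can overflow a long and flip the sign, while Long.compare is overflow-safe. Doc counts are non-negative in practice, so here the change is mostly defensive and for readability; the sketch below (assumed values, unrelated to the commit) shows the hazard in general:

class CompareOverflowDemo {
    public static void main(String[] args) {
        long a = Long.MAX_VALUE;
        long b = -1L;
        // a - b overflows and wraps to Long.MIN_VALUE, a negative number,
        // so a subtraction-based comparator would wrongly order a before b.
        System.out.println(a - b);              // -9223372036854775808
        // Long.compare avoids the overflow and reports the correct ordering.
        System.out.println(Long.compare(a, b)); // 1
    }
}
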
@@ -21,6 +21,8 @@ package org.elasticsearch.search.aggregations;

import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.MockBigArrays;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
@@ -57,17 +59,18 @@ public abstract class InternalAggregationTestCase<T extends InternalAggregation>
            toReduce.add(t);
        }
        ScriptService mockScriptService = mockScriptService();
+        MockBigArrays bigArrays = new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService());
        if (randomBoolean() && toReduce.size() > 1) {
            // we leave at least the first element in the list
            List<InternalAggregation> internalAggregations = randomSubsetOf(randomIntBetween(1, toReduceSize - 1),
                    toReduce.subList(1, toReduceSize));
-            InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(null, mockScriptService, false);
+            InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(bigArrays, mockScriptService, false);
            @SuppressWarnings("unchecked")
            T reduced = (T) inputs.get(0).reduce(internalAggregations, context);
            toReduce.removeAll(internalAggregations);
            toReduce.add(reduced);
        }
-        InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(null, mockScriptService, true);
+        InternalAggregation.ReduceContext context = new InternalAggregation.ReduceContext(bigArrays, mockScriptService, true);
        @SuppressWarnings("unchecked")
        T reduced = (T) inputs.get(0).reduce(toReduce, context);
        assertReduced(reduced, inputs);

@@ -0,0 +1,117 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

import static org.elasticsearch.common.geo.GeoHashUtils.stringEncode;

public class GeoHashGridAggregatorTests extends AggregatorTestCase {

    private static final String FIELD_NAME = "location";

    public void testNoDocs() throws IOException {
        testCase(new MatchAllDocsQuery(), FIELD_NAME, 1, iw -> {
            // Intentionally not writing any docs
        }, geoHashGrid -> {
            assertEquals(0, geoHashGrid.getBuckets().size());
        });
    }

    public void testFieldMissing() throws IOException {
        testCase(new MatchAllDocsQuery(), "wrong_field", 1, iw -> {
            iw.addDocument(Collections.singleton(new LatLonDocValuesField(FIELD_NAME, 10D, 10D)));
        }, geoHashGrid -> {
            assertEquals(0, geoHashGrid.getBuckets().size());
        });
    }

    public void testWithSeveralDocs() throws IOException {
        int precision = randomIntBetween(1, 12);
        int numPoints = randomIntBetween(8, 128);
        Map<String, Integer> expectedCountPerGeoHash = new HashMap<>();
        testCase(new MatchAllDocsQuery(), FIELD_NAME, precision, iw -> {
            List<LatLonDocValuesField> points = new ArrayList<>();
            for (int pointId = 0; pointId < numPoints; pointId++) {
                double lat = (180d * randomDouble()) - 90d;
                double lng = (360d * randomDouble()) - 180d;
                points.add(new LatLonDocValuesField(FIELD_NAME, lat, lng));
                String hash = stringEncode(lng, lat, precision);
                expectedCountPerGeoHash.put(hash, expectedCountPerGeoHash.getOrDefault(hash, 0) + 1);
                if (usually()) {
                    iw.addDocument(points);
                    points.clear();
                }
            }
            if (points.size() != 0) {
                iw.addDocument(points);
            }
        }, geoHashGrid -> {
            assertEquals(expectedCountPerGeoHash.size(), geoHashGrid.getBuckets().size());
            for (GeoHashGrid.Bucket bucket : geoHashGrid.getBuckets()) {
                assertEquals((long) expectedCountPerGeoHash.get(bucket.getKeyAsString()), bucket.getDocCount());
            }
        });
    }

    private void testCase(Query query, String field, int precision, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
            Consumer<InternalGeoHashGrid> verify) throws IOException {
        Directory directory = newDirectory();
        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
        buildIndex.accept(indexWriter);
        indexWriter.close();

        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

        GeoGridAggregationBuilder aggregationBuilder = new GeoGridAggregationBuilder("_name").field(field);
        aggregationBuilder.precision(precision);
        MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
        fieldType.setHasDocValues(true);
        fieldType.setName(FIELD_NAME);
        try (Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)) {
            aggregator.preCollection();
            indexSearcher.search(query, aggregator);
            aggregator.postCollection();
            verify.accept((InternalGeoHashGrid) aggregator.buildAggregation(0L));
        }
        indexReader.close();
        directory.close();
    }
}

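One detail worth noting in the test above: org.elasticsearch.common.geo.GeoHashUtils takes longitude before latitude, which is why the expected hash is computed as stringEncode(lng, lat, precision). A quick standalone check (coordinates and the expected value are illustrative):

import org.elasticsearch.common.geo.GeoHashUtils;

class GeoHashEncodeCheck {
    public static void main(String[] args) {
        // Arguments are longitude, latitude, then precision (number of geohash characters).
        String hash = GeoHashUtils.stringEncode(-0.1278, 51.5074, 5);
        System.out.println(hash); // expected "gcpvj" for central London
    }
}
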
@@ -0,0 +1,90 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InternalGeoHashGridTests extends InternalAggregationTestCase<InternalGeoHashGrid> {

    @Override
    protected InternalGeoHashGrid createTestInstance(String name, List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) {
        int size = randomIntBetween(1, 100);
        List<InternalGeoHashGrid.Bucket> buckets = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            long geoHashAsLong = GeoHashUtils.longEncode(randomInt(90), randomInt(90), 4);
            buckets.add(new InternalGeoHashGrid.Bucket(geoHashAsLong, randomInt(IndexWriter.MAX_DOCS), InternalAggregations.EMPTY));
        }
        return new InternalGeoHashGrid(name, size, buckets, pipelineAggregators, metaData);
    }

    @Override
    protected Writeable.Reader<InternalGeoHashGrid> instanceReader() {
        return InternalGeoHashGrid::new;
    }

    @Override
    protected void assertReduced(InternalGeoHashGrid reduced, List<InternalGeoHashGrid> inputs) {
        Map<Long, List<InternalGeoHashGrid.Bucket>> map = new HashMap<>();
        for (InternalGeoHashGrid input : inputs) {
            for (GeoHashGrid.Bucket bucket : input.getBuckets()) {
                InternalGeoHashGrid.Bucket internalBucket = (InternalGeoHashGrid.Bucket) bucket;
                List<InternalGeoHashGrid.Bucket> buckets = map.get(internalBucket.geohashAsLong);
                if (buckets == null) {
                    map.put(internalBucket.geohashAsLong, buckets = new ArrayList<>());
                }
                buckets.add(internalBucket);
            }
        }
        List<InternalGeoHashGrid.Bucket> expectedBuckets = new ArrayList<>();
        for (Map.Entry<Long, List<InternalGeoHashGrid.Bucket>> entry : map.entrySet()) {
            long docCount = 0;
            for (InternalGeoHashGrid.Bucket bucket : entry.getValue()) {
                docCount += bucket.docCount;
            }
            expectedBuckets.add(new InternalGeoHashGrid.Bucket(entry.getKey(), docCount, InternalAggregations.EMPTY));
        }
        expectedBuckets.sort((first, second) -> {
            int cmp = Long.compare(second.docCount, first.docCount);
            if (cmp == 0) {
                return second.compareTo(first);
            }
            return cmp;
        });
        int requestedSize = inputs.get(0).getRequiredSize();
        expectedBuckets = expectedBuckets.subList(0, Math.min(requestedSize, expectedBuckets.size()));
        assertEquals(expectedBuckets.size(), reduced.getBuckets().size());
        for (int i = 0; i < reduced.getBuckets().size(); i++) {
            GeoHashGrid.Bucket expected = expectedBuckets.get(i);
            GeoHashGrid.Bucket actual = reduced.getBuckets().get(i);
            assertEquals(expected.getDocCount(), actual.getDocCount());
            assertEquals(expected.getKey(), actual.getKey());
        }
    }
}