Refactored GeoHashGrid unit tests (#37832)

* Refactored GeoHashGrid unit tests

This change allows other grid aggregations to reuse the same tests.

The change mostly moves existing code into the new shared base classes,
keeping other changes to a bare minimum.

* rename createInternalGeoHashGridBucket to createInternalGeoGridBucket

* indentation
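
To illustrate the intended reuse (a sketch, not part of this commit): a test class for a hypothetical tile-based grid aggregation would only need to implement the three hooks of GeoGridAggregatorTestCase. The names GeoTileGridAggregationBuilder, InternalGeoTileGridBucket, GeoTileUtils.stringEncode, and the 0-29 precision range below are assumptions made for the example, not code from this change.

    // Hypothetical subclass; all type names and the precision range are assumed.
    public class GeoTileGridAggregatorTests extends GeoGridAggregatorTestCase<InternalGeoTileGridBucket> {

        @Override
        protected int randomPrecision() {
            return randomIntBetween(0, 29); // assumed precision range for tiles
        }

        @Override
        protected String hashAsString(double lng, double lat, int precision) {
            return GeoTileUtils.stringEncode(lng, lat, precision); // assumed encoder
        }

        @Override
        protected GeoGridAggregationBuilder createBuilder(String name) {
            return new GeoTileGridAggregationBuilder(name);
        }
    }

The shared tests (testNoDocs, testFieldMissing, testWithSeveralDocs) would then run against the new aggregation without further changes.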
Yuri Astrakhan authored on 2019-01-25 13:37:24 -05:00, committed by GitHub
parent afd4618851
commit f1e71be8b2
4 changed files with 328 additions and 203 deletions

GeoGridAggregatorTestCase.java (new file)
@@ -0,0 +1,141 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;

public abstract class GeoGridAggregatorTestCase<T extends InternalGeoGridBucket> extends AggregatorTestCase {

    private static final String FIELD_NAME = "location";

    /**
     * Generate a random precision according to the rules of the given aggregation.
     */
    protected abstract int randomPrecision();

    /**
     * Convert a geo point into a hash string (the bucket's string ID).
     */
    protected abstract String hashAsString(double lng, double lat, int precision);

    /**
     * Create a new named {@link GeoGridAggregationBuilder}-derived builder.
     */
    protected abstract GeoGridAggregationBuilder createBuilder(String name);

    public void testNoDocs() throws IOException {
        testCase(new MatchAllDocsQuery(), FIELD_NAME, randomPrecision(), iw -> {
            // Intentionally not writing any docs
        }, geoGrid -> {
            assertEquals(0, geoGrid.getBuckets().size());
        });
    }

    public void testFieldMissing() throws IOException {
        testCase(new MatchAllDocsQuery(), "wrong_field", randomPrecision(), iw -> {
            iw.addDocument(Collections.singleton(new LatLonDocValuesField(FIELD_NAME, 10D, 10D)));
        }, geoGrid -> {
            assertEquals(0, geoGrid.getBuckets().size());
        });
    }

    public void testWithSeveralDocs() throws IOException {
        int precision = randomPrecision();
        int numPoints = randomIntBetween(8, 128);
        Map<String, Integer> expectedCountPerGeoHash = new HashMap<>();
        testCase(new MatchAllDocsQuery(), FIELD_NAME, precision, iw -> {
            List<LatLonDocValuesField> points = new ArrayList<>();
            Set<String> distinctHashesPerDoc = new HashSet<>();
            for (int pointId = 0; pointId < numPoints; pointId++) {
                double lat = (180d * randomDouble()) - 90d;
                double lng = (360d * randomDouble()) - 180d;
                points.add(new LatLonDocValuesField(FIELD_NAME, lat, lng));
                String hash = hashAsString(lng, lat, precision);
                // A bucket counts each document once, so when several points of the
                // same document fall into the same cell, the expected count for that
                // hash is only incremented the first time the hash is seen in the doc.
                if (distinctHashesPerDoc.contains(hash) == false) {
                    expectedCountPerGeoHash.put(hash, expectedCountPerGeoHash.getOrDefault(hash, 0) + 1);
                }
                distinctHashesPerDoc.add(hash);
                if (usually()) {
                    iw.addDocument(points);
                    points.clear();
                    distinctHashesPerDoc.clear();
                }
            }
            if (points.size() != 0) {
                iw.addDocument(points);
            }
        }, geoHashGrid -> {
            assertEquals(expectedCountPerGeoHash.size(), geoHashGrid.getBuckets().size());
            for (GeoGrid.Bucket bucket : geoHashGrid.getBuckets()) {
                assertEquals((long) expectedCountPerGeoHash.get(bucket.getKeyAsString()), bucket.getDocCount());
            }
            assertTrue(AggregationInspectionHelper.hasValue(geoHashGrid));
        });
    }

    private void testCase(Query query, String field, int precision, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
                          Consumer<InternalGeoGrid<T>> verify) throws IOException {
        Directory directory = newDirectory();
        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
        buildIndex.accept(indexWriter);
        indexWriter.close();

        IndexReader indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

        GeoGridAggregationBuilder aggregationBuilder = createBuilder("_name").field(field);
        aggregationBuilder.precision(precision);
        // The aggregator reads geo_point doc values, so they must be enabled on the field type
        MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
        fieldType.setHasDocValues(true);
        fieldType.setName(FIELD_NAME);

        Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
        aggregator.preCollection();
        indexSearcher.search(query, aggregator);
        aggregator.postCollection();
        verify.accept((InternalGeoGrid<T>) aggregator.buildAggregation(0L));

        indexReader.close();
        directory.close();
    }
}

GeoGridTestCase.java (new file)
@@ -0,0 +1,167 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;

public abstract class GeoGridTestCase<B extends InternalGeoGridBucket, T extends InternalGeoGrid<B>>
        extends InternalMultiBucketAggregationTestCase<T> {

    /**
     * Instantiate an {@link InternalGeoGrid}-derived class using the same parameters as the constructor.
     */
    protected abstract T createInternalGeoGrid(String name, int size, List<InternalGeoGridBucket> buckets,
                                               List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData);

    /**
     * Instantiate an {@link InternalGeoGridBucket}-derived class using the same parameters as the constructor.
     */
    protected abstract B createInternalGeoGridBucket(Long key, long docCount, InternalAggregations aggregations);

    /**
     * Encode longitude and latitude with a given precision as a long hash.
     */
    protected abstract long longEncode(double lng, double lat, int precision);

    /**
     * Generate a random precision according to the rules of the given aggregation.
     */
    protected abstract int randomPrecision();

    @Override
    protected int minNumberOfBuckets() {
        return 1;
    }

    @Override
    protected int maxNumberOfBuckets() {
        return 3;
    }

    @Override
    protected T createTestInstance(String name,
                                   List<PipelineAggregator> pipelineAggregators,
                                   Map<String, Object> metaData,
                                   InternalAggregations aggregations) {
        final int precision = randomPrecision();
        int size = randomNumberOfBuckets();
        List<InternalGeoGridBucket> buckets = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            double latitude = randomDoubleBetween(-90.0, 90.0, false);
            double longitude = randomDoubleBetween(-180.0, 180.0, false);
            long hashAsLong = longEncode(longitude, latitude, precision);
            buckets.add(createInternalGeoGridBucket(hashAsLong, randomInt(IndexWriter.MAX_DOCS), aggregations));
        }
        return createInternalGeoGrid(name, size, buckets, pipelineAggregators, metaData);
    }

    @Override
    protected void assertReduced(T reduced, List<T> inputs) {
        // Group the buckets of all input (shard-level) aggregations by their hash
        Map<Long, List<B>> map = new HashMap<>();
        for (T input : inputs) {
            for (GeoGrid.Bucket bucketBase : input.getBuckets()) {
                B bucket = (B) bucketBase;
                List<B> buckets = map.get(bucket.hashAsLong);
                if (buckets == null) {
                    map.put(bucket.hashAsLong, buckets = new ArrayList<>());
                }
                buckets.add(bucket);
            }
        }
        // Sum the doc counts within each group to build the expected merged buckets
        List<B> expectedBuckets = new ArrayList<>();
        for (Map.Entry<Long, List<B>> entry : map.entrySet()) {
            long docCount = 0;
            for (B bucket : entry.getValue()) {
                docCount += bucket.docCount;
            }
            expectedBuckets.add(createInternalGeoGridBucket(entry.getKey(), docCount, InternalAggregations.EMPTY));
        }
        // Sort by doc count descending, breaking ties with the bucket comparator,
        // and keep only the requested number of buckets
        expectedBuckets.sort((first, second) -> {
            int cmp = Long.compare(second.docCount, first.docCount);
            if (cmp == 0) {
                return second.compareTo(first);
            }
            return cmp;
        });
        int requestedSize = inputs.get(0).getRequiredSize();
        expectedBuckets = expectedBuckets.subList(0, Math.min(requestedSize, expectedBuckets.size()));
        assertEquals(expectedBuckets.size(), reduced.getBuckets().size());
        for (int i = 0; i < reduced.getBuckets().size(); i++) {
            GeoGrid.Bucket expected = expectedBuckets.get(i);
            GeoGrid.Bucket actual = reduced.getBuckets().get(i);
            assertEquals(expected.getDocCount(), actual.getDocCount());
            assertEquals(expected.getKey(), actual.getKey());
        }
    }

    @Override
    protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
        return ParsedGeoGrid.class;
    }

    @Override
    protected T mutateInstance(T instance) {
        String name = instance.getName();
        int size = instance.getRequiredSize();
        List<InternalGeoGridBucket> buckets = instance.getBuckets();
        List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
        Map<String, Object> metaData = instance.getMetaData();
        switch (between(0, 3)) {
            case 0:
                name += randomAlphaOfLength(5);
                break;
            case 1:
                buckets = new ArrayList<>(buckets);
                buckets.add(
                    createInternalGeoGridBucket(randomNonNegativeLong(), randomInt(IndexWriter.MAX_DOCS), InternalAggregations.EMPTY));
                break;
            case 2:
                size = size + between(1, 10);
                break;
            case 3:
                if (metaData == null) {
                    metaData = new HashMap<>(1);
                } else {
                    metaData = new HashMap<>(instance.getMetaData());
                }
                metaData.put(randomAlphaOfLength(15), randomInt());
                break;
            default:
                throw new AssertionError("Illegal randomisation branch");
        }
        return createInternalGeoGrid(name, size, buckets, pipelineAggregators, metaData);
    }

    public void testCreateFromBuckets() {
        InternalGeoGrid original = createTestInstance();
        assertThat(original, equalTo(original.create(original.buckets)));
    }
}

GeoHashGridAggregatorTests.java
@@ -16,114 +16,25 @@
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.elasticsearch.search.aggregations.bucket.geogrid;
-import org.apache.lucene.document.LatLonDocValuesField;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.store.Directory;
-import org.elasticsearch.common.CheckedConsumer;
-import org.elasticsearch.index.mapper.GeoPointFieldMapper;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.search.aggregations.Aggregator;
-import org.elasticsearch.search.aggregations.AggregatorTestCase;
-import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.function.Consumer;
 import static org.elasticsearch.common.geo.GeoHashUtils.stringEncode;
-public class GeoHashGridAggregatorTests extends AggregatorTestCase {
+public class GeoHashGridAggregatorTests extends GeoGridAggregatorTestCase<InternalGeoHashGridBucket> {
-    private static final String FIELD_NAME = "location";
-    public void testNoDocs() throws IOException {
-        testCase(new MatchAllDocsQuery(), FIELD_NAME, 1, iw -> {
-            // Intentionally not writing any docs
-        }, geoHashGrid -> {
-            assertEquals(0, geoHashGrid.getBuckets().size());
-            assertFalse(AggregationInspectionHelper.hasValue(geoHashGrid));
-        });
+    @Override
+    protected int randomPrecision() {
+        return randomIntBetween(1, 12);
     }
-    public void testFieldMissing() throws IOException {
-        testCase(new MatchAllDocsQuery(), "wrong_field", 1, iw -> {
-            iw.addDocument(Collections.singleton(new LatLonDocValuesField(FIELD_NAME, 10D, 10D)));
-        }, geoHashGrid -> {
-            assertEquals(0, geoHashGrid.getBuckets().size());
-            assertFalse(AggregationInspectionHelper.hasValue(geoHashGrid));
-        });
+    @Override
+    protected String hashAsString(double lng, double lat, int precision) {
+        return stringEncode(lng, lat, precision);
     }
-    public void testWithSeveralDocs() throws IOException {
-        int precision = randomIntBetween(1, 12);
-        int numPoints = randomIntBetween(8, 128);
-        Map<String, Integer> expectedCountPerGeoHash = new HashMap<>();
-        testCase(new MatchAllDocsQuery(), FIELD_NAME, precision, iw -> {
-            List<LatLonDocValuesField> points = new ArrayList<>();
-            Set<String> distinctHashesPerDoc = new HashSet<>();
-            for (int pointId = 0; pointId < numPoints; pointId++) {
-                double lat = (180d * randomDouble()) - 90d;
-                double lng = (360d * randomDouble()) - 180d;
-                points.add(new LatLonDocValuesField(FIELD_NAME, lat, lng));
-                String hash = stringEncode(lng, lat, precision);
-                if (distinctHashesPerDoc.contains(hash) == false) {
-                    expectedCountPerGeoHash.put(hash, expectedCountPerGeoHash.getOrDefault(hash, 0) + 1);
-                }
-                distinctHashesPerDoc.add(hash);
-                if (usually()) {
-                    iw.addDocument(points);
-                    points.clear();
-                    distinctHashesPerDoc.clear();
-                }
-            }
-            if (points.size() != 0) {
-                iw.addDocument(points);
-            }
-        }, geoHashGrid -> {
-            assertEquals(expectedCountPerGeoHash.size(), geoHashGrid.getBuckets().size());
-            for (GeoGrid.Bucket bucket : geoHashGrid.getBuckets()) {
-                assertEquals((long) expectedCountPerGeoHash.get(bucket.getKeyAsString()), bucket.getDocCount());
-            }
-            assertTrue(AggregationInspectionHelper.hasValue(geoHashGrid));
-        });
-    }
-    private void testCase(Query query, String field, int precision, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
-                          Consumer<InternalGeoHashGrid> verify) throws IOException {
-        Directory directory = newDirectory();
-        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
-        buildIndex.accept(indexWriter);
-        indexWriter.close();
-        IndexReader indexReader = DirectoryReader.open(directory);
-        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
-        GeoGridAggregationBuilder aggregationBuilder = new GeoHashGridAggregationBuilder("_name").field(field);
-        aggregationBuilder.precision(precision);
-        MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
-        fieldType.setHasDocValues(true);
-        fieldType.setName(FIELD_NAME);
-        Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
-        aggregator.preCollection();
-        indexSearcher.search(query, aggregator);
-        aggregator.postCollection();
-        verify.accept((InternalGeoHashGrid) aggregator.buildAggregation(0L));
-        indexReader.close();
-        directory.close();
+    @Override
+    protected GeoGridAggregationBuilder createBuilder(String name) {
+        return new GeoHashGridAggregationBuilder(name);
     }
 }

GeoHashGridTests.java
@@ -18,47 +18,19 @@
  */
 package org.elasticsearch.search.aggregations.bucket.geogrid;
-import org.apache.lucene.index.IndexWriter;
 import org.elasticsearch.common.geo.GeoHashUtils;
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.search.aggregations.InternalAggregations;
-import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;
-import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import static org.hamcrest.Matchers.equalTo;
-public class GeoHashGridTests extends InternalMultiBucketAggregationTestCase<InternalGeoHashGrid> {
+public class GeoHashGridTests extends GeoGridTestCase<InternalGeoHashGridBucket, InternalGeoHashGrid> {
-    @Override
-    protected int minNumberOfBuckets() {
-        return 1;
-    }
-    @Override
-    protected int maxNumberOfBuckets() {
-        return 3;
-    }
     @Override
-    protected InternalGeoHashGrid createTestInstance(String name,
-                                                     List<PipelineAggregator> pipelineAggregators,
-                                                     Map<String, Object> metaData,
-                                                     InternalAggregations aggregations) {
-        int size = randomNumberOfBuckets();
-        List<InternalGeoGridBucket> buckets = new ArrayList<>(size);
-        for (int i = 0; i < size; i++) {
-            double latitude = randomDoubleBetween(-90.0, 90.0, false);
-            double longitude = randomDoubleBetween(-180.0, 180.0, false);
-            long geoHashAsLong = GeoHashUtils.longEncode(longitude, latitude, 4);
-            buckets.add(new InternalGeoHashGridBucket(geoHashAsLong, randomInt(IndexWriter.MAX_DOCS), aggregations));
-        }
+    protected InternalGeoHashGrid createInternalGeoGrid(String name, int size, List<InternalGeoGridBucket> buckets,
+                                                        List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
         return new InternalGeoHashGrid(name, size, buckets, pipelineAggregators, metaData);
     }
@@ -68,83 +40,17 @@ public class GeoHashGridTests extends InternalMultiBucketAggregationTestCase<InternalGeoHashGrid>
     }
     @Override
-    protected void assertReduced(InternalGeoHashGrid reduced, List<InternalGeoHashGrid> inputs) {
-        Map<Long, List<InternalGeoGridBucket>> map = new HashMap<>();
-        for (InternalGeoHashGrid input : inputs) {
-            for (InternalGeoGridBucket bucket : input.getBuckets()) {
-                List<InternalGeoGridBucket> buckets = map.get(bucket.hashAsLong);
-                if (buckets == null) {
-                    map.put(bucket.hashAsLong, buckets = new ArrayList<>());
-                }
-                buckets.add(bucket);
-            }
-        }
-        List<InternalGeoGridBucket> expectedBuckets = new ArrayList<>();
-        for (Map.Entry<Long, List<InternalGeoGridBucket>> entry : map.entrySet()) {
-            long docCount = 0;
-            for (InternalGeoGridBucket bucket : entry.getValue()) {
-                docCount += bucket.docCount;
-            }
-            expectedBuckets.add(new InternalGeoHashGridBucket(entry.getKey(), docCount, InternalAggregations.EMPTY));
-        }
-        expectedBuckets.sort((first, second) -> {
-            int cmp = Long.compare(second.docCount, first.docCount);
-            if (cmp == 0) {
-                return second.compareTo(first);
-            }
-            return cmp;
-        });
-        int requestedSize = inputs.get(0).getRequiredSize();
-        expectedBuckets = expectedBuckets.subList(0, Math.min(requestedSize, expectedBuckets.size()));
-        assertEquals(expectedBuckets.size(), reduced.getBuckets().size());
-        for (int i = 0; i < reduced.getBuckets().size(); i++) {
-            GeoGrid.Bucket expected = expectedBuckets.get(i);
-            GeoGrid.Bucket actual = reduced.getBuckets().get(i);
-            assertEquals(expected.getDocCount(), actual.getDocCount());
-            assertEquals(expected.getKey(), actual.getKey());
-        }
+    protected InternalGeoHashGridBucket createInternalGeoGridBucket(Long key, long docCount, InternalAggregations aggregations) {
+        return new InternalGeoHashGridBucket(key, docCount, aggregations);
     }
     @Override
-    protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
-        return ParsedGeoHashGrid.class;
+    protected long longEncode(double lng, double lat, int precision) {
+        return GeoHashUtils.longEncode(lng, lat, precision);
     }
     @Override
-    protected InternalGeoHashGrid mutateInstance(InternalGeoHashGrid instance) {
-        String name = instance.getName();
-        int size = instance.getRequiredSize();
-        List<InternalGeoGridBucket> buckets = instance.getBuckets();
-        List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
-        Map<String, Object> metaData = instance.getMetaData();
-        switch (between(0, 3)) {
-            case 0:
-                name += randomAlphaOfLength(5);
-                break;
-            case 1:
-                buckets = new ArrayList<>(buckets);
-                buckets.add(
-                    new InternalGeoHashGridBucket(randomNonNegativeLong(), randomInt(IndexWriter.MAX_DOCS), InternalAggregations.EMPTY));
-                break;
-            case 2:
-                size = size + between(1, 10);
-                break;
-            case 3:
-                if (metaData == null) {
-                    metaData = new HashMap<>(1);
-                } else {
-                    metaData = new HashMap<>(instance.getMetaData());
-                }
-                metaData.put(randomAlphaOfLength(15), randomInt());
-                break;
-            default:
-                throw new AssertionError("Illegal randomisation branch");
-        }
-        return new InternalGeoHashGrid(name, size, buckets, pipelineAggregators, metaData);
-    }
-    public void testCreateFromBuckets() {
-        InternalGeoHashGrid original = createTestInstance();
-        assertThat(original, equalTo(original.create(original.buckets)));
+    protected int randomPrecision() {
+        return randomIntBetween(1, 12);
     }
 }