Aggregations: Fix geohash grid aggregation on multi-valued fields.
This aggregation creates an anonymous fielddata instance that takes geo points and turns them into geohashes encoded as longs. A bug was introduced in 1.4 by a fielddata refactoring: the fielddata instance tried to populate an array of values without first making sure that the array was large enough.
Close #8507
parent f30a0e846d
commit a94fb92ac5
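The shape of the bug and of the fix, as a minimal standalone sketch. This is not the actual Elasticsearch class: the class name is made up, and Lucene's ArrayUtil.grow(long[], int) is stood in by java.util.Arrays.copyOf.

    import java.util.Arrays;

    // Minimal sketch of the pattern, assuming only what the diff below shows.
    abstract class SortingValuesSketch {

        private int count;                      // number of values for the current doc
        protected long[] values = new long[1];  // backing array that subclasses fill

        // Before the fix, subclasses wrote "count = n;" directly and had to
        // remember to call grow() afterwards. The geohash grid fielddata skipped
        // that second step, so writing values[i] threw
        // ArrayIndexOutOfBoundsException once a document had more geo points
        // than the array's current capacity.
        protected final void resize(int newSize) {
            count = newSize;
            if (values.length < count) {
                values = Arrays.copyOf(values, count); // ensure capacity >= count
            }
        }

        public final int count() {
            return count;
        }
    }

Making count private and funneling every mutation through resize(int) means a subclass can no longer record a new count without also guaranteeing capacity. Accordingly, the hunks below replace each "count = ...; grow();" pair, and the bare "count = ..." at the buggy geohash grid call site, with a single resize(...) call.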
SortingNumericDocValues.java
@@ -29,7 +29,7 @@ import org.apache.lucene.util.Sorter;
  */
 public abstract class SortingNumericDocValues extends SortedNumericDocValues {
 
-    protected int count;
+    private int count;
     protected long[] values;
     private final Sorter sorter;
 
@@ -52,9 +52,11 @@ public abstract class SortingNumericDocValues extends SortedNumericDocValues {
     }
 
     /**
-     * Make sure the {@link #values} array can store at least {@link #count} entries.
+     * Set the {@link #count()} and ensure that the {@link #values} array can
+     * store at least that many entries.
      */
-    protected final void grow() {
+    protected final void resize(int newSize) {
+        count = newSize;
         values = ArrayUtil.grow(values, count);
     }
 
GeoHashGridParser.java
@@ -149,8 +149,8 @@ public class GeoHashGridParser implements Aggregator.Parser {
         public void setDocument(int docId) {
             geoValues = geoPointValues.geoPointValues();
             geoValues.setDocument(docId);
-            count = geoValues.count();
-            for (int i = 0; i < count; ++i) {
+            resize(geoValues.count());
+            for (int i = 0; i < count(); ++i) {
                 GeoPoint target = geoValues.valueAt(i);
                 values[i] = GeoHashUtils.encodeAsLong(target.getLat(), target.getLon(), precision);
             }
ValuesSource.java
@@ -484,9 +484,8 @@ public abstract class ValuesSource {
         public void setDocument(int docId) {
             script.setNextDocId(docId);
             source.longValues().setDocument(docId);
-            count = source.longValues().count();
-            grow();
-            for (int i = 0; i < count; ++i) {
+            resize(source.longValues().count());
+            for (int i = 0; i < count(); ++i) {
                 script.setNextVar("_value", source.longValues().valueAt(i));
                 values[i] = script.runAsLong();
             }
ScriptLongValues.java
@@ -51,30 +51,28 @@ public class ScriptLongValues extends SortingNumericDocValues implements ScriptV
         final Object value = script.run();
 
         if (value == null) {
-            count = 0;
+            resize(0);
         }
 
         else if (value instanceof Number) {
-            count = 1;
+            resize(1);
             values[0] = ((Number) value).longValue();
         }
 
         else if (value.getClass().isArray()) {
-            count = Array.getLength(value);
-            grow();
-            for (int i = 0; i < count; ++i) {
+            resize(Array.getLength(value));
+            for (int i = 0; i < count(); ++i) {
                 values[i] = ((Number) Array.get(value, i)).longValue();
             }
         }
 
         else if (value instanceof Collection) {
-            count = ((Collection<?>) value).size();
-            grow();
+            resize(((Collection<?>) value).size());
             int i = 0;
             for (Iterator<?> it = ((Collection<?>) value).iterator(); it.hasNext(); ++i) {
                 values[i] = ((Number) it.next()).longValue();
             }
-            assert i == count;
+            assert i == count();
         }
 
         else {
GeoHashGridTests.java
@@ -33,8 +33,11 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.junit.Test;
 
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
@@ -46,39 +49,43 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 @ElasticsearchIntegrationTest.SuiteScopeTest
 public class GeoHashGridTests extends ElasticsearchIntegrationTest {
 
-    private IndexRequestBuilder indexCity(String name, String latLon) throws Exception {
+    static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
+    static ObjectIntMap<String> multiValuedExpectedDocCountsForGeoHash = null;
+    static int highestPrecisionGeohash = 12;
+    static int numDocs = 100;
+
+    static String smallestGeoHash = null;
+
+    private static IndexRequestBuilder indexCity(String index, String name, List<String> latLon) throws Exception {
         XContentBuilder source = jsonBuilder().startObject().field("city", name);
         if (latLon != null) {
             source = source.field("location", latLon);
         }
         source = source.endObject();
-        return client().prepareIndex("idx", "type").setSource(source);
+        return client().prepareIndex(index, "type").setSource(source);
     }
 
-    static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
-    static int highestPrecisionGeohash = 12;
-    static int numRandomPoints = 100;
-
-    static String smallestGeoHash = null;
+    private static IndexRequestBuilder indexCity(String index, String name, String latLon) throws Exception {
+        return indexCity(index, name, Arrays.<String>asList(latLon));
+    }
 
     @Override
     public void setupSuiteScopeCluster() throws Exception {
-        createIndex("idx_unmapped");
-
         assertAcked(prepareCreate("idx")
                 .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));
 
+        createIndex("idx_unmapped");
+
         List<IndexRequestBuilder> cities = new ArrayList<>();
         Random random = getRandom();
-        expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numRandomPoints * 2);
-        for (int i = 0; i < numRandomPoints; i++) {
+        expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
+        for (int i = 0; i < numDocs; i++) {
             //generate random point
             double lat = (180d * random.nextDouble()) - 90d;
             double lng = (360d * random.nextDouble()) - 180d;
             String randomGeoHash = GeoHashUtils.encode(lat, lng, highestPrecisionGeohash);
             //Index at the highest resolution
-            cities.add(indexCity(randomGeoHash, lat + ", " + lng));
+            cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng));
             expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1);
             //Update expected doc counts for all resolutions..
             for (int precision = highestPrecisionGeohash - 1; precision > 0; precision--) {
@@ -90,6 +97,35 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
             }
         }
         indexRandom(true, cities);
+
+        assertAcked(prepareCreate("multi_valued_idx")
+                .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));
+
+        cities = new ArrayList<>();
+        multiValuedExpectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
+        for (int i = 0; i < numDocs; i++) {
+            final int numPoints = random.nextInt(4);
+            List<String> points = new ArrayList<>();
+            // TODO (#8512): this should be a Set, not a List. Currently if a document has two positions that have
+            // the same geo hash, it will increase the doc_count for this geo hash by 2 instead of 1
+            List<String> geoHashes = new ArrayList<>();
+            for (int j = 0; j < numPoints; ++j) {
+                double lat = (180d * random.nextDouble()) - 90d;
+                double lng = (360d * random.nextDouble()) - 180d;
+                points.add(lat + "," + lng);
+                // Update expected doc counts for all resolutions..
+                for (int precision = highestPrecisionGeohash; precision > 0; precision--) {
+                    final String geoHash = GeoHashUtils.encode(lat, lng, precision);
+                    geoHashes.add(geoHash);
+                }
+            }
+            cities.add(indexCity("multi_valued_idx", Integer.toString(i), points));
+            for (String hash : geoHashes) {
+                multiValuedExpectedDocCountsForGeoHash.put(hash, multiValuedExpectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1);
+            }
+        }
+        indexRandom(true, cities);
+
         ensureSearchable();
     }
@@ -119,6 +155,31 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void multivalued() throws Exception {
+        for (int precision = 1; precision <= highestPrecisionGeohash; precision++) {
+            SearchResponse response = client().prepareSearch("multi_valued_idx")
+                    .addAggregation(geohashGrid("geohashgrid")
+                            .field("location")
+                            .precision(precision)
+                    )
+                    .execute().actionGet();
+
+            assertSearchResponse(response);
+
+            GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
+            for (GeoHashGrid.Bucket cell : geoGrid.getBuckets()) {
+                String geohash = cell.getKey();
+
+                long bucketCount = cell.getDocCount();
+                int expectedBucketCount = multiValuedExpectedDocCountsForGeoHash.get(geohash);
+                assertNotSame(bucketCount, 0);
+                assertEquals("Geohash " + geohash + " has wrong doc count ",
+                        expectedBucketCount, bucketCount);
+            }
+        }
+    }
+
     @Test
     public void filtered() throws Exception {
         GeoBoundingBoxFilterBuilder bbox = new GeoBoundingBoxFilterBuilder("location");