Aggregations: Fix geohash grid aggregation on multi-valued fields.

This aggregation creates an anonymous fielddata instance that takes geo points
and turns them into geohashes encoded as longs. A bug was introduced in 1.4 by a
fielddata refactoring: the fielddata instance populates an array with the
per-document values without first making sure that the array is large enough.
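
For illustration only (not part of this change), a minimal self-contained sketch of the
failure mode and of the fix pattern, assuming a reusable long[] buffer like the one backing
SortingNumericDocValues; all names in the sketch are made up for the example:

import java.util.Arrays;

// Standalone sketch (hypothetical names): a per-document values buffer that is
// reused across documents, as in the fielddata implementations changed below.
public class ResizeSketch {
    private int count;
    private long[] values = new long[1]; // small initial capacity

    // Buggy pattern: sets count but never grows the backing array.
    void setDocumentBuggy(long[] docValues) {
        count = docValues.length;
        for (int i = 0; i < count; ++i) {
            values[i] = docValues[i]; // ArrayIndexOutOfBoundsException once count > values.length
        }
    }

    // Fixed pattern, mirroring the new resize(int): grow first, then write.
    void setDocumentFixed(long[] docValues) {
        resize(docValues.length);
        for (int i = 0; i < count; ++i) {
            values[i] = docValues[i];
        }
    }

    private void resize(int newSize) {
        count = newSize;
        if (values.length < count) {
            values = Arrays.copyOf(values, count); // stands in for ArrayUtil.grow
        }
    }

    public static void main(String[] args) {
        ResizeSketch sketch = new ResizeSketch();
        sketch.setDocumentFixed(new long[]{1L, 2L, 3L});     // grows the buffer, works
        sketch.setDocumentBuggy(new long[]{1L, 2L, 3L, 4L}); // overflows the buffer
    }
}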

Close #8507
Adrien Grand 2014-11-17 18:43:22 +01:00
parent f30a0e846d
commit a94fb92ac5
5 changed files with 89 additions and 29 deletions

SortingNumericDocValues.java

@@ -29,7 +29,7 @@ import org.apache.lucene.util.Sorter;
  */
 public abstract class SortingNumericDocValues extends SortedNumericDocValues {

-    protected int count;
+    private int count;
     protected long[] values;
     private final Sorter sorter;
@@ -52,9 +52,11 @@ public abstract class SortingNumericDocValues extends SortedNumericDocValues {
     }

     /**
-     * Make sure the {@link #values} array can store at least {@link #count} entries.
+     * Set the {@link #count()} and ensure that the {@link #values} array can
+     * store at least that many entries.
      */
-    protected final void grow() {
+    protected final void resize(int newSize) {
+        count = newSize;
         values = ArrayUtil.grow(values, count);
     }

GeoHashGridParser.java

@@ -149,8 +149,8 @@ public class GeoHashGridParser implements Aggregator.Parser {
         public void setDocument(int docId) {
             geoValues = geoPointValues.geoPointValues();
             geoValues.setDocument(docId);
-            count = geoValues.count();
-            for (int i = 0; i < count; ++i) {
+            resize(geoValues.count());
+            for (int i = 0; i < count(); ++i) {
                 GeoPoint target = geoValues.valueAt(i);
                 values[i] = GeoHashUtils.encodeAsLong(target.getLat(), target.getLon(), precision);
             }

ValuesSource.java

@@ -484,9 +484,8 @@ public abstract class ValuesSource {
             public void setDocument(int docId) {
                 script.setNextDocId(docId);
                 source.longValues().setDocument(docId);
-                count = source.longValues().count();
-                grow();
-                for (int i = 0; i < count; ++i) {
+                resize(source.longValues().count());
+                for (int i = 0; i < count(); ++i) {
                     script.setNextVar("_value", source.longValues().valueAt(i));
                     values[i] = script.runAsLong();
                 }

ScriptLongValues.java

@@ -51,30 +51,28 @@ public class ScriptLongValues extends SortingNumericDocValues implements ScriptV
         final Object value = script.run();

         if (value == null) {
-            count = 0;
+            resize(0);
         }

         else if (value instanceof Number) {
-            count = 1;
+            resize(1);
             values[0] = ((Number) value).longValue();
         }

         else if (value.getClass().isArray()) {
-            count = Array.getLength(value);
-            grow();
-            for (int i = 0; i < count; ++i) {
+            resize(Array.getLength(value));
+            for (int i = 0; i < count(); ++i) {
                 values[i] = ((Number) Array.get(value, i)).longValue();
             }
         }

         else if (value instanceof Collection) {
-            count = ((Collection<?>) value).size();
-            grow();
+            resize(((Collection<?>) value).size());
             int i = 0;
             for (Iterator<?> it = ((Collection<?>) value).iterator(); it.hasNext(); ++i) {
                 values[i] = ((Number) it.next()).longValue();
             }
-            assert i == count;
+            assert i == count();
         }

         else {

GeoHashGridTests.java

@@ -33,8 +33,11 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.junit.Test;

 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
+import java.util.Set;

 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.geohashGrid;
@@ -46,39 +49,43 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 @ElasticsearchIntegrationTest.SuiteScopeTest
 public class GeoHashGridTests extends ElasticsearchIntegrationTest {

-    private IndexRequestBuilder indexCity(String name, String latLon) throws Exception {
+    static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
+    static ObjectIntMap<String> multiValuedExpectedDocCountsForGeoHash = null;
+    static int highestPrecisionGeohash = 12;
+    static int numDocs = 100;
+    static String smallestGeoHash = null;
+
+    private static IndexRequestBuilder indexCity(String index, String name, List<String> latLon) throws Exception {
         XContentBuilder source = jsonBuilder().startObject().field("city", name);
         if (latLon != null) {
             source = source.field("location", latLon);
         }
         source = source.endObject();
-        return client().prepareIndex("idx", "type").setSource(source);
+        return client().prepareIndex(index, "type").setSource(source);
     }

-    static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
-    static int highestPrecisionGeohash = 12;
-    static int numRandomPoints = 100;
-    static String smallestGeoHash = null;
+    private static IndexRequestBuilder indexCity(String index, String name, String latLon) throws Exception {
+        return indexCity(index, name, Arrays.<String>asList(latLon));
+    }

     @Override
     public void setupSuiteScopeCluster() throws Exception {
-        createIndex("idx_unmapped");
         assertAcked(prepareCreate("idx")
                 .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));
+        createIndex("idx_unmapped");

         List<IndexRequestBuilder> cities = new ArrayList<>();
         Random random = getRandom();
-        expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numRandomPoints * 2);
-        for (int i = 0; i < numRandomPoints; i++) {
+        expectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
+        for (int i = 0; i < numDocs; i++) {
             //generate random point
             double lat = (180d * random.nextDouble()) - 90d;
             double lng = (360d * random.nextDouble()) - 180d;
             String randomGeoHash = GeoHashUtils.encode(lat, lng, highestPrecisionGeohash);
             //Index at the highest resolution
-            cities.add(indexCity(randomGeoHash, lat + ", " + lng));
+            cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng));
             expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1);
             //Update expected doc counts for all resolutions..
             for (int precision = highestPrecisionGeohash - 1; precision > 0; precision--) {
@@ -90,6 +97,35 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
             }
         }
         indexRandom(true, cities);

+        assertAcked(prepareCreate("multi_valued_idx")
+                .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed"));
+        cities = new ArrayList<>();
+        multiValuedExpectedDocCountsForGeoHash = new ObjectIntOpenHashMap<>(numDocs * 2);
+        for (int i = 0; i < numDocs; i++) {
+            final int numPoints = random.nextInt(4);
+            List<String> points = new ArrayList<>();
+            // TODO (#8512): this should be a Set, not a List. Currently if a document has two positions that have
+            // the same geo hash, it will increase the doc_count for this geo hash by 2 instead of 1
+            List<String> geoHashes = new ArrayList<>();
+            for (int j = 0; j < numPoints; ++j) {
+                double lat = (180d * random.nextDouble()) - 90d;
+                double lng = (360d * random.nextDouble()) - 180d;
+                points.add(lat + "," + lng);
+                // Update expected doc counts for all resolutions..
+                for (int precision = highestPrecisionGeohash; precision > 0; precision--) {
+                    final String geoHash = GeoHashUtils.encode(lat, lng, precision);
+                    geoHashes.add(geoHash);
+                }
+            }
+            cities.add(indexCity("multi_valued_idx", Integer.toString(i), points));
+            for (String hash : geoHashes) {
+                multiValuedExpectedDocCountsForGeoHash.put(hash, multiValuedExpectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1);
+            }
+        }
+        indexRandom(true, cities);
+
         ensureSearchable();
     }
@@ -119,6 +155,31 @@ public class GeoHashGridTests extends ElasticsearchIntegrationTest {
         }
     }

     @Test
+    public void multivalued() throws Exception {
+        for (int precision = 1; precision <= highestPrecisionGeohash; precision++) {
+            SearchResponse response = client().prepareSearch("multi_valued_idx")
+                    .addAggregation(geohashGrid("geohashgrid")
+                            .field("location")
+                            .precision(precision)
+                    )
+                    .execute().actionGet();
+
+            assertSearchResponse(response);
+
+            GeoHashGrid geoGrid = response.getAggregations().get("geohashgrid");
+            for (GeoHashGrid.Bucket cell : geoGrid.getBuckets()) {
+                String geohash = cell.getKey();
+                long bucketCount = cell.getDocCount();
+                int expectedBucketCount = multiValuedExpectedDocCountsForGeoHash.get(geohash);
+                assertNotSame(bucketCount, 0);
+                assertEquals("Geohash " + geohash + " has wrong doc count ",
+                        expectedBucketCount, bucketCount);
+            }
+        }
+    }
+
+    @Test
     public void filtered() throws Exception {
         GeoBoundingBoxFilterBuilder bbox = new GeoBoundingBoxFilterBuilder("location");