remove centroid calculation from GeoHashGridAggregation

Nicholas Knize 2015-10-09 09:32:35 -05:00
parent b31d3ddd3e
commit ceefe2e91a
6 changed files with 12 additions and 78 deletions

BucketsAggregator.java

@@ -40,7 +40,7 @@ import java.util.Map;
*/
public abstract class BucketsAggregator extends AggregatorBase {
- protected final BigArrays bigArrays;
+ private final BigArrays bigArrays;
private IntArray docCounts;
public BucketsAggregator(String name, AggregatorFactories factories, AggregationContext context, Aggregator parent,
@@ -67,7 +67,7 @@ public abstract class BucketsAggregator extends AggregatorBase {
/**
* Utility method to collect the given doc in the given bucket (identified by the bucket ordinal)
*/
- public void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
+ public final void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
grow(bucketOrd + 1);
collectExistingBucket(subCollector, doc, bucketOrd);
}

GeoHashGrid.java

@@ -18,7 +18,6 @@
*/
package org.elasticsearch.search.aggregations.bucket.geogrid;
- import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import java.util.List;
@@ -33,7 +32,6 @@ public interface GeoHashGrid extends MultiBucketsAggregation {
* A bucket that is associated with a {@code geohash_grid} cell. The key of the bucket is the {@code geohash} of the cell
*/
public static interface Bucket extends MultiBucketsAggregation.Bucket {
- public GeoPoint getCentroid();
}
/**

GeoHashGridAggregator.java

@@ -51,7 +51,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
private final int shardSize;
private final GeoHashGridParser.GeoGridFactory.CellIdSource valuesSource;
private final LongHash bucketOrds;
- private LongArray bucketCentroids;
public GeoHashGridAggregator(String name, AggregatorFactories factories, GeoHashGridParser.GeoGridFactory.CellIdSource valuesSource,
int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
@@ -61,7 +60,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
this.requiredSize = requiredSize;
this.shardSize = shardSize;
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
- bucketCentroids = aggregationContext.bigArrays().newLongArray(1, true);
}
@Override
@@ -69,28 +67,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
return (valuesSource != null && valuesSource.needsScores()) || super.needsScores();
}
- @Override
- public void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
- bucketCentroids = bigArrays.grow(bucketCentroids, bucketOrd + 1);
- super.collectBucket(subCollector, doc, bucketOrd);
- }
- protected final void adjustCentroid(long bucketOrd, long geohash) {
- final int numDocs = getDocCounts().get(bucketOrd);
- final GeoPoint oldCentroid = new GeoPoint();
- final GeoPoint nextLoc = new GeoPoint();
- if (numDocs > 1) {
- final long curCentroid = bucketCentroids.get(bucketOrd);
- oldCentroid.resetFromGeoHash(curCentroid);
- nextLoc.resetFromGeoHash(geohash);
- bucketCentroids.set(bucketOrd, XGeoHashUtils.longEncode(oldCentroid.lon() + (nextLoc.lon() - oldCentroid.lon()) / numDocs,
- oldCentroid.lat() + (nextLoc.lat() - oldCentroid.lat()) / numDocs, XGeoHashUtils.PRECISION));
- } else {
- bucketCentroids.set(bucketOrd, geohash);
- }
- }
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
@@ -104,8 +80,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
long previous = Long.MAX_VALUE;
for (int i = 0; i < valuesCount; ++i) {
- final long valFullRes = values.valueAt(i);
- final long val = XGeoHashUtils.longEncode(valFullRes, valuesSource.precision());
+ final long val = values.valueAt(i);
if (previous != val || i == 0) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
@@ -114,7 +89,6 @@
} else {
collectBucket(sub, doc, bucketOrdinal);
}
- adjustCentroid(bucketOrdinal, valFullRes);
previous = val;
}
}
@@ -128,7 +102,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
long bucketOrd;
public OrdinalBucket() {
- super(0, 0, new GeoPoint(), (InternalAggregations) null);
+ super(0, 0, (InternalAggregations) null);
}
}
@@ -146,7 +120,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
}
spare.geohashAsLong = bucketOrds.get(i);
- spare.centroid.resetFromGeoHash(bucketCentroids.get(i));
spare.docCount = bucketDocCount(i);
spare.bucketOrd = i;
spare = (OrdinalBucket) ordered.insertWithOverflow(spare);
@@ -170,7 +143,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
@Override
public void doClose() {
Releasables.close(bucketOrds);
- Releasables.close(bucketCentroids);
}
}
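Note: the adjustCentroid method removed above maintained a per-bucket running mean, shifting the stored centroid by (next - current) / numDocs on each axis and re-encoding the result as a geohash long in bucketCentroids between updates (which quantizes the running value to geohash precision). A minimal, self-contained sketch of that incremental update follows; RunningCentroid is a hypothetical helper, not a class in this repository, and it keeps raw doubles instead of round-tripping through a geohash long.

// Illustrative sketch of the incremental (running-mean) centroid update
// that the removed adjustCentroid performed.
final class RunningCentroid {
    private double lat;
    private double lon;
    private long count;

    void add(double pointLat, double pointLon) {
        count++;
        if (count == 1) {
            lat = pointLat;
            lon = pointLon;
        } else {
            // mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n
            lat += (pointLat - lat) / count;
            lon += (pointLon - lon) / count;
        }
    }

    double lat() { return lat; }
    double lon() { return lon; }
}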

GeoHashGridParser.java

@@ -150,9 +150,11 @@ public class GeoHashGridParser implements Aggregator.Parser {
private static class CellValues extends SortingNumericDocValues {
private MultiGeoPointValues geoValues;
+ private int precision;
- protected CellValues(MultiGeoPointValues geoValues) {
+ protected CellValues(MultiGeoPointValues geoValues, int precision) {
this.geoValues = geoValues;
+ this.precision = precision;
}
@Override
@@ -161,7 +163,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
resize(geoValues.count());
for (int i = 0; i < count(); ++i) {
GeoPoint target = geoValues.valueAt(i);
- values[i] = XGeoHashUtils.longEncode(target.getLon(), target.getLat(), XGeoHashUtils.PRECISION);
+ values[i] = XGeoHashUtils.longEncode(target.getLon(), target.getLat(), precision);
}
sort();
}
@@ -188,7 +190,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
@Override
public SortedNumericDocValues longValues(LeafReaderContext ctx) {
- return new CellValues(valuesSource.geoPointValues(ctx));
+ return new CellValues(valuesSource.geoPointValues(ctx), precision);
}
@Override
@@ -203,5 +205,4 @@
}
}
}
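Note: with the CellValues change above, the requested geohash precision is applied once at the values-source level instead of being re-applied per document in the aggregator's leaf collector. A rough before/after sketch of the encoding path, using only the XGeoHashUtils.longEncode signatures that appear in this diff (variable names are illustrative):

// Before: CellValues emitted full-precision cell ids and the aggregator re-encoded
// each value to the requested precision for every document.
long oldKey = XGeoHashUtils.longEncode(
        XGeoHashUtils.longEncode(lon, lat, XGeoHashUtils.PRECISION), precision);
// After: CellValues encodes at the requested precision up front, so the emitted
// value is already the bucket key.
long newKey = XGeoHashUtils.longEncode(lon, lat, precision);
// Because geohash cells are hierarchical, both should identify the same
// precision-limited cell; the new path simply skips the second per-document encode.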

InternalGeoHashGrid.java

@@ -87,18 +87,16 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
protected long geohashAsLong;
protected long docCount;
- protected GeoPoint centroid;
protected InternalAggregations aggregations;
public Bucket() {
// For Serialization only
}
- public Bucket(long geohashAsLong, long docCount, GeoPoint centroid, InternalAggregations aggregations) {
+ public Bucket(long geohashAsLong, long docCount, InternalAggregations aggregations) {
this.docCount = docCount;
this.aggregations = aggregations;
this.geohashAsLong = geohashAsLong;
- this.centroid = centroid;
}
@Override
@@ -116,11 +114,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
return docCount;
}
- @Override
- public GeoPoint getCentroid() {
- return centroid;
- }
@Override
public Aggregations getAggregations() {
return aggregations;
@@ -140,23 +133,18 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
public Bucket reduce(List<? extends Bucket> buckets, ReduceContext context) {
List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
long docCount = 0;
- double cLon = 0;
- double cLat = 0;
for (Bucket bucket : buckets) {
docCount += bucket.docCount;
- cLon += (bucket.docCount * bucket.centroid.lon());
- cLat += (bucket.docCount * bucket.centroid.lat());
aggregationsList.add(bucket.aggregations);
}
final InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);
- return new Bucket(geohashAsLong, docCount, new GeoPoint(cLat/docCount, cLon/docCount), aggs);
+ return new Bucket(geohashAsLong, docCount, aggs);
}
@Override
public void readFrom(StreamInput in) throws IOException {
geohashAsLong = in.readLong();
docCount = in.readVLong();
- centroid = GeoPoint.fromGeohash(in.readLong());
aggregations = InternalAggregations.readAggregations(in);
}
@@ -164,7 +152,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
public void writeTo(StreamOutput out) throws IOException {
out.writeLong(geohashAsLong);
out.writeVLong(docCount);
- out.writeLong(XGeoHashUtils.longEncode(centroid.lon(), centroid.lat(), XGeoHashUtils.PRECISION));
aggregations.writeTo(out);
}
@@ -173,7 +160,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
builder.startObject();
builder.field(CommonFields.KEY, getKeyAsString());
builder.field(CommonFields.DOC_COUNT, docCount);
- builder.array(GeoFields.CENTROID, centroid.getLon(), centroid.getLat());
aggregations.toXContentInternal(builder, params);
builder.endObject();
return builder;
@@ -205,7 +191,7 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
@Override
public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) {
- return new Bucket(prototype.geohashAsLong, prototype.docCount, prototype.centroid, aggregations);
+ return new Bucket(prototype.geohashAsLong, prototype.docCount, aggregations);
}
@Override
@@ -298,8 +284,4 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
return i > 0;
}
}
- public static final class GeoFields {
- public static final XContentBuilderString CENTROID = new XContentBuilderString("centroid");
- }
}
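Note: the reduce() lines removed above merged per-shard centroids as a doc-count-weighted mean, centroid = sum(docCount_i * centroid_i) / sum(docCount_i), which equals the mean of the combined points whenever each shard value is itself an exact mean (here it was only approximate, since the per-shard value was quantized to geohash precision). A standalone sketch of that merge, written against the pre-removal Bucket fields; the method itself is illustrative and not part of the codebase:

// Illustrative doc-count-weighted merge of per-shard centroids, equivalent to
// the removed reduce() arithmetic.
static GeoPoint mergeCentroids(List<? extends Bucket> buckets) {
    double weightedLat = 0;
    double weightedLon = 0;
    long totalDocs = 0;
    for (Bucket bucket : buckets) {
        totalDocs += bucket.docCount;
        weightedLat += bucket.docCount * bucket.centroid.lat();
        weightedLon += bucket.docCount * bucket.centroid.lon();
    }
    return new GeoPoint(weightedLat / totalDocs, weightedLon / totalDocs);
}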

GeoHashGridIT.java

@@ -57,7 +57,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
static ObjectIntMap<String> multiValuedExpectedDocCountsForGeoHash = null;
- static ObjectObjectMap<String, GeoPoint> expectedCentroidsForGeoHash = null;
static int numDocs = 100;
static String smallestGeoHash = null;
@@ -75,15 +74,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
return indexCity(index, name, Arrays.<String>asList(latLon));
}
- private GeoPoint updateCentroid(GeoPoint centroid, double lat, double lon, final int docCount) {
- if (centroid == null) {
- return new GeoPoint(lat, lon);
- }
- final double newLon = centroid.lon() + (lon - centroid.lon()) / docCount;
- final double newLat = centroid.lat() + (lat - centroid.lat()) / docCount;
- return centroid.reset(newLat, newLon);
- }
@Override
public void setupSuiteScopeCluster() throws Exception {
createIndex("idx_unmapped");
@@ -94,7 +84,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
List<IndexRequestBuilder> cities = new ArrayList<>();
Random random = getRandom();
expectedDocCountsForGeoHash = new ObjectIntHashMap<>(numDocs * 2);
- expectedCentroidsForGeoHash = new ObjectObjectHashMap<>(numDocs *2);
for (int i = 0; i < numDocs; i++) {
//generate random point
double lat = (180d * random.nextDouble()) - 90d;
@@ -103,8 +92,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
//Index at the highest resolution
cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng));
expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1);
- expectedCentroidsForGeoHash.put(randomGeoHash, updateCentroid(expectedCentroidsForGeoHash.getOrDefault(randomGeoHash,
- null), lat, lng, expectedDocCountsForGeoHash.get(randomGeoHash)));
//Update expected doc counts for all resolutions..
for (int precision = XGeoHashUtils.PRECISION - 1; precision > 0; precision--) {
String hash = XGeoHashUtils.stringEncode(lng, lat, precision);
@@ -112,8 +99,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
smallestGeoHash = hash;
}
expectedDocCountsForGeoHash.put(hash, expectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1);
- expectedCentroidsForGeoHash.put(hash, updateCentroid(expectedCentroidsForGeoHash.getOrDefault(hash,
- null), lat, lng, expectedDocCountsForGeoHash.get(hash)));
}
}
indexRandom(true, cities);
@@ -170,13 +155,9 @@ public class GeoHashGridIT extends ESIntegTestCase {
long bucketCount = cell.getDocCount();
int expectedBucketCount = expectedDocCountsForGeoHash.get(geohash);
- GeoPoint centroid = cell.getCentroid();
- GeoPoint expectedCentroid = expectedCentroidsForGeoHash.get(geohash);
assertNotSame(bucketCount, 0);
assertEquals("Geohash " + geohash + " has wrong doc count ",
expectedBucketCount, bucketCount);
- assertEquals("Geohash " + geohash + " has wrong centroid ",
- expectedCentroid, centroid);
GeoPoint geoPoint = (GeoPoint) propertiesKeys[i];
assertThat(XGeoHashUtils.stringEncode(geoPoint.lon(), geoPoint.lat(), precision), equalTo(geohash));
assertThat((long) propertiesDocCounts[i], equalTo(bucketCount));