mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-26 14:54:56 +00:00
remove centroid calculation from GeoHashGridAggregation
This commit is contained in:
parent
b31d3ddd3e
commit
ceefe2e91a
@ -40,7 +40,7 @@ import java.util.Map;
|
||||
*/
|
||||
public abstract class BucketsAggregator extends AggregatorBase {
|
||||
|
||||
protected final BigArrays bigArrays;
|
||||
private final BigArrays bigArrays;
|
||||
private IntArray docCounts;
|
||||
|
||||
public BucketsAggregator(String name, AggregatorFactories factories, AggregationContext context, Aggregator parent,
|
||||
@ -67,7 +67,7 @@ public abstract class BucketsAggregator extends AggregatorBase {
|
||||
/**
|
||||
* Utility method to collect the given doc in the given bucket (identified by the bucket ordinal)
|
||||
*/
|
||||
public void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
|
||||
public final void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
|
||||
grow(bucketOrd + 1);
|
||||
collectExistingBucket(subCollector, doc, bucketOrd);
|
||||
}
|
||||
|
@ -18,7 +18,6 @@
|
||||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket.geogrid;
|
||||
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
|
||||
|
||||
import java.util.List;
|
||||
@ -33,7 +32,6 @@ public interface GeoHashGrid extends MultiBucketsAggregation {
|
||||
* A bucket that is associated with a {@code geohash_grid} cell. The key of the bucket is the {@cod geohash} of the cell
|
||||
*/
|
||||
public static interface Bucket extends MultiBucketsAggregation.Bucket {
|
||||
public GeoPoint getCentroid();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -51,7 +51,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
private final int shardSize;
|
||||
private final GeoHashGridParser.GeoGridFactory.CellIdSource valuesSource;
|
||||
private final LongHash bucketOrds;
|
||||
private LongArray bucketCentroids;
|
||||
|
||||
public GeoHashGridAggregator(String name, AggregatorFactories factories, GeoHashGridParser.GeoGridFactory.CellIdSource valuesSource,
|
||||
int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
|
||||
@ -61,7 +60,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
this.requiredSize = requiredSize;
|
||||
this.shardSize = shardSize;
|
||||
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
|
||||
bucketCentroids = aggregationContext.bigArrays().newLongArray(1, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -69,28 +67,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
return (valuesSource != null && valuesSource.needsScores()) || super.needsScores();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collectBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
|
||||
bucketCentroids = bigArrays.grow(bucketCentroids, bucketOrd + 1);
|
||||
super.collectBucket(subCollector, doc, bucketOrd);
|
||||
}
|
||||
|
||||
protected final void adjustCentroid(long bucketOrd, long geohash) {
|
||||
final int numDocs = getDocCounts().get(bucketOrd);
|
||||
final GeoPoint oldCentroid = new GeoPoint();
|
||||
final GeoPoint nextLoc = new GeoPoint();
|
||||
|
||||
if (numDocs > 1) {
|
||||
final long curCentroid = bucketCentroids.get(bucketOrd);
|
||||
oldCentroid.resetFromGeoHash(curCentroid);
|
||||
nextLoc.resetFromGeoHash(geohash);
|
||||
bucketCentroids.set(bucketOrd, XGeoHashUtils.longEncode(oldCentroid.lon() + (nextLoc.lon() - oldCentroid.lon()) / numDocs,
|
||||
oldCentroid.lat() + (nextLoc.lat() - oldCentroid.lat()) / numDocs, XGeoHashUtils.PRECISION));
|
||||
} else {
|
||||
bucketCentroids.set(bucketOrd, geohash);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
|
||||
final LeafBucketCollector sub) throws IOException {
|
||||
@ -104,8 +80,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
|
||||
long previous = Long.MAX_VALUE;
|
||||
for (int i = 0; i < valuesCount; ++i) {
|
||||
final long valFullRes = values.valueAt(i);
|
||||
final long val = XGeoHashUtils.longEncode(valFullRes, valuesSource.precision());
|
||||
final long val = values.valueAt(i);
|
||||
if (previous != val || i == 0) {
|
||||
long bucketOrdinal = bucketOrds.add(val);
|
||||
if (bucketOrdinal < 0) { // already seen
|
||||
@ -114,7 +89,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
} else {
|
||||
collectBucket(sub, doc, bucketOrdinal);
|
||||
}
|
||||
adjustCentroid(bucketOrdinal, valFullRes);
|
||||
previous = val;
|
||||
}
|
||||
}
|
||||
@ -128,7 +102,7 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
long bucketOrd;
|
||||
|
||||
public OrdinalBucket() {
|
||||
super(0, 0, new GeoPoint(), (InternalAggregations) null);
|
||||
super(0, 0, (InternalAggregations) null);
|
||||
}
|
||||
|
||||
}
|
||||
@ -146,7 +120,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
}
|
||||
|
||||
spare.geohashAsLong = bucketOrds.get(i);
|
||||
spare.centroid.resetFromGeoHash(bucketCentroids.get(i));
|
||||
spare.docCount = bucketDocCount(i);
|
||||
spare.bucketOrd = i;
|
||||
spare = (OrdinalBucket) ordered.insertWithOverflow(spare);
|
||||
@ -170,7 +143,6 @@ public class GeoHashGridAggregator extends BucketsAggregator {
|
||||
@Override
|
||||
public void doClose() {
|
||||
Releasables.close(bucketOrds);
|
||||
Releasables.close(bucketCentroids);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -150,9 +150,11 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
||||
|
||||
private static class CellValues extends SortingNumericDocValues {
|
||||
private MultiGeoPointValues geoValues;
|
||||
private int precision;
|
||||
|
||||
protected CellValues(MultiGeoPointValues geoValues) {
|
||||
protected CellValues(MultiGeoPointValues geoValues, int precision) {
|
||||
this.geoValues = geoValues;
|
||||
this.precision = precision;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -161,7 +163,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
||||
resize(geoValues.count());
|
||||
for (int i = 0; i < count(); ++i) {
|
||||
GeoPoint target = geoValues.valueAt(i);
|
||||
values[i] = XGeoHashUtils.longEncode(target.getLon(), target.getLat(), XGeoHashUtils.PRECISION);
|
||||
values[i] = XGeoHashUtils.longEncode(target.getLon(), target.getLat(), precision);
|
||||
}
|
||||
sort();
|
||||
}
|
||||
@ -188,7 +190,7 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
||||
|
||||
@Override
|
||||
public SortedNumericDocValues longValues(LeafReaderContext ctx) {
|
||||
return new CellValues(valuesSource.geoPointValues(ctx));
|
||||
return new CellValues(valuesSource.geoPointValues(ctx), precision);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -203,5 +205,4 @@ public class GeoHashGridParser implements Aggregator.Parser {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -87,18 +87,16 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
|
||||
protected long geohashAsLong;
|
||||
protected long docCount;
|
||||
protected GeoPoint centroid;
|
||||
protected InternalAggregations aggregations;
|
||||
|
||||
public Bucket() {
|
||||
// For Serialization only
|
||||
}
|
||||
|
||||
public Bucket(long geohashAsLong, long docCount, GeoPoint centroid, InternalAggregations aggregations) {
|
||||
public Bucket(long geohashAsLong, long docCount, InternalAggregations aggregations) {
|
||||
this.docCount = docCount;
|
||||
this.aggregations = aggregations;
|
||||
this.geohashAsLong = geohashAsLong;
|
||||
this.centroid = centroid;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -116,11 +114,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
return docCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GeoPoint getCentroid() {
|
||||
return centroid;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Aggregations getAggregations() {
|
||||
return aggregations;
|
||||
@ -140,23 +133,18 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
public Bucket reduce(List<? extends Bucket> buckets, ReduceContext context) {
|
||||
List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
|
||||
long docCount = 0;
|
||||
double cLon = 0;
|
||||
double cLat = 0;
|
||||
for (Bucket bucket : buckets) {
|
||||
docCount += bucket.docCount;
|
||||
cLon += (bucket.docCount * bucket.centroid.lon());
|
||||
cLat += (bucket.docCount * bucket.centroid.lat());
|
||||
aggregationsList.add(bucket.aggregations);
|
||||
}
|
||||
final InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);
|
||||
return new Bucket(geohashAsLong, docCount, new GeoPoint(cLat/docCount, cLon/docCount), aggs);
|
||||
return new Bucket(geohashAsLong, docCount, aggs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
geohashAsLong = in.readLong();
|
||||
docCount = in.readVLong();
|
||||
centroid = GeoPoint.fromGeohash(in.readLong());
|
||||
aggregations = InternalAggregations.readAggregations(in);
|
||||
}
|
||||
|
||||
@ -164,7 +152,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeLong(geohashAsLong);
|
||||
out.writeVLong(docCount);
|
||||
out.writeLong(XGeoHashUtils.longEncode(centroid.lon(), centroid.lat(), XGeoHashUtils.PRECISION));
|
||||
aggregations.writeTo(out);
|
||||
}
|
||||
|
||||
@ -173,7 +160,6 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
builder.startObject();
|
||||
builder.field(CommonFields.KEY, getKeyAsString());
|
||||
builder.field(CommonFields.DOC_COUNT, docCount);
|
||||
builder.array(GeoFields.CENTROID, centroid.getLon(), centroid.getLat());
|
||||
aggregations.toXContentInternal(builder, params);
|
||||
builder.endObject();
|
||||
return builder;
|
||||
@ -205,7 +191,7 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
|
||||
@Override
|
||||
public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) {
|
||||
return new Bucket(prototype.geohashAsLong, prototype.docCount, prototype.centroid, aggregations);
|
||||
return new Bucket(prototype.geohashAsLong, prototype.docCount, aggregations);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -298,8 +284,4 @@ public class InternalGeoHashGrid extends InternalMultiBucketAggregation<Internal
|
||||
return i > 0;
|
||||
}
|
||||
}
|
||||
|
||||
public static final class GeoFields {
|
||||
public static final XContentBuilderString CENTROID = new XContentBuilderString("centroid");
|
||||
}
|
||||
}
|
||||
|
@ -57,7 +57,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
|
||||
static ObjectIntMap<String> expectedDocCountsForGeoHash = null;
|
||||
static ObjectIntMap<String> multiValuedExpectedDocCountsForGeoHash = null;
|
||||
static ObjectObjectMap<String, GeoPoint> expectedCentroidsForGeoHash = null;
|
||||
static int numDocs = 100;
|
||||
|
||||
static String smallestGeoHash = null;
|
||||
@ -75,15 +74,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
return indexCity(index, name, Arrays.<String>asList(latLon));
|
||||
}
|
||||
|
||||
private GeoPoint updateCentroid(GeoPoint centroid, double lat, double lon, final int docCount) {
|
||||
if (centroid == null) {
|
||||
return new GeoPoint(lat, lon);
|
||||
}
|
||||
final double newLon = centroid.lon() + (lon - centroid.lon()) / docCount;
|
||||
final double newLat = centroid.lat() + (lat - centroid.lat()) / docCount;
|
||||
return centroid.reset(newLat, newLon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setupSuiteScopeCluster() throws Exception {
|
||||
createIndex("idx_unmapped");
|
||||
@ -94,7 +84,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
List<IndexRequestBuilder> cities = new ArrayList<>();
|
||||
Random random = getRandom();
|
||||
expectedDocCountsForGeoHash = new ObjectIntHashMap<>(numDocs * 2);
|
||||
expectedCentroidsForGeoHash = new ObjectObjectHashMap<>(numDocs *2);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
//generate random point
|
||||
double lat = (180d * random.nextDouble()) - 90d;
|
||||
@ -103,8 +92,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
//Index at the highest resolution
|
||||
cities.add(indexCity("idx", randomGeoHash, lat + ", " + lng));
|
||||
expectedDocCountsForGeoHash.put(randomGeoHash, expectedDocCountsForGeoHash.getOrDefault(randomGeoHash, 0) + 1);
|
||||
expectedCentroidsForGeoHash.put(randomGeoHash, updateCentroid(expectedCentroidsForGeoHash.getOrDefault(randomGeoHash,
|
||||
null), lat, lng, expectedDocCountsForGeoHash.get(randomGeoHash)));
|
||||
//Update expected doc counts for all resolutions..
|
||||
for (int precision = XGeoHashUtils.PRECISION - 1; precision > 0; precision--) {
|
||||
String hash = XGeoHashUtils.stringEncode(lng, lat, precision);
|
||||
@ -112,8 +99,6 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
smallestGeoHash = hash;
|
||||
}
|
||||
expectedDocCountsForGeoHash.put(hash, expectedDocCountsForGeoHash.getOrDefault(hash, 0) + 1);
|
||||
expectedCentroidsForGeoHash.put(hash, updateCentroid(expectedCentroidsForGeoHash.getOrDefault(hash,
|
||||
null), lat, lng, expectedDocCountsForGeoHash.get(hash)));
|
||||
}
|
||||
}
|
||||
indexRandom(true, cities);
|
||||
@ -170,13 +155,9 @@ public class GeoHashGridIT extends ESIntegTestCase {
|
||||
|
||||
long bucketCount = cell.getDocCount();
|
||||
int expectedBucketCount = expectedDocCountsForGeoHash.get(geohash);
|
||||
GeoPoint centroid = cell.getCentroid();
|
||||
GeoPoint expectedCentroid = expectedCentroidsForGeoHash.get(geohash);
|
||||
assertNotSame(bucketCount, 0);
|
||||
assertEquals("Geohash " + geohash + " has wrong doc count ",
|
||||
expectedBucketCount, bucketCount);
|
||||
assertEquals("Geohash " + geohash + " has wrong centroid ",
|
||||
expectedCentroid, centroid);
|
||||
GeoPoint geoPoint = (GeoPoint) propertiesKeys[i];
|
||||
assertThat(XGeoHashUtils.stringEncode(geoPoint.lon(), geoPoint.lat(), precision), equalTo(geohash));
|
||||
assertThat((long) propertiesDocCounts[i], equalTo(bucketCount));
|
||||
|
Loading…
x
Reference in New Issue
Block a user