Aggregations: reduce histogram buckets on the fly using a priority queue.
This commit makes histogram reduction a bit cleaner by expecting buckets returned from shards to be sorted by key and merging them on-the-fly on the coordinating node using a priority queue. Close #8797
This commit is contained in:
parent
5059d6fe1c
commit
5694626f79
|
@ -118,7 +118,8 @@ public class HistogramAggregator extends BucketsAggregator {
|
||||||
buckets.add(histogramFactory.createBucket(rounding.valueForKey(bucketOrds.get(i)), bucketDocCount(i), bucketAggregations(i), keyed, formatter));
|
buckets.add(histogramFactory.createBucket(rounding.valueForKey(bucketOrds.get(i)), bucketDocCount(i), bucketAggregations(i), keyed, formatter));
|
||||||
}
|
}
|
||||||
|
|
||||||
CollectionUtil.introSort(buckets, order.comparator());
|
// the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order
|
||||||
|
CollectionUtil.introSort(buckets, InternalOrder.KEY_ASC.comparator());
|
||||||
|
|
||||||
// value source will be null for unmapped fields
|
// value source will be null for unmapped fields
|
||||||
InternalHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new InternalHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds) : null;
|
InternalHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new InternalHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds) : null;
|
||||||
|
|
|
@ -20,7 +20,9 @@ package org.elasticsearch.search.aggregations.bucket.histogram;
|
||||||
|
|
||||||
import com.carrotsearch.hppc.LongObjectOpenHashMap;
|
import com.carrotsearch.hppc.LongObjectOpenHashMap;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import org.apache.lucene.util.CollectionUtil;
|
import org.apache.lucene.util.CollectionUtil;
|
||||||
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
|
@ -38,6 +40,7 @@ import org.elasticsearch.search.aggregations.support.format.ValueFormatterStream
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ListIterator;
|
import java.util.ListIterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -289,37 +292,71 @@ public class InternalHistogram<B extends InternalHistogram.Bucket> extends Inter
|
||||||
return FACTORY;
|
return FACTORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private static class IteratorAndCurrent<B> {
|
||||||
public InternalAggregation reduce(ReduceContext reduceContext) {
|
|
||||||
|
private final Iterator<B> iterator;
|
||||||
|
private B current;
|
||||||
|
|
||||||
|
IteratorAndCurrent(Iterator<B> iterator) {
|
||||||
|
this.iterator = iterator;
|
||||||
|
current = iterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<B> reduceBuckets(ReduceContext reduceContext) {
|
||||||
List<InternalAggregation> aggregations = reduceContext.aggregations();
|
List<InternalAggregation> aggregations = reduceContext.aggregations();
|
||||||
|
|
||||||
LongObjectPagedHashMap<List<B>> bucketsByKey = new LongObjectPagedHashMap<>(reduceContext.bigArrays());
|
final PriorityQueue<IteratorAndCurrent<B>> pq = new PriorityQueue<IteratorAndCurrent<B>>(aggregations.size()) {
|
||||||
|
@Override
|
||||||
|
protected boolean lessThan(IteratorAndCurrent<B> a, IteratorAndCurrent<B> b) {
|
||||||
|
return a.current.key < b.current.key;
|
||||||
|
}
|
||||||
|
};
|
||||||
for (InternalAggregation aggregation : aggregations) {
|
for (InternalAggregation aggregation : aggregations) {
|
||||||
InternalHistogram<B> histogram = (InternalHistogram) aggregation;
|
InternalHistogram<B> histogram = (InternalHistogram) aggregation;
|
||||||
for (B bucket : histogram.buckets) {
|
if (histogram.buckets.isEmpty() == false) {
|
||||||
List<B> bucketList = bucketsByKey.get(bucket.key);
|
pq.add(new IteratorAndCurrent<>(histogram.buckets.iterator()));
|
||||||
if (bucketList == null) {
|
|
||||||
bucketList = new ArrayList<>(aggregations.size());
|
|
||||||
bucketsByKey.put(bucket.key, bucketList);
|
|
||||||
}
|
|
||||||
bucketList.add(bucket);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<B> reducedBuckets = new ArrayList<>((int) bucketsByKey.size());
|
List<B> reducedBuckets = new ArrayList<>();
|
||||||
for (LongObjectPagedHashMap.Cursor<List<B>> cursor : bucketsByKey) {
|
if (pq.size() > 0) {
|
||||||
List<B> sameTermBuckets = cursor.value;
|
// list of buckets coming from different shards that have the same key
|
||||||
B bucket = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
|
List<B> currentBuckets = new ArrayList<>();
|
||||||
if (bucket.getDocCount() >= minDocCount) {
|
long key = pq.top().current.key;
|
||||||
reducedBuckets.add(bucket);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bucketsByKey.close();
|
|
||||||
|
|
||||||
// adding empty buckets in needed
|
do {
|
||||||
if (minDocCount == 0) {
|
final IteratorAndCurrent<B> top = pq.top();
|
||||||
CollectionUtil.introSort(reducedBuckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
|
|
||||||
List<B> list = order.asc ? reducedBuckets : Lists.reverse(reducedBuckets);
|
if (top.current.key != key) {
|
||||||
|
// the key changes, reduce what we already buffered and reset the buffer for current buckets
|
||||||
|
reducedBuckets.add(currentBuckets.get(0).reduce(currentBuckets, reduceContext));
|
||||||
|
currentBuckets.clear();
|
||||||
|
key = top.current.key;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentBuckets.add(top.current);
|
||||||
|
|
||||||
|
if (top.iterator.hasNext()) {
|
||||||
|
final B next = top.iterator.next();
|
||||||
|
assert next.key > top.current.key : "shards must return data sorted by key";
|
||||||
|
top.current = next;
|
||||||
|
pq.updateTop();
|
||||||
|
} else {
|
||||||
|
pq.pop();
|
||||||
|
}
|
||||||
|
} while (pq.size() > 0);
|
||||||
|
|
||||||
|
if (currentBuckets.isEmpty() == false) {
|
||||||
|
reducedBuckets.add(currentBuckets.get(0).reduce(currentBuckets, reduceContext));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reducedBuckets;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addEmptyBuckets(List<B> list) {
|
||||||
B lastBucket = null;
|
B lastBucket = null;
|
||||||
ExtendedBounds bounds = emptyBucketInfo.bounds;
|
ExtendedBounds bounds = emptyBucketInfo.bounds;
|
||||||
ListIterator<B> iter = list.listIterator();
|
ListIterator<B> iter = list.listIterator();
|
||||||
|
@ -373,12 +410,26 @@ public class InternalHistogram<B extends InternalHistogram.Bucket> extends Inter
|
||||||
key = emptyBucketInfo.rounding.nextRoundingValue(key);
|
key = emptyBucketInfo.rounding.nextRoundingValue(key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (order != InternalOrder.KEY_ASC && order != InternalOrder.KEY_DESC) {
|
|
||||||
CollectionUtil.introSort(reducedBuckets, order.comparator());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public InternalAggregation reduce(ReduceContext reduceContext) {
|
||||||
|
List<B> reducedBuckets = reduceBuckets(reduceContext);
|
||||||
|
|
||||||
|
// adding empty buckets if needed
|
||||||
|
if (minDocCount == 0) {
|
||||||
|
addEmptyBuckets(reducedBuckets);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (order == InternalOrder.KEY_ASC) {
|
||||||
|
// nothing to do, data are already sorted since shards return
|
||||||
|
// sorted buckets and the merge-sort performed by reduceBuckets
|
||||||
|
// maintains order
|
||||||
|
} else if (order == InternalOrder.KEY_DESC) {
|
||||||
|
// we just need to reverse here...
|
||||||
|
reducedBuckets = Lists.reverse(reducedBuckets);
|
||||||
} else {
|
} else {
|
||||||
|
// sorted by sub-aggregation, need to fall back to a costly n*log(n) sort
|
||||||
CollectionUtil.introSort(reducedBuckets, order.comparator());
|
CollectionUtil.introSort(reducedBuckets, order.comparator());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue