Add new option `min_doc_count` to terms and histogram aggregations.
`min_doc_count` is the minimum number of hits that a term or histogram bucket must match in order to appear in the response. `min_doc_count=0` replaces `compute_empty_buckets` for histograms and behaves exactly like the facets' `all_terms=true` option for terms aggregations. Close #4662
parent 943b62634c
commit 5c237fe834
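For illustration only (not part of the commit): a minimal sketch of how the new option might be set through the Java builders this change extends, `TermsBuilder#minDocCount` and `HistogramBuilder#minDocCount`. The `field`, `interval` and `size` setters are assumed from the pre-existing builder API.

[source,java]
--------------------------------------------------
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;

public class MinDocCountUsageSketch {
    public static void main(String[] args) {
        // Only return tags that were found in at least 10 hits.
        TermsBuilder tags = new TermsBuilder("tags")
                .field("tag")
                .minDocCount(10);

        // Return every 50-wide price bucket, including empty ones
        // (min_doc_count = 0 replaces the old compute_empty_buckets flag).
        HistogramBuilder prices = new HistogramBuilder("prices")
                .field("price")
                .interval(50)
                .minDocCount(0);

        // Both builders would then be attached to a search request via addAggregation(...).
    }
}
--------------------------------------------------
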
@@ -159,6 +159,63 @@ If the histogram aggregation has a direct metrics sub-aggregation, the latter ca
<2> There is no need to configure the `price` field for the `price_stats` aggregation as it will inherit it by default from its parent histogram aggregation.

==== Minimum document count

It is possible to only return buckets with a document count greater than or equal to a configured limit using the `min_doc_count` option.

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "prices" : {
            "histogram" : {
                "field" : "price",
                "interval" : 50,
                "min_doc_count": 10
            }
        }
    }
}
--------------------------------------------------

The above aggregation would only return buckets that contain 10 documents or more. The default value is `1`.

NOTE: The special value `0` can be used to add empty buckets to the response between the minimum and the maximum buckets. Here is an example of what the response could look like:

[source,js]
--------------------------------------------------
{
    "aggregations": {
        "prices": {
            "0": {
                "key": 0,
                "doc_count": 2
            },
            "50": {
                "key": 50,
                "doc_count": 0
            },
            "100": {
                "key": 100,
                "doc_count": 0
            },
            "150": {
                "key": 150,
                "doc_count": 3
            },
            "200": {
                "key": 200,
                "doc_count": 0
            },
            "250": {
                "key": 250,
                "doc_count": 0
            },
            "300": {
                "key": 300,
                "doc_count": 1
            }
        }
    }
}
--------------------------------------------------

==== Response Format

By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash instead, keyed by the buckets' keys:

@@ -142,6 +142,34 @@ Ordering the buckets by multi value metrics sub-aggregation (identified by the a
}
--------------------------------------------------

==== Minimum document count

It is possible to only return terms that match at least a configured number of hits using the `min_doc_count` option:

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tag",
                "min_doc_count": 10
            }
        }
    }
}
--------------------------------------------------

The above aggregation would only return tags which have been found in 10 hits or more. The default value is `1`.

NOTE: Setting `min_doc_count` to `0` will also return buckets for terms that didn't match any hit. However, some of
the returned terms which have a document count of zero might only belong to deleted documents, so there is
no guarantee that a `match_all` query would find a positive document count for those terms.

WARNING: When NOT sorting on `doc_count` descending, high values of `min_doc_count` may return a number of buckets
which is less than `size` because not enough data was gathered from the shards. Missing buckets can be
brought back by increasing `shard_size`.

==== Script

Generating the terms using a script:

@@ -0,0 +1,64 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.collect;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.common.collect.UnmodifiableIterator;

import java.util.Comparator;
import java.util.Iterator;

public enum Iterators2 {
    ;

    /** Remove duplicated elements from an iterator over sorted content. */
    public static <T> Iterator<T> deduplicateSorted(Iterator<? extends T> iterator, final Comparator<? super T> comparator) {
        final PeekingIterator<T> it = Iterators.peekingIterator(iterator);
        return new UnmodifiableIterator<T>() {

            @Override
            public boolean hasNext() {
                return it.hasNext();
            }

            @Override
            public T next() {
                final T ret = it.next();
                while (it.hasNext() && comparator.compare(ret, it.peek()) == 0) {
                    it.next();
                }
                assert !it.hasNext() || comparator.compare(ret, it.peek()) < 0 : "iterator is not sorted: " + ret + " > " + it.peek();
                return ret;
            }

        };
    }

    /** Return a merged view over several iterators, optionally deduplicating equivalent entries. */
    public static <T> Iterator<T> mergeSorted(Iterable<Iterator<? extends T>> iterators, Comparator<? super T> comparator, boolean deduplicate) {
        Iterator<T> it = Iterators.mergeSorted(iterators, comparator);
        if (deduplicate) {
            it = deduplicateSorted(it, comparator);
        }
        return it;
    }

}

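A quick illustration (not part of the commit) of the `Iterators2` helpers added above, using plain integer iterators; only Guava and the new class are required.

[source,java]
--------------------------------------------------
import com.google.common.collect.Ordering;
import org.elasticsearch.common.collect.Iterators2;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class Iterators2Sketch {
    public static void main(String[] args) {
        // Two already-sorted inputs with overlapping values.
        Iterator<Integer> a = Arrays.asList(1, 3, 5, 7).iterator();
        Iterator<Integer> b = Arrays.asList(3, 4, 5, 8).iterator();
        List<Iterator<? extends Integer>> iterators = Arrays.<Iterator<? extends Integer>>asList(a, b);

        // Merged view: 1, 3, 4, 5, 7, 8 (duplicates collapsed because deduplicate is true).
        Iterator<Integer> merged = Iterators2.mergeSorted(iterators, Ordering.<Integer>natural(), true);
        while (merged.hasNext()) {
            System.out.println(merged.next());
        }
    }
}
--------------------------------------------------

This is the same pattern the string terms aggregator uses further down to enumerate globally distinct terms across segments when `min_doc_count` is `0`.
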
@ -117,7 +117,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
|
||||
String type();
|
||||
|
||||
AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
|
||||
AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
|
||||
|
||||
Bucket createBucket(long key, long docCount, InternalAggregations aggregations);
|
||||
|
||||
|
@ -128,14 +128,17 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
private InternalOrder order;
|
||||
private ValueFormatter formatter;
|
||||
private boolean keyed;
|
||||
private long minDocCount;
|
||||
private EmptyBucketInfo emptyBucketInfo;
|
||||
|
||||
protected AbstractHistogramBase() {} // for serialization
|
||||
|
||||
protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
super(name);
|
||||
this.buckets = buckets;
|
||||
this.order = order;
|
||||
assert (minDocCount == 0) == (emptyBucketInfo != null);
|
||||
this.minDocCount = minDocCount;
|
||||
this.emptyBucketInfo = emptyBucketInfo;
|
||||
this.formatter = formatter;
|
||||
this.keyed = keyed;
|
||||
|
@ -169,28 +172,36 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
List<InternalAggregation> aggregations = reduceContext.aggregations();
|
||||
if (aggregations.size() == 1) {
|
||||
|
||||
if (emptyBucketInfo == null) {
|
||||
if (minDocCount == 1) {
|
||||
return aggregations.get(0);
|
||||
}
|
||||
|
||||
// we need to fill the gaps with empty buckets
|
||||
AbstractHistogramBase histo = (AbstractHistogramBase) aggregations.get(0);
|
||||
CollectionUtil.introSort(histo.buckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
|
||||
List<HistogramBase.Bucket> list = order.asc ? histo.buckets : Lists.reverse(histo.buckets);
|
||||
HistogramBase.Bucket prevBucket = null;
|
||||
ListIterator<HistogramBase.Bucket> iter = list.listIterator();
|
||||
while (iter.hasNext()) {
|
||||
// look ahead on the next bucket without advancing the iter
|
||||
// so we'll be able to insert elements at the right position
|
||||
HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
|
||||
if (prevBucket != null) {
|
||||
long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
|
||||
while (key != nextBucket.getKey()) {
|
||||
iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
|
||||
key = emptyBucketInfo.rounding.nextRoundingValue(key);
|
||||
if (minDocCount == 0) {
|
||||
// we need to fill the gaps with empty buckets
|
||||
while (iter.hasNext()) {
|
||||
// look ahead on the next bucket without advancing the iter
|
||||
// so we'll be able to insert elements at the right position
|
||||
HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
|
||||
if (prevBucket != null) {
|
||||
long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
|
||||
while (key != nextBucket.getKey()) {
|
||||
iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
|
||||
key = emptyBucketInfo.rounding.nextRoundingValue(key);
|
||||
}
|
||||
}
|
||||
prevBucket = iter.next();
|
||||
}
|
||||
} else {
|
||||
while (iter.hasNext()) {
|
||||
if (iter.next().getDocCount() < minDocCount) {
|
||||
iter.remove();
|
||||
}
|
||||
}
|
||||
prevBucket = iter.next();
|
||||
}
|
||||
|
||||
if (order != InternalOrder.KEY_ASC && order != InternalOrder.KEY_DESC) {
|
||||
|
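To make the reduce-time branching above easier to follow, here is a hypothetical, self-contained sketch of the two behaviours: pruning buckets below the threshold when `min_doc_count >= 1`, and filling gaps with empty buckets when `min_doc_count == 0`. The `Bucket` type and fixed `interval` are stand-ins; the actual code walks keys with `Rounding#nextRoundingValue` and operates on Elasticsearch's internal bucket classes.

[source,java]
--------------------------------------------------
import java.util.ArrayList;
import java.util.List;

// Hypothetical simplification of the reduce-time handling of minDocCount.
public class MinDocCountReduceSketch {

    static class Bucket {
        final long key;
        final long docCount;
        Bucket(long key, long docCount) { this.key = key; this.docCount = docCount; }
    }

    /** minDocCount >= 1: drop buckets whose count does not reach the threshold. */
    static List<Bucket> prune(List<Bucket> sortedBuckets, long minDocCount) {
        List<Bucket> kept = new ArrayList<Bucket>();
        for (Bucket b : sortedBuckets) {
            if (b.docCount >= minDocCount) {
                kept.add(b);
            }
        }
        return kept;
    }

    /** minDocCount == 0: insert zero-count buckets between consecutive keys, assuming a fixed interval. */
    static List<Bucket> fillGaps(List<Bucket> sortedBuckets, long interval) {
        List<Bucket> filled = new ArrayList<Bucket>();
        Bucket prev = null;
        for (Bucket next : sortedBuckets) {
            if (prev != null) {
                for (long key = prev.key + interval; key < next.key; key += interval) {
                    filled.add(new Bucket(key, 0));
                }
            }
            filled.add(next);
            prev = next;
        }
        return filled;
    }
}
--------------------------------------------------
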
@ -222,7 +233,9 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
for (int i = 0; i < allocated.length; i++) {
|
||||
if (allocated[i]) {
|
||||
Bucket bucket = ((List<Bucket>) buckets[i]).get(0).reduce(((List<Bucket>) buckets[i]), reduceContext.cacheRecycler());
|
||||
reducedBuckets.add(bucket);
|
||||
if (bucket.getDocCount() >= minDocCount) {
|
||||
reducedBuckets.add(bucket);
|
||||
}
|
||||
}
|
||||
}
|
||||
bucketsByKey.release();
|
||||
|
@ -230,7 +243,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
|
||||
|
||||
// adding empty buckets if needed
|
||||
if (emptyBucketInfo != null) {
|
||||
if (minDocCount == 0) {
|
||||
CollectionUtil.introSort(reducedBuckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
|
||||
List<HistogramBase.Bucket> list = order.asc ? reducedBuckets : Lists.reverse(reducedBuckets);
|
||||
HistogramBase.Bucket prevBucket = null;
|
||||
|
@ -268,7 +281,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
public void readFrom(StreamInput in) throws IOException {
|
||||
name = in.readString();
|
||||
order = InternalOrder.Streams.readOrder(in);
|
||||
if (in.readBoolean()) {
|
||||
minDocCount = in.readVLong();
|
||||
if (minDocCount == 0) {
|
||||
emptyBucketInfo = EmptyBucketInfo.readFrom(in);
|
||||
}
|
||||
formatter = ValueFormatterStreams.readOptional(in);
|
||||
|
@ -286,10 +300,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
|
|||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(name);
|
||||
InternalOrder.Streams.writeOrder(order, out);
|
||||
if (emptyBucketInfo == null) {
|
||||
out.writeBoolean(false);
|
||||
} else {
|
||||
out.writeBoolean(true);
|
||||
out.writeVLong(minDocCount);
|
||||
if (minDocCount == 0) {
|
||||
EmptyBucketInfo.writeTo(emptyBucketInfo, out);
|
||||
}
|
||||
ValueFormatterStreams.writeOptional(formatter, out);
|
||||
|
|
|
@ -86,7 +86,7 @@ public class DateHistogramParser implements Aggregator.Parser {
|
|||
String scriptLang = null;
|
||||
Map<String, Object> scriptParams = null;
|
||||
boolean keyed = false;
|
||||
boolean computeEmptyBuckets = false;
|
||||
long minDocCount = 1;
|
||||
InternalOrder order = (InternalOrder) Histogram.Order.KEY_ASC;
|
||||
String interval = null;
|
||||
boolean preZoneAdjustLargeInterval = false;
|
||||
|
@ -131,13 +131,17 @@ public class DateHistogramParser implements Aggregator.Parser {
|
|||
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
|
||||
if ("keyed".equals(currentFieldName)) {
|
||||
keyed = parser.booleanValue();
|
||||
} else if ("compute_empty_buckets".equals(currentFieldName) || "computeEmptyBuckets".equals(currentFieldName)) {
|
||||
computeEmptyBuckets = parser.booleanValue();
|
||||
} else if ("script_values_sorted".equals(currentFieldName)) {
|
||||
assumeSorted = parser.booleanValue();
|
||||
} else {
|
||||
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
|
||||
}
|
||||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
||||
if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
|
||||
minDocCount = parser.longValue();
|
||||
} else {
|
||||
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
if ("params".equals(currentFieldName)) {
|
||||
scriptParams = parser.map();
|
||||
|
@ -199,17 +203,17 @@ public class DateHistogramParser implements Aggregator.Parser {
|
|||
if (searchScript != null) {
|
||||
ValueParser valueParser = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT));
|
||||
config.parser(valueParser);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
|
||||
}
|
||||
|
||||
// falling back on the get field data context
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
|
||||
}
|
||||
|
||||
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
|
||||
if (mapper == null) {
|
||||
config.unmapped(true);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
|
||||
}
|
||||
|
||||
if (!(mapper instanceof DateFieldMapper)) {
|
||||
|
@ -218,7 +222,7 @@ public class DateHistogramParser implements Aggregator.Parser {
|
|||
|
||||
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
|
||||
config.fieldContext(new FieldContext(field, indexFieldData));
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
|
||||
}
|
||||
|
||||
private static InternalOrder resolveOrder(String key, boolean asc) {
|
||||
|
|
|
@ -45,7 +45,7 @@ public class HistogramAggregator extends BucketsAggregator {
|
|||
private final Rounding rounding;
|
||||
private final InternalOrder order;
|
||||
private final boolean keyed;
|
||||
private final boolean computeEmptyBuckets;
|
||||
private final long minDocCount;
|
||||
private final AbstractHistogramBase.Factory histogramFactory;
|
||||
|
||||
private final LongHash bucketOrds;
|
||||
|
@ -55,7 +55,7 @@ public class HistogramAggregator extends BucketsAggregator {
|
|||
Rounding rounding,
|
||||
InternalOrder order,
|
||||
boolean keyed,
|
||||
boolean computeEmptyBuckets,
|
||||
long minDocCount,
|
||||
@Nullable NumericValuesSource valuesSource,
|
||||
long initialCapacity,
|
||||
AbstractHistogramBase.Factory<?> histogramFactory,
|
||||
|
@ -67,7 +67,7 @@ public class HistogramAggregator extends BucketsAggregator {
|
|||
this.rounding = rounding;
|
||||
this.order = order;
|
||||
this.keyed = keyed;
|
||||
this.computeEmptyBuckets = computeEmptyBuckets;
|
||||
this.minDocCount = minDocCount;
|
||||
this.histogramFactory = histogramFactory;
|
||||
|
||||
bucketOrds = new LongHash(initialCapacity, aggregationContext.pageCacheRecycler());
|
||||
|
@ -118,15 +118,15 @@ public class HistogramAggregator extends BucketsAggregator {
|
|||
|
||||
// value source will be null for unmapped fields
|
||||
ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
|
||||
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
|
||||
return histogramFactory.create(name, buckets, order, emptyBucketInfo, formatter, keyed);
|
||||
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
|
||||
return histogramFactory.create(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public InternalAggregation buildEmptyAggregation() {
|
||||
ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
|
||||
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
|
||||
return histogramFactory.create(name, (List<HistogramBase.Bucket>) Collections.EMPTY_LIST, order, emptyBucketInfo, formatter, keyed);
|
||||
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
|
||||
return histogramFactory.create(name, Collections.emptyList(), order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -139,28 +139,28 @@ public class HistogramAggregator extends BucketsAggregator {
|
|||
private final Rounding rounding;
|
||||
private final InternalOrder order;
|
||||
private final boolean keyed;
|
||||
private final boolean computeEmptyBuckets;
|
||||
private final long minDocCount;
|
||||
private final AbstractHistogramBase.Factory<?> histogramFactory;
|
||||
|
||||
public Factory(String name, ValuesSourceConfig<NumericValuesSource> valueSourceConfig,
|
||||
Rounding rounding, InternalOrder order, boolean keyed, boolean computeEmptyBuckets, AbstractHistogramBase.Factory<?> histogramFactory) {
|
||||
Rounding rounding, InternalOrder order, boolean keyed, long minDocCount, AbstractHistogramBase.Factory<?> histogramFactory) {
|
||||
super(name, histogramFactory.type(), valueSourceConfig);
|
||||
this.rounding = rounding;
|
||||
this.order = order;
|
||||
this.keyed = keyed;
|
||||
this.computeEmptyBuckets = computeEmptyBuckets;
|
||||
this.minDocCount = minDocCount;
|
||||
this.histogramFactory = histogramFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
|
||||
return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, null, 0, histogramFactory, aggregationContext, parent);
|
||||
return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, null, 0, histogramFactory, aggregationContext, parent);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator create(NumericValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||
// todo if we'll keep track of min/max values in IndexFieldData, we could use the max here to come up with a better estimation for the buckets count
|
||||
return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, valuesSource, 50, histogramFactory, aggregationContext, parent);
|
||||
return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, valuesSource, 50, histogramFactory, aggregationContext, parent);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
|
|||
|
||||
private Long interval;
|
||||
private HistogramBase.Order order;
|
||||
private Boolean computeEmptyBuckets;
|
||||
private Long minDocCount;
|
||||
|
||||
public HistogramBuilder(String name) {
|
||||
super(name, InternalHistogram.TYPE.name());
|
||||
|
@ -48,8 +48,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
|
|||
return this;
|
||||
}
|
||||
|
||||
public HistogramBuilder emptyBuckets(boolean computeEmptyBuckets) {
|
||||
this.computeEmptyBuckets = computeEmptyBuckets;
|
||||
public HistogramBuilder minDocCount(long minDocCount) {
|
||||
this.minDocCount = minDocCount;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -65,8 +65,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
|
|||
order.toXContent(builder, params);
|
||||
}
|
||||
|
||||
if (computeEmptyBuckets != null) {
|
||||
builder.field("empty_buckets", computeEmptyBuckets);
|
||||
if (minDocCount != null) {
|
||||
builder.field("min_doc_count", minDocCount);
|
||||
}
|
||||
|
||||
return builder;
|
||||
|
|
|
@ -54,7 +54,7 @@ public class HistogramParser implements Aggregator.Parser {
|
|||
String scriptLang = null;
|
||||
Map<String, Object> scriptParams = null;
|
||||
boolean keyed = false;
|
||||
boolean emptyBuckets = false;
|
||||
long minDocCount = 1;
|
||||
InternalOrder order = (InternalOrder) InternalOrder.KEY_ASC;
|
||||
long interval = -1;
|
||||
boolean assumeSorted = false;
|
||||
|
@ -80,14 +80,14 @@ public class HistogramParser implements Aggregator.Parser {
|
|||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
||||
if ("interval".equals(currentFieldName)) {
|
||||
interval = parser.longValue();
|
||||
} else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
|
||||
minDocCount = parser.longValue();
|
||||
} else {
|
||||
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
|
||||
}
|
||||
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
|
||||
if ("keyed".equals(currentFieldName)) {
|
||||
keyed = parser.booleanValue();
|
||||
} else if ("empty_buckets".equals(currentFieldName) || "emptyBuckets".equals(currentFieldName)) {
|
||||
emptyBuckets = parser.booleanValue();
|
||||
} else if ("script_values_sorted".equals(currentFieldName)) {
|
||||
assumeSorted = parser.booleanValue();
|
||||
} else {
|
||||
|
@ -130,13 +130,13 @@ public class HistogramParser implements Aggregator.Parser {
|
|||
}
|
||||
|
||||
if (field == null) {
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
|
||||
}
|
||||
|
||||
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
|
||||
if (mapper == null) {
|
||||
config.unmapped(true);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
|
||||
}
|
||||
|
||||
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
|
||||
|
@ -146,7 +146,7 @@ public class HistogramParser implements Aggregator.Parser {
|
|||
config.formatter(new ValueFormatter.Number.Pattern(format));
|
||||
}
|
||||
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
|
||||
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -76,8 +76,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
|
|||
}
|
||||
|
||||
@Override
|
||||
public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
return new InternalDateHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
|
||||
public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
return new InternalDateHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -88,8 +88,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
|
|||
|
||||
InternalDateHistogram() {} // for serialization
|
||||
|
||||
InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
super(name, buckets, order, emptyBucketInfo, formatter, keyed);
|
||||
InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -65,8 +65,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
|
|||
return TYPE.name();
|
||||
}
|
||||
|
||||
public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
return new InternalHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
|
||||
public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
public Bucket createBucket(long key, long docCount, InternalAggregations aggregations) {
|
||||
|
@ -77,8 +77,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
|
|||
|
||||
public InternalHistogram() {} // for serialization
|
||||
|
||||
public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
super(name, buckets, order, emptyBucketInfo, formatter, keyed);
|
||||
public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
|
||||
super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -88,12 +88,12 @@ public class DoubleTerms extends InternalTerms {
|
|||
|
||||
DoubleTerms() {} // for serialization
|
||||
|
||||
public DoubleTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
|
||||
this(name, order, null, requiredSize, buckets);
|
||||
public DoubleTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
|
||||
this(name, order, null, requiredSize, minDocCount, buckets);
|
||||
}
|
||||
|
||||
public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, buckets);
|
||||
public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, minDocCount, buckets);
|
||||
this.valueFormatter = valueFormatter;
|
||||
}
|
||||
|
||||
|
@ -147,7 +147,10 @@ public class DoubleTerms extends InternalTerms {
|
|||
for (int i = 0; i < states.length; i++) {
|
||||
if (states[i]) {
|
||||
List<DoubleTerms.Bucket> sameTermBuckets = (List<DoubleTerms.Bucket>) internalBuckets[i];
|
||||
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
|
||||
final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
|
||||
if (b.getDocCount() >= minDocCount) {
|
||||
ordered.insertWithOverflow(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
buckets.release();
|
||||
|
@ -165,6 +168,7 @@ public class DoubleTerms extends InternalTerms {
|
|||
this.order = InternalOrder.Streams.readOrder(in);
|
||||
this.valueFormatter = ValueFormatterStreams.readOptional(in);
|
||||
this.requiredSize = in.readVInt();
|
||||
this.minDocCount = in.readVLong();
|
||||
int size = in.readVInt();
|
||||
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
@ -180,6 +184,7 @@ public class DoubleTerms extends InternalTerms {
|
|||
InternalOrder.Streams.writeOrder(order, out);
|
||||
ValueFormatterStreams.writeOptional(valueFormatter, out);
|
||||
out.writeVInt(requiredSize);
|
||||
out.writeVLong(minDocCount);
|
||||
out.writeVInt(buckets.size());
|
||||
for (InternalTerms.Bucket bucket : buckets) {
|
||||
out.writeDouble(((Bucket) bucket).term);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.index.fielddata.DoubleValues;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
|
@ -40,16 +41,18 @@ public class DoubleTermsAggregator extends BucketsAggregator {
|
|||
private final InternalOrder order;
|
||||
private final int requiredSize;
|
||||
private final int shardSize;
|
||||
private final long minDocCount;
|
||||
private final NumericValuesSource valuesSource;
|
||||
private final LongHash bucketOrds;
|
||||
|
||||
public DoubleTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
|
||||
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
|
||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
|
||||
this.valuesSource = valuesSource;
|
||||
this.order = InternalOrder.validate(order, this);
|
||||
this.requiredSize = requiredSize;
|
||||
this.shardSize = shardSize;
|
||||
this.minDocCount = minDocCount;
|
||||
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
|
||||
}
|
||||
|
||||
|
@ -78,6 +81,21 @@ public class DoubleTermsAggregator extends BucketsAggregator {
|
|||
@Override
|
||||
public DoubleTerms buildAggregation(long owningBucketOrdinal) {
|
||||
assert owningBucketOrdinal == 0;
|
||||
|
||||
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
|
||||
// we need to fill-in the blanks
|
||||
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
|
||||
context.setNextReader(ctx);
|
||||
final DoubleValues values = valuesSource.doubleValues();
|
||||
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
|
||||
final int valueCount = values.setDocument(docId);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
bucketOrds.add(Double.doubleToLongBits(values.nextValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int size = (int) Math.min(bucketOrds.size(), shardSize);
|
||||
|
||||
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
|
||||
|
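The "fill in the blanks" step above can be summarised with a hypothetical, stand-alone sketch (plain collections instead of the `LongHash` and values-source machinery used by the aggregator): when `min_doc_count == 0`, every distinct value present in the index is registered so that values matching no hit still produce a zero-count bucket.

[source,java]
--------------------------------------------------
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

// Hypothetical simplification of the "fill in the blanks" step for numeric terms.
public class ZeroCountTermsSketch {

    static Map<Long, Long> withZeroCountTerms(long[][] valuesPerDocument, Map<Long, Long> countsFromMatchingDocs) {
        Map<Long, Long> counts = new TreeMap<Long, Long>(countsFromMatchingDocs);
        Set<Long> allValues = new HashSet<Long>();
        for (long[] docValues : valuesPerDocument) { // stand-in for walking every segment and document
            for (long value : docValues) {
                allValues.add(value);
            }
        }
        for (Long value : allValues) {
            if (!counts.containsKey(value)) {
                counts.put(value, 0L); // the value exists in the index but matched no hit
            }
        }
        return counts;
    }
}
--------------------------------------------------
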
@ -104,12 +122,12 @@ public class DoubleTermsAggregator extends BucketsAggregator {
|
|||
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
|
||||
list[i] = bucket;
|
||||
}
|
||||
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
|
||||
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleTerms buildEmptyAggregation() {
|
||||
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
|
||||
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import org.elasticsearch.cache.recycler.CacheRecycler;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
|
@ -78,15 +79,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
|
|||
|
||||
protected InternalOrder order;
|
||||
protected int requiredSize;
|
||||
protected long minDocCount;
|
||||
protected Collection<Bucket> buckets;
|
||||
protected Map<String, Bucket> bucketMap;
|
||||
|
||||
protected InternalTerms() {} // for serialization
|
||||
|
||||
protected InternalTerms(String name, InternalOrder order, int requiredSize, Collection<Bucket> buckets) {
|
||||
protected InternalTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<Bucket> buckets) {
|
||||
super(name);
|
||||
this.order = order;
|
||||
this.requiredSize = requiredSize;
|
||||
this.minDocCount = minDocCount;
|
||||
this.buckets = buckets;
|
||||
}
|
||||
|
||||
|
@ -156,7 +159,10 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
|
|||
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(null));
|
||||
for (Map.Entry<Text, List<Bucket>> entry : buckets.entrySet()) {
|
||||
List<Bucket> sameTermBuckets = entry.getValue();
|
||||
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
|
||||
final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
|
||||
if (b.docCount >= minDocCount) {
|
||||
ordered.insertWithOverflow(b);
|
||||
}
|
||||
}
|
||||
Bucket[] list = new Bucket[ordered.size()];
|
||||
for (int i = ordered.size() - 1; i >= 0; i--) {
|
||||
|
@ -166,23 +172,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
|
|||
return reduced;
|
||||
}
|
||||
|
||||
protected void trimExcessEntries() {
|
||||
if (requiredSize >= buckets.size()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (buckets instanceof List) {
|
||||
buckets = ((List) buckets).subList(0, requiredSize);
|
||||
return;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
for (Iterator<Bucket> iter = buckets.iterator(); iter.hasNext();) {
|
||||
iter.next();
|
||||
if (i++ >= requiredSize) {
|
||||
iter.remove();
|
||||
final void trimExcessEntries() {
|
||||
final List<Bucket> newBuckets = Lists.newArrayList();
|
||||
for (Bucket b : buckets) {
|
||||
if (newBuckets.size() >= requiredSize) {
|
||||
break;
|
||||
}
|
||||
if (b.docCount >= minDocCount) {
|
||||
newBuckets.add(b);
|
||||
}
|
||||
}
|
||||
buckets = newBuckets;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -89,8 +89,8 @@ public class LongTerms extends InternalTerms {
|
|||
|
||||
LongTerms() {} // for serialization
|
||||
|
||||
public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, buckets);
|
||||
public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, minDocCount, buckets);
|
||||
this.valueFormatter = valueFormatter;
|
||||
}
|
||||
|
||||
|
@ -144,7 +144,10 @@ public class LongTerms extends InternalTerms {
|
|||
for (int i = 0; i < states.length; i++) {
|
||||
if (states[i]) {
|
||||
List<LongTerms.Bucket> sameTermBuckets = (List<LongTerms.Bucket>) internalBuckets[i];
|
||||
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
|
||||
final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
|
||||
if (b.getDocCount() >= minDocCount) {
|
||||
ordered.insertWithOverflow(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
buckets.release();
|
||||
|
@ -162,6 +165,7 @@ public class LongTerms extends InternalTerms {
|
|||
this.order = InternalOrder.Streams.readOrder(in);
|
||||
this.valueFormatter = ValueFormatterStreams.readOptional(in);
|
||||
this.requiredSize = in.readVInt();
|
||||
this.minDocCount = in.readVLong();
|
||||
int size = in.readVInt();
|
||||
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
@ -177,6 +181,7 @@ public class LongTerms extends InternalTerms {
|
|||
InternalOrder.Streams.writeOrder(order, out);
|
||||
ValueFormatterStreams.writeOptional(valueFormatter, out);
|
||||
out.writeVInt(requiredSize);
|
||||
out.writeVLong(minDocCount);
|
||||
out.writeVInt(buckets.size());
|
||||
for (InternalTerms.Bucket bucket : buckets) {
|
||||
out.writeLong(((Bucket) bucket).term);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.index.fielddata.LongValues;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
|
@ -40,16 +41,18 @@ public class LongTermsAggregator extends BucketsAggregator {
|
|||
private final InternalOrder order;
|
||||
private final int requiredSize;
|
||||
private final int shardSize;
|
||||
private final long minDocCount;
|
||||
private final NumericValuesSource valuesSource;
|
||||
private final LongHash bucketOrds;
|
||||
|
||||
public LongTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
|
||||
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
|
||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
|
||||
this.valuesSource = valuesSource;
|
||||
this.order = InternalOrder.validate(order, this);
|
||||
this.requiredSize = requiredSize;
|
||||
this.shardSize = shardSize;
|
||||
this.minDocCount = minDocCount;
|
||||
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
|
||||
}
|
||||
|
||||
|
@ -77,6 +80,21 @@ public class LongTermsAggregator extends BucketsAggregator {
|
|||
@Override
|
||||
public LongTerms buildAggregation(long owningBucketOrdinal) {
|
||||
assert owningBucketOrdinal == 0;
|
||||
|
||||
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
|
||||
// we need to fill-in the blanks
|
||||
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
|
||||
context.setNextReader(ctx);
|
||||
final LongValues values = valuesSource.longValues();
|
||||
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
|
||||
final int valueCount = values.setDocument(docId);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
bucketOrds.add(values.nextValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int size = (int) Math.min(bucketOrds.size(), shardSize);
|
||||
|
||||
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
|
||||
|
@ -103,12 +121,12 @@ public class LongTermsAggregator extends BucketsAggregator {
|
|||
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
|
||||
list[i] = bucket;
|
||||
}
|
||||
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
|
||||
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongTerms buildEmptyAggregation() {
|
||||
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
|
||||
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -83,8 +83,8 @@ public class StringTerms extends InternalTerms {
|
|||
|
||||
StringTerms() {} // for serialization
|
||||
|
||||
public StringTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, buckets);
|
||||
public StringTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
|
||||
super(name, order, requiredSize, minDocCount, buckets);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -97,6 +97,7 @@ public class StringTerms extends InternalTerms {
|
|||
this.name = in.readString();
|
||||
this.order = InternalOrder.Streams.readOrder(in);
|
||||
this.requiredSize = in.readVInt();
|
||||
this.minDocCount = in.readVLong();
|
||||
int size = in.readVInt();
|
||||
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
@ -111,6 +112,7 @@ public class StringTerms extends InternalTerms {
|
|||
out.writeString(name);
|
||||
InternalOrder.Streams.writeOrder(order, out);
|
||||
out.writeVInt(requiredSize);
|
||||
out.writeVLong(minDocCount);
|
||||
out.writeVInt(buckets.size());
|
||||
for (InternalTerms.Bucket bucket : buckets) {
|
||||
out.writeBytesRef(((Bucket) bucket).termBytes);
|
||||
|
|
|
@ -18,30 +18,31 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import com.sun.corba.se.impl.naming.cosnaming.InterOperableNamingImpl;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.UnmodifiableIterator;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.collect.Iterators2;
|
||||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.lucene.ReaderContextAware;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.LongArray;
|
||||
import org.elasticsearch.index.fielddata.BytesValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.BytesRefHash;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
|
||||
import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
|
||||
import org.elasticsearch.search.aggregations.support.AggregationContext;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||
import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An aggregator of string values.
|
||||
|
@ -52,11 +53,12 @@ public class StringTermsAggregator extends BucketsAggregator {
|
|||
private final InternalOrder order;
|
||||
private final int requiredSize;
|
||||
private final int shardSize;
|
||||
private final long minDocCount;
|
||||
protected final BytesRefHash bucketOrds;
|
||||
private final IncludeExclude includeExclude;
|
||||
|
||||
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
|
||||
InternalOrder order, int requiredSize, int shardSize,
|
||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount,
|
||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
|
||||
|
||||
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
|
||||
|
@ -64,6 +66,7 @@ public class StringTermsAggregator extends BucketsAggregator {
|
|||
this.order = InternalOrder.validate(order, this);
|
||||
this.requiredSize = requiredSize;
|
||||
this.shardSize = shardSize;
|
||||
this.minDocCount = minDocCount;
|
||||
this.includeExclude = includeExclude;
|
||||
bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
|
||||
}
|
||||
|
@ -94,9 +97,122 @@ public class StringTermsAggregator extends BucketsAggregator {
|
|||
}
|
||||
}
|
||||
|
||||
/** Returns an iterator over the field data terms. */
|
||||
private static Iterator<BytesRef> terms(final BytesValues.WithOrdinals bytesValues, boolean reverse) {
|
||||
final Ordinals.Docs ordinals = bytesValues.ordinals();
|
||||
if (reverse) {
|
||||
return new UnmodifiableIterator<BytesRef>() {
|
||||
|
||||
long i = ordinals.getMaxOrd() - 1;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return i >= Ordinals.MIN_ORDINAL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
bytesValues.getValueByOrd(i--);
|
||||
return bytesValues.copyShared();
|
||||
}
|
||||
|
||||
};
|
||||
} else {
|
||||
return new UnmodifiableIterator<BytesRef>() {
|
||||
|
||||
long i = Ordinals.MIN_ORDINAL;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return i < ordinals.getMaxOrd();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() {
|
||||
bytesValues.getValueByOrd(i++);
|
||||
return bytesValues.copyShared();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringTerms buildAggregation(long owningBucketOrdinal) {
|
||||
assert owningBucketOrdinal == 0;
|
||||
|
||||
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
|
||||
// we need to fill-in the blanks
|
||||
List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
|
||||
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
|
||||
context.setNextReader(ctx);
|
||||
final BytesValues values = valuesSource.bytesValues();
|
||||
if (values instanceof BytesValues.WithOrdinals) {
|
||||
valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
|
||||
} else {
|
||||
// brute force
|
||||
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
|
||||
final int valueCount = values.setDocument(docId);
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
final BytesRef term = values.nextValue();
|
||||
if (includeExclude == null || includeExclude.accept(term)) {
|
||||
bucketOrds.add(term, values.currentValueHash());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// With ordinals we can be smarter and add just as many terms as necessary to the hash table
|
||||
// For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
|
||||
// either be excluded by the priority queue or at reduce time.
|
||||
if (valuesWithOrdinals.size() > 0) {
|
||||
final boolean reverse = order == InternalOrder.TERM_DESC;
|
||||
Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
if (reverse) {
|
||||
comparator = Collections.reverseOrder(comparator);
|
||||
}
|
||||
Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
|
||||
for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
|
||||
iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
|
||||
}
|
||||
Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
|
||||
if (includeExclude != null) {
|
||||
terms = Iterators.filter(terms, new Predicate<BytesRef>() {
|
||||
@Override
|
||||
public boolean apply(BytesRef input) {
|
||||
return includeExclude.accept(input);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (order == InternalOrder.COUNT_ASC) {
|
||||
// let's try to find `shardSize` terms that matched no hit
|
||||
// this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
|
||||
// they might have higher counts on other shards
|
||||
for (int added = 0; added < shardSize && terms.hasNext(); ) {
|
||||
if (bucketOrds.add(terms.next()) >= 0) {
|
||||
++added;
|
||||
}
|
||||
}
|
||||
} else if (order == InternalOrder.COUNT_DESC) {
|
||||
// add terms until there are enough buckets
|
||||
while (bucketOrds.size() < requiredSize && terms.hasNext()) {
|
||||
bucketOrds.add(terms.next());
|
||||
}
|
||||
} else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
|
||||
// add the `requiredSize` least terms
|
||||
for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
|
||||
bucketOrds.add(terms.next());
|
||||
}
|
||||
} else {
|
||||
// other orders (aggregations) are not optimizable
|
||||
while (terms.hasNext()) {
|
||||
bucketOrds.add(terms.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int size = (int) Math.min(bucketOrds.size(), shardSize);
|
||||
|
||||
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
|
||||
|
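The order-dependent candidate selection above can be summarised with a hypothetical sketch (plain `String` terms and collections instead of `BytesRefHash`): when counting ascending, up to `shard_size` unseen terms are added because other shards may still raise their counts, whereas when sorting by term only the first `size` terms can make it into the final response; other orders cannot be bounded up front.

[source,java]
--------------------------------------------------
import java.util.Iterator;
import java.util.Set;

// Hypothetical simplification of the zero-count candidate selection for string terms.
public class ZeroCountCandidateSketch {

    static void addZeroCountCandidates(Iterator<String> sortedDedupedTerms, Set<String> buckets,
                                       boolean countAsc, boolean sortByTerm, int requiredSize, int shardSize) {
        if (countAsc) {
            // Zero-count terms sort first by count, but other shards may still count them:
            // keep up to shardSize of them so the reduce phase can make the final call.
            int added = 0;
            while (added < shardSize && sortedDedupedTerms.hasNext()) {
                if (buckets.add(sortedDedupedTerms.next())) {
                    added++;
                }
            }
        } else if (sortByTerm) {
            // Sorting by term: only the first requiredSize terms can appear in the response.
            for (int i = 0; i < requiredSize && sortedDedupedTerms.hasNext(); i++) {
                buckets.add(sortedDedupedTerms.next());
            }
        } else {
            // Other orders (count descending, sub-aggregation based) are handled differently
            // in the real code; the simplest fallback is to add every remaining term.
            while (sortedDedupedTerms.hasNext()) {
                buckets.add(sortedDedupedTerms.next());
            }
        }
    }
}
--------------------------------------------------
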
@ -117,12 +233,13 @@ public class StringTermsAggregator extends BucketsAggregator {
|
|||
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
|
||||
list[i] = bucket;
|
||||
}
|
||||
return new StringTerms(name, order, requiredSize, Arrays.asList(list));
|
||||
|
||||
return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringTerms buildEmptyAggregation() {
|
||||
return new StringTerms(name, order, requiredSize, Collections.<InternalTerms.Bucket>emptyList());
|
||||
return new StringTerms(name, order, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -141,8 +258,8 @@ public class StringTermsAggregator extends BucketsAggregator {
|
|||
private LongArray ordinalToBucket;
|
||||
|
||||
public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, long esitmatedBucketCount,
|
||||
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
|
||||
super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, null, aggregationContext, parent);
|
||||
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
|
||||
super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
|
||||
this.valuesSource = valuesSource;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,21 +41,23 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
|
|||
private final InternalOrder order;
|
||||
private final int requiredSize;
|
||||
private final int shardSize;
|
||||
private final long minDocCount;
|
||||
private final IncludeExclude includeExclude;
|
||||
private final String executionHint;
|
||||
|
||||
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) {
|
||||
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, String executionHint) {
|
||||
super(name, StringTerms.TYPE.name(), valueSourceConfig);
|
||||
this.order = order;
|
||||
this.requiredSize = requiredSize;
|
||||
this.shardSize = shardSize;
|
||||
this.minDocCount = minDocCount;
|
||||
this.includeExclude = includeExclude;
|
||||
this.executionHint = executionHint;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
|
||||
return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent);
|
||||
return new UnmappedTermsAggregator(name, order, requiredSize, minDocCount, aggregationContext, parent);
|
||||
}
|
||||
|
||||
private static boolean hasParentBucketAggregator(Aggregator parent) {
|
||||
|
@ -107,11 +109,11 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
|
|||
if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
|
||||
assert includeExclude == null;
|
||||
final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name,
|
||||
factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
|
||||
factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||
aggregationContext.registerReaderContextAware(aggregator);
|
||||
return aggregator;
|
||||
} else {
|
||||
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
|
||||
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,9 +124,9 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
|
|||
|
||||
if (valuesSource instanceof NumericValuesSource) {
|
||||
if (((NumericValuesSource) valuesSource).isFloatingPoint()) {
|
||||
return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
|
||||
return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||
}
|
||||
return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
|
||||
return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
|
||||
}
|
||||
|
||||
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + valuesSourceConfig.fieldContext().field() +
|
||||
|
|
|
@@ -33,6 +33,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {

    private int size = -1;
    private int shardSize = -1;
+    private long minDocCount = -1;
    private Terms.ValueType valueType;
    private Terms.Order order;
    private String includePattern;

@@ -62,6 +63,14 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
        return this;
    }

+    /**
+     * Set the minimum document count terms should have in order to appear in the response.
+     */
+    public TermsBuilder minDocCount(long minDocCount) {
+        this.minDocCount = minDocCount;
+        return this;
+    }
+
    /**
     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
     * on the {@link java.util.regex.Pattern} class.

@@ -135,6 +144,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
        if (shardSize >= 0) {
            builder.field("shard_size", shardSize);
        }
+        if (minDocCount >= 0) {
+            builder.field("min_doc_count", minDocCount);
+        }
        if (valueType != null) {
            builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT));
        }
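Note: the following is a minimal usage sketch of the new `minDocCount` builder option from the Java API. It is not part of the commit; the index name "idx", the field name "tag" and the `client` instance are illustrative assumptions.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;

public class MinDocCountUsageExample {

    // Only return tags that appear in at least 10 documents; terms with a
    // lower document count are dropped from the response.
    public static Terms aggregateFrequentTags(Client client) {
        SearchResponse response = client.prepareSearch("idx")
                .addAggregation(AggregationBuilders.terms("tags")
                        .field("tag")
                        .minDocCount(10))
                .execute().actionGet();
        return response.getAggregations().get("tags");
    }
}
--------------------------------------------------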
@@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser {
        String exclude = null;
        int excludeFlags = 0; // 0 means no flags
        String executionHint = null;
+        long minDocCount = 1;

        XContentParser.Token token;

@@ -110,6 +111,8 @@ public class TermsParser implements Aggregator.Parser {
                    requiredSize = parser.intValue();
                } else if ("shard_size".equals(currentFieldName) || "shardSize".equals(currentFieldName)) {
                    shardSize = parser.intValue();
+                } else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
+                    minDocCount = parser.intValue();
                } else {
                    throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
                }

@@ -206,14 +209,14 @@ public class TermsParser implements Aggregator.Parser {
            if (!assumeUnique) {
                config.ensureUnique(true);
            }
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
        }

        FieldMapper<?> mapper = context.smartNameFieldMapper(field);
        if (mapper == null) {
            ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
            config.unmapped(true);
-            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
        }
        IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);

@@ -255,7 +258,7 @@ public class TermsParser implements Aggregator.Parser {
            config.ensureUnique(true);
        }

-        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
+        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
    }

    static InternalOrder resolveOrder(String key, boolean asc) {
@@ -53,8 +53,8 @@ public class UnmappedTerms extends InternalTerms {

    UnmappedTerms() {} // for serialization

-    public UnmappedTerms(String name, InternalOrder order, int requiredSize) {
-        super(name, order, requiredSize, BUCKETS);
+    public UnmappedTerms(String name, InternalOrder order, int requiredSize, long minDocCount) {
+        super(name, order, requiredSize, minDocCount, BUCKETS);
    }

    @Override

@@ -67,6 +67,7 @@ public class UnmappedTerms extends InternalTerms {
        this.name = in.readString();
        this.order = InternalOrder.Streams.readOrder(in);
        this.requiredSize = in.readVInt();
+        this.minDocCount = in.readVLong();
        this.buckets = BUCKETS;
        this.bucketMap = BUCKETS_MAP;
    }

@@ -76,6 +77,7 @@ public class UnmappedTerms extends InternalTerms {
        out.writeString(name);
        InternalOrder.Streams.writeOrder(order, out);
        out.writeVInt(requiredSize);
+        out.writeVLong(minDocCount);
    }

    @Override
@@ -32,11 +32,13 @@ public class UnmappedTermsAggregator extends Aggregator {

    private final InternalOrder order;
    private final int requiredSize;
+    private final long minDocCount;

-    public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, AggregationContext aggregationContext, Aggregator parent) {
+    public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
        super(name, BucketAggregationMode.PER_BUCKET, AggregatorFactories.EMPTY, 0, aggregationContext, parent);
        this.order = order;
        this.requiredSize = requiredSize;
+        this.minDocCount = minDocCount;
    }

    @Override

@@ -51,11 +53,11 @@ public class UnmappedTermsAggregator extends Aggregator {
    @Override
    public InternalAggregation buildAggregation(long owningBucketOrdinal) {
        assert owningBucketOrdinal == 0;
-        return new UnmappedTerms(name, order, requiredSize);
+        return new UnmappedTerms(name, order, requiredSize, minDocCount);
    }

    @Override
    public InternalAggregation buildEmptyAggregation() {
-        return new UnmappedTerms(name, order, requiredSize);
+        return new UnmappedTerms(name, order, requiredSize, minDocCount);
    }
}
@@ -0,0 +1,50 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.collect;

import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.test.ElasticsearchTestCase;

import java.util.Iterator;
import java.util.List;

public class Iterators2Tests extends ElasticsearchTestCase {

    public void testDeduplicateSorted() {
        final List<String> list = Lists.newArrayList();
        for (int i = randomInt(100); i >= 0; --i) {
            final int frequency = randomIntBetween(1, 10);
            final String s = randomAsciiOfLength(randomIntBetween(2, 20));
            for (int j = 0; j < frequency; ++j) {
                list.add(s);
            }
        }
        CollectionUtil.introSort(list);
        final List<String> deduplicated = Lists.newArrayList();
        for (Iterator<String> it = Iterators2.deduplicateSorted(list.iterator(), Ordering.natural()); it.hasNext(); ) {
            deduplicated.add(it.next());
        }
        assertEquals(Lists.newArrayList(Sets.newTreeSet(list)), deduplicated);
    }

}
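Note: the `Iterators2` helper itself is not shown in this commit view. The sketch below only illustrates the kind of utility the test above exercises, dropping consecutive duplicates from a sorted iterator. It is an assumption-based example (the class name `SortedDeduplicator` is made up), not necessarily the actual `Iterators2` implementation.

[source,java]
--------------------------------------------------
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.common.collect.UnmodifiableIterator;

import java.util.Comparator;
import java.util.Iterator;

public final class SortedDeduplicator {

    // Wraps a sorted iterator and returns only the first element of each run
    // of elements that compare equal according to the given comparator.
    public static <T> Iterator<T> deduplicateSorted(Iterator<? extends T> iterator, final Comparator<? super T> comparator) {
        final PeekingIterator<T> it = Iterators.peekingIterator(iterator);
        return new UnmodifiableIterator<T>() {
            @Override
            public boolean hasNext() {
                return it.hasNext();
            }

            @Override
            public T next() {
                final T next = it.next();
                // skip subsequent elements that are equal to the one just returned
                while (it.hasNext() && comparator.compare(next, it.peek()) == 0) {
                    it.next();
                }
                return next;
            }
        };
    }
}
--------------------------------------------------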
@@ -888,7 +888,7 @@ public class DateHistogramTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateHistogram("date_histo").interval(1)))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateHistogram("date_histo").interval(1)))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -994,7 +994,7 @@ public class DateRangeTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -600,7 +600,7 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(terms("terms")))
                .execute().actionGet();
@@ -23,8 +23,8 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
+import org.elasticsearch.search.aggregations.bucket.filter.Filter;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.hamcrest.Matchers;

@@ -159,7 +159,7 @@ public class FilterTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(filter("filter").filter(matchAllFilter())))
                .execute().actionGet();
@@ -371,7 +371,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(geoDistance("geo_dist").field("location").point("52.3760, 4.894").addRange("0-100", 0.0, 100.0)))
                .execute().actionGet();
@@ -756,7 +756,7 @@ public class HistogramTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(histogram("sub_histo").interval(1l)))
                .execute().actionGet();
@@ -842,7 +842,7 @@ public class IPv4RangeTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(ipRange("ip_range").field("ip").addRange("r1", "10.0.0.1", "10.0.0.10")))
                .execute().actionGet();
@@ -594,7 +594,7 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(terms("terms")))
                .execute().actionGet();
@@ -0,0 +1,322 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.aggregations.bucket;

import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.LongSet;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Before;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

public class MinDocCountTests extends ElasticsearchIntegrationTest {

    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);

    @Override
    public Settings indexSettings() {
        return ImmutableSettings.builder()
                .put("index.number_of_shards", between(1, 5))
                .put("index.number_of_replicas", between(0, 1))
                .build();
    }

    private int cardinality;

    @Before
    public void indexData() throws Exception {
        createIndex("idx");

        cardinality = randomIntBetween(8, 30);
        final List<IndexRequestBuilder> indexRequests = new ArrayList<IndexRequestBuilder>();
        final Set<String> stringTerms = new HashSet<String>();
        final LongSet longTerms = new LongOpenHashSet();
        for (int i = 0; i < cardinality; ++i) {
            String stringTerm;
            do {
                stringTerm = RandomStrings.randomAsciiOfLength(getRandom(), 8);
            } while (!stringTerms.add(stringTerm));
            long longTerm;
            do {
                longTerm = randomInt(cardinality * 2);
            } while (!longTerms.add(longTerm));
            double doubleTerm = longTerm * Math.PI;
            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
            for (int j = 0; j < frequency; ++j) {
                indexRequests.add(client().prepareIndex("idx", "type").setSource(jsonBuilder().startObject().field("s", stringTerm).field("l", longTerm).field("d", doubleTerm).field("match", randomBoolean()).endObject()));
            }
        }
        cardinality = stringTerms.size();

        indexRandom(true, indexRequests);
        ensureSearchable();
    }

    private enum Script {
        NO {
            @Override
            TermsBuilder apply(TermsBuilder builder, String field) {
                return builder.field(field);
            }
        },
        YES {
            @Override
            TermsBuilder apply(TermsBuilder builder, String field) {
                return builder.script("doc['" + field + "'].values");
            }
        };
        abstract TermsBuilder apply(TermsBuilder builder, String field);
    }

    // check that terms2 is a subset of terms1
    private void assertSubset(Terms terms1, Terms terms2, long minDocCount, int size, String include) {
        final Matcher matcher = include == null ? null : Pattern.compile(include).matcher("");
        final Iterator<Terms.Bucket> it1 = terms1.iterator();
        final Iterator<Terms.Bucket> it2 = terms2.iterator();
        int size2 = 0;
        while (it1.hasNext()) {
            final Terms.Bucket bucket1 = it1.next();
            if (bucket1.getDocCount() >= minDocCount && (matcher == null || matcher.reset(bucket1.getKey().string()).matches())) {
                if (size2++ == size) {
                    break;
                }
                assertTrue(it2.hasNext());
                final Terms.Bucket bucket2 = it2.next();
                assertEquals(bucket1.getKey(), bucket2.getKey());
                assertEquals(bucket1.getDocCount(), bucket2.getDocCount());
            }
        }
        assertFalse(it2.hasNext());
    }

    private void assertSubset(Histogram histo1, Histogram histo2, long minDocCount) {
        final Iterator<Histogram.Bucket> it2 = histo2.iterator();
        for (Histogram.Bucket b1 : histo1) {
            if (b1.getDocCount() >= minDocCount) {
                final Histogram.Bucket b2 = it2.next();
                assertEquals(b1.getKey(), b2.getKey());
                assertEquals(b1.getDocCount(), b2.getDocCount());
            }
        }
    }

    public void testStringTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(true));
    }

    public void testStringScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(true));
    }

    public void testStringTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(false));
    }

    public void testStringScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(false));
    }

    public void testStringCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true));
    }

    public void testStringScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true));
    }

    public void testStringCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false));
    }

    public void testStringScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false));
    }

    public void testStringCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true), ".*a.*");
    }

    public void testStringScriptCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true), ".*a.*");
    }

    public void testStringCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false), ".*a.*");
    }

    public void testStringScriptCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false), ".*a.*");
    }

    public void testLongTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(true));
    }

    public void testLongScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(true));
    }

    public void testLongTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(false));
    }

    public void testLongScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(false));
    }

    public void testLongCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(true));
    }

    public void testLongScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(true));
    }

    public void testLongCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(false));
    }

    public void testLongScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(false));
    }

    public void testDoubleTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(true));
    }

    public void testDoubleScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(true));
    }

    public void testDoubleTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(false));
    }

    public void testDoubleScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(false));
    }

    public void testDoubleCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(true));
    }

    public void testDoubleScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(true));
    }

    public void testDoubleCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(false));
    }

    public void testDoubleScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(false));
    }

    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order) throws Exception {
        testMinDocCountOnTerms(field, script, order, null);
    }

    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order, String include) throws Exception {
        // all terms
        final SearchResponse allTermsResponse = client().prepareSearch("idx").setTypes("type")
                .setSearchType(SearchType.COUNT)
                .setQuery(QUERY)
                .addAggregation(script.apply(terms("terms"), field)
                        .executionHint(StringTermsTests.randomExecutionHint())
                        .order(order)
                        .size(cardinality + randomInt(10))
                        .minDocCount(0))
                .execute().actionGet();
        final Terms allTerms = allTermsResponse.getAggregations().get("terms");
        assertEquals(cardinality, allTerms.buckets().size());

        for (long minDocCount = 0; minDocCount < 20; ++minDocCount) {
            final int size = randomIntBetween(1, cardinality + 2);
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSearchType(SearchType.COUNT)
                    .setQuery(QUERY)
                    .addAggregation(script.apply(terms("terms"), field)
                            .executionHint(StringTermsTests.randomExecutionHint())
                            .order(order)
                            .size(size)
                            .include(include)
                            .shardSize(cardinality + randomInt(10))
                            .minDocCount(minDocCount))
                    .execute().actionGet();
            assertSubset(allTerms, (Terms) response.getAggregations().get("terms"), minDocCount, size, include);
        }
    }

    public void testHistogramCountAsc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.COUNT_ASC);
    }

    public void testHistogramCountDesc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.COUNT_DESC);
    }

    public void testHistogramKeyAsc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.KEY_ASC);
    }

    public void testHistogramKeyDesc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.KEY_DESC);
    }

    private void testMinDocCountOnHistogram(Histogram.Order order) throws Exception {
        final int interval = randomIntBetween(1, 3);
        final SearchResponse allResponse = client().prepareSearch("idx").setTypes("type")
                .setSearchType(SearchType.COUNT)
                .setQuery(QUERY)
                .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(0))
                .execute().actionGet();

        final Histogram allHisto = allResponse.getAggregations().get("histo");

        for (long minDocCount = 0; minDocCount < 50; ++minDocCount) {
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSearchType(SearchType.COUNT)
                    .setQuery(QUERY)
                    .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(minDocCount))
                    .execute().actionGet();
            assertSubset(allHisto, (Histogram) response.getAggregations().get("histo"), minDocCount);
        }
    }

}
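Note: as a histogram counterpart to the earlier builder sketch, the following illustrative example exercises the new `minDocCount` option on the histogram builder, much like the test above does. It is not part of the commit; "idx", "price" and the `client` instance are assumed names.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;

public class HistogramMinDocCountExample {

    // Buckets with fewer than 5 documents are omitted from the response;
    // minDocCount(0) would instead also return empty buckets between the
    // minimum and maximum keys.
    public static Histogram priceHistogram(Client client) {
        SearchResponse response = client.prepareSearch("idx")
                .addAggregation(AggregationBuilders.histogram("prices")
                        .field("price")
                        .interval(50)
                        .minDocCount(5))
                .execute().actionGet();
        return response.getAggregations().get("prices");
    }
}
--------------------------------------------------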
@@ -202,7 +202,7 @@ public class MissingTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(missing("missing")))
                .execute().actionGet();
@@ -251,7 +251,7 @@ public class NestedTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(nested("nested").path("nested")))
                .execute().actionGet();
@@ -921,7 +921,7 @@ public class RangeTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(range("range").addRange("0-2", 0.0, 2.0)))
                .execute().actionGet();
@@ -66,7 +66,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                .build();
    }

-    private String randomExecutionHint() {
+    public static String randomExecutionHint() {
        return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS));
    }

@@ -758,7 +758,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                        .subAggregation(terms("terms")))
                .execute().actionGet();
@@ -39,7 +39,7 @@ public class AvgTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(avg("avg")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(avg("avg")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -53,7 +53,7 @@ public class ExtendedStatsTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(extendedStats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(extendedStats("stats")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -39,7 +39,7 @@ public class MaxTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(max("max")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(max("max")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -39,7 +39,7 @@ public class MinTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(min("min")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(min("min")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));
@@ -40,7 +40,7 @@ public class StatsTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(stats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(stats("stats")))
                .execute().actionGet();

        assertShardExecutionState(searchResponse, 0);
@@ -39,7 +39,7 @@ public class SumTests extends AbstractNumericTests {

        SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(sum("sum")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(sum("sum")))
                .execute().actionGet();

        assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));