Add new option `min_doc_count` to terms and histogram aggregations.

`min_doc_count` is the minimum number of hits that a term or histogram key
should match in order to appear in the response.

`min_doc_count=0` replaces `compute_empty_buckets` for histograms and will
behave exactly like facets' `all_terms=true` for terms aggregations.

Close #4662
Adrien Grand 2014-01-08 15:08:18 +01:00
parent 943b62634c
commit 5c237fe834
42 changed files with 859 additions and 136 deletions

View File

@ -159,6 +159,63 @@ If the histogram aggregation has a direct metrics sub-aggregation, the latter ca
<2> There is no need to configure the `price` field for the `price_stats` aggregation as it will inherit it by default from its parent histogram aggregation.
==== Minimum document count
The `min_doc_count` option makes it possible to only return buckets that have a document count greater than or equal to a configured limit.
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "prices" : {
            "histogram" : {
                "field" : "price",
                "interval" : 50,
                "min_doc_count": 10
            }
        }
    }
}
--------------------------------------------------
The above aggregation would only return buckets that contain 10 documents or more. The default value is `1`.
NOTE: The special value `0` can be used to add empty buckets to the response between the minimum and the maximum buckets. Here is an example of what the response could look like:
[source,js]
--------------------------------------------------
{
    "aggregations": {
        "prices": {
            "0": {
                "key": 0,
                "doc_count": 2
            },
            "50": {
                "key": 50,
                "doc_count": 0
            },
            "100": {
                "key": 100,
                "doc_count": 0
            },
            "150": {
                "key": 150,
                "doc_count": 3
            },
            "200": {
                "key": 200,
                "doc_count": 0
            },
            "250": {
                "key": 250,
                "doc_count": 0
            },
            "300": {
                "key": 300,
                "doc_count": 1
            }
        }
    }
}
--------------------------------------------------
==== Response Format
By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash keyed by the buckets' keys.
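A minimal request sketch (the `keyed` flag is the option parsed by `HistogramParser` in this commit; field and interval are illustrative):
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "prices" : {
            "histogram" : {
                "field" : "price",
                "interval" : 50,
                "keyed" : true
            }
        }
    }
}
--------------------------------------------------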

View File

@ -142,6 +142,34 @@ Ordering the buckets by multi value metrics sub-aggregation (identified by the a
}
--------------------------------------------------
==== Minimum document count
It is possible to only return terms that match a minimum number of hits using the `min_doc_count` option:
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tag",
                "min_doc_count": 10
            }
        }
    }
}
--------------------------------------------------
The above aggregation would only return tags that have been found in 10 hits or more. The default value is `1`.
NOTE: Setting `min_doc_count`=`0` will also return buckets for terms that didn't match any hit. However, some of
the returned terms that have a document count of zero might only belong to deleted documents, so there is
no guarantee that a `match_all` query would find a positive document count for those terms.
WARNING: When NOT sorting on `doc_count` descending, high values of `min_doc_count` may return a number of buckets
which is less than `size` because not enough data was gathered from the shards. Missing buckets can be
brought back by increasing `shard_size`.
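When ordering by term rather than by count, raising `shard_size` lets each shard contribute more candidate terms. A request sketch (the values shown are illustrative):
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tag",
                "size" : 10,
                "shard_size" : 100,
                "min_doc_count" : 10,
                "order" : { "_term" : "asc" }
            }
        }
    }
}
--------------------------------------------------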
==== Script
Generating the terms using a script:

View File

@ -0,0 +1,64 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.collect;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.common.collect.UnmodifiableIterator;
import java.util.Comparator;
import java.util.Iterator;
public enum Iterators2 {
;
/** Remove duplicated elements from an iterator over sorted content. */
public static <T> Iterator<T> deduplicateSorted(Iterator<? extends T> iterator, final Comparator<? super T> comparator) {
final PeekingIterator<T> it = Iterators.peekingIterator(iterator);
return new UnmodifiableIterator<T>() {
@Override
public boolean hasNext() {
return it.hasNext();
}
@Override
public T next() {
final T ret = it.next();
while (it.hasNext() && comparator.compare(ret, it.peek()) == 0) {
it.next();
}
assert !it.hasNext() || comparator.compare(ret, it.peek()) < 0 : "iterator is not sorted: " + ret + " > " + it.peek();
return ret;
}
};
}
/** Return a merged view over several iterators, optionally deduplicating equivalent entries. */
public static <T> Iterator<T> mergeSorted(Iterable<Iterator<? extends T>> iterators, Comparator<? super T> comparator, boolean deduplicate) {
Iterator<T> it = Iterators.mergeSorted(iterators, comparator);
if (deduplicate) {
it = deduplicateSorted(it, comparator);
}
return it;
}
}
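A usage sketch for the merged, deduplicated view (the demo class and values are hypothetical; the comparator is Guava's natural ordering, as in the test at the end of this commit):
[source,java]
--------------------------------------------------
import com.google.common.collect.Ordering;
import org.elasticsearch.common.collect.Iterators2;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class Iterators2Demo { // hypothetical demo class
    public static void main(String[] args) {
        // Two already-sorted inputs with overlapping values.
        List<Iterator<? extends Integer>> sources = Arrays.<Iterator<? extends Integer>>asList(
                Arrays.asList(1, 3, 5).iterator(),
                Arrays.asList(3, 4, 5).iterator());
        // Merged view over both iterators, deduplicating equal entries.
        Iterator<Integer> merged = Iterators2.mergeSorted(sources, Ordering.<Integer>natural(), true);
        while (merged.hasNext()) {
            System.out.print(merged.next() + " "); // prints: 1 3 4 5
        }
    }
}
--------------------------------------------------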

View File

@ -117,7 +117,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
String type();
AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
AbstractHistogramBase create(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed);
Bucket createBucket(long key, long docCount, InternalAggregations aggregations);
@ -128,14 +128,17 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
private InternalOrder order;
private ValueFormatter formatter;
private boolean keyed;
private long minDocCount;
private EmptyBucketInfo emptyBucketInfo;
protected AbstractHistogramBase() {} // for serialization
protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
protected AbstractHistogramBase(String name, List<B> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
super(name);
this.buckets = buckets;
this.order = order;
assert (minDocCount == 0) == (emptyBucketInfo != null);
this.minDocCount = minDocCount;
this.emptyBucketInfo = emptyBucketInfo;
this.formatter = formatter;
this.keyed = keyed;
@ -169,28 +172,36 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
List<InternalAggregation> aggregations = reduceContext.aggregations();
if (aggregations.size() == 1) {
if (emptyBucketInfo == null) {
if (minDocCount == 1) {
return aggregations.get(0);
}
// we need to fill the gaps with empty buckets
AbstractHistogramBase histo = (AbstractHistogramBase) aggregations.get(0);
CollectionUtil.introSort(histo.buckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
List<HistogramBase.Bucket> list = order.asc ? histo.buckets : Lists.reverse(histo.buckets);
HistogramBase.Bucket prevBucket = null;
ListIterator<HistogramBase.Bucket> iter = list.listIterator();
while (iter.hasNext()) {
// look ahead on the next bucket without advancing the iter
// so we'll be able to insert elements at the right position
HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
if (prevBucket != null) {
long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
while (key != nextBucket.getKey()) {
iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
if (minDocCount == 0) {
// we need to fill the gaps with empty buckets
while (iter.hasNext()) {
// look ahead on the next bucket without advancing the iter
// so we'll be able to insert elements at the right position
HistogramBase.Bucket nextBucket = list.get(iter.nextIndex());
if (prevBucket != null) {
long key = emptyBucketInfo.rounding.nextRoundingValue(prevBucket.getKey());
while (key != nextBucket.getKey()) {
iter.add(createBucket(key, 0, emptyBucketInfo.subAggregations));
key = emptyBucketInfo.rounding.nextRoundingValue(key);
}
}
prevBucket = iter.next();
}
} else {
while (iter.hasNext()) {
if (iter.next().getDocCount() < minDocCount) {
iter.remove();
}
}
prevBucket = iter.next();
}
if (order != InternalOrder.KEY_ASC && order != InternalOrder.KEY_DESC) {
@ -222,7 +233,9 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
for (int i = 0; i < allocated.length; i++) {
if (allocated[i]) {
Bucket bucket = ((List<Bucket>) buckets[i]).get(0).reduce(((List<Bucket>) buckets[i]), reduceContext.cacheRecycler());
reducedBuckets.add(bucket);
if (bucket.getDocCount() >= minDocCount) {
reducedBuckets.add(bucket);
}
}
}
bucketsByKey.release();
@ -230,7 +243,7 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
// adding empty buckets if needed
if (emptyBucketInfo != null) {
if (minDocCount == 0) {
CollectionUtil.introSort(reducedBuckets, order.asc ? InternalOrder.KEY_ASC.comparator() : InternalOrder.KEY_DESC.comparator());
List<HistogramBase.Bucket> list = order.asc ? reducedBuckets : Lists.reverse(reducedBuckets);
HistogramBase.Bucket prevBucket = null;
@ -268,7 +281,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
order = InternalOrder.Streams.readOrder(in);
if (in.readBoolean()) {
minDocCount = in.readVLong();
if (minDocCount == 0) {
emptyBucketInfo = EmptyBucketInfo.readFrom(in);
}
formatter = ValueFormatterStreams.readOptional(in);
@ -286,10 +300,8 @@ abstract class AbstractHistogramBase<B extends HistogramBase.Bucket> extends Int
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
InternalOrder.Streams.writeOrder(order, out);
if (emptyBucketInfo == null) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeVLong(minDocCount);
if (minDocCount == 0) {
EmptyBucketInfo.writeTo(emptyBucketInfo, out);
}
ValueFormatterStreams.writeOptional(formatter, out);

View File

@ -86,7 +86,7 @@ public class DateHistogramParser implements Aggregator.Parser {
String scriptLang = null;
Map<String, Object> scriptParams = null;
boolean keyed = false;
boolean computeEmptyBuckets = false;
long minDocCount = 1;
InternalOrder order = (InternalOrder) Histogram.Order.KEY_ASC;
String interval = null;
boolean preZoneAdjustLargeInterval = false;
@ -131,13 +131,17 @@ public class DateHistogramParser implements Aggregator.Parser {
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
if ("keyed".equals(currentFieldName)) {
keyed = parser.booleanValue();
} else if ("compute_empty_buckets".equals(currentFieldName) || "computeEmptyBuckets".equals(currentFieldName)) {
computeEmptyBuckets = parser.booleanValue();
} else if ("script_values_sorted".equals(currentFieldName)) {
assumeSorted = parser.booleanValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
minDocCount = parser.longValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("params".equals(currentFieldName)) {
scriptParams = parser.map();
@ -199,17 +203,17 @@ public class DateHistogramParser implements Aggregator.Parser {
if (searchScript != null) {
ValueParser valueParser = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT));
config.parser(valueParser);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
}
// falling back on the get field data context
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
}
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
if (mapper == null) {
config.unmapped(true);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
}
if (!(mapper instanceof DateFieldMapper)) {
@ -218,7 +222,7 @@ public class DateHistogramParser implements Aggregator.Parser {
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
config.fieldContext(new FieldContext(field, indexFieldData));
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, computeEmptyBuckets, InternalDateHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalDateHistogram.FACTORY);
}
private static InternalOrder resolveOrder(String key, boolean asc) {

View File

@ -45,7 +45,7 @@ public class HistogramAggregator extends BucketsAggregator {
private final Rounding rounding;
private final InternalOrder order;
private final boolean keyed;
private final boolean computeEmptyBuckets;
private final long minDocCount;
private final AbstractHistogramBase.Factory histogramFactory;
private final LongHash bucketOrds;
@ -55,7 +55,7 @@ public class HistogramAggregator extends BucketsAggregator {
Rounding rounding,
InternalOrder order,
boolean keyed,
boolean computeEmptyBuckets,
long minDocCount,
@Nullable NumericValuesSource valuesSource,
long initialCapacity,
AbstractHistogramBase.Factory<?> histogramFactory,
@ -67,7 +67,7 @@ public class HistogramAggregator extends BucketsAggregator {
this.rounding = rounding;
this.order = order;
this.keyed = keyed;
this.computeEmptyBuckets = computeEmptyBuckets;
this.minDocCount = minDocCount;
this.histogramFactory = histogramFactory;
bucketOrds = new LongHash(initialCapacity, aggregationContext.pageCacheRecycler());
@ -118,15 +118,15 @@ public class HistogramAggregator extends BucketsAggregator {
// value source will be null for unmapped fields
ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
return histogramFactory.create(name, buckets, order, emptyBucketInfo, formatter, keyed);
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
return histogramFactory.create(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
}
@Override
public InternalAggregation buildEmptyAggregation() {
ValueFormatter formatter = valuesSource != null ? valuesSource.formatter() : null;
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = computeEmptyBuckets ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
return histogramFactory.create(name, (List<HistogramBase.Bucket>) Collections.EMPTY_LIST, order, emptyBucketInfo, formatter, keyed);
AbstractHistogramBase.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new AbstractHistogramBase.EmptyBucketInfo(rounding, buildEmptySubAggregations()) : null;
return histogramFactory.create(name, Collections.emptyList(), order, minDocCount, emptyBucketInfo, formatter, keyed);
}
@Override
@ -139,28 +139,28 @@ public class HistogramAggregator extends BucketsAggregator {
private final Rounding rounding;
private final InternalOrder order;
private final boolean keyed;
private final boolean computeEmptyBuckets;
private final long minDocCount;
private final AbstractHistogramBase.Factory<?> histogramFactory;
public Factory(String name, ValuesSourceConfig<NumericValuesSource> valueSourceConfig,
Rounding rounding, InternalOrder order, boolean keyed, boolean computeEmptyBuckets, AbstractHistogramBase.Factory<?> histogramFactory) {
Rounding rounding, InternalOrder order, boolean keyed, long minDocCount, AbstractHistogramBase.Factory<?> histogramFactory) {
super(name, histogramFactory.type(), valueSourceConfig);
this.rounding = rounding;
this.order = order;
this.keyed = keyed;
this.computeEmptyBuckets = computeEmptyBuckets;
this.minDocCount = minDocCount;
this.histogramFactory = histogramFactory;
}
@Override
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, null, 0, histogramFactory, aggregationContext, parent);
return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, null, 0, histogramFactory, aggregationContext, parent);
}
@Override
protected Aggregator create(NumericValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
// todo if we'll keep track of min/max values in IndexFieldData, we could use the max here to come up with a better estimation for the buckets count
return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, valuesSource, 50, histogramFactory, aggregationContext, parent);
return new HistogramAggregator(name, factories, rounding, order, keyed, minDocCount, valuesSource, 50, histogramFactory, aggregationContext, parent);
}
}

View File

@ -32,7 +32,7 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
private Long interval;
private HistogramBase.Order order;
private Boolean computeEmptyBuckets;
private Long minDocCount;
public HistogramBuilder(String name) {
super(name, InternalHistogram.TYPE.name());
@ -48,8 +48,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
return this;
}
public HistogramBuilder emptyBuckets(boolean computeEmptyBuckets) {
this.computeEmptyBuckets = computeEmptyBuckets;
public HistogramBuilder minDocCount(long minDocCount) {
this.minDocCount = minDocCount;
return this;
}
@ -65,8 +65,8 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
order.toXContent(builder, params);
}
if (computeEmptyBuckets != null) {
builder.field("empty_buckets", computeEmptyBuckets);
if (minDocCount != null) {
builder.field("min_doc_count", minDocCount);
}
return builder;
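A client-side sketch of the renamed option (assumes an existing `Client` named `client`, an illustrative `products` index with a numeric `price` field, and `histogram` statically imported from `AggregationBuilders`, as in the tests below):
[source,java]
--------------------------------------------------
SearchResponse response = client.prepareSearch("products")
        .addAggregation(histogram("prices")
                .field("price")
                .interval(50)
                .minDocCount(0)) // 0 requests empty buckets, replacing emptyBuckets(true)
        .execute().actionGet();
--------------------------------------------------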

View File

@ -54,7 +54,7 @@ public class HistogramParser implements Aggregator.Parser {
String scriptLang = null;
Map<String, Object> scriptParams = null;
boolean keyed = false;
boolean emptyBuckets = false;
long minDocCount = 1;
InternalOrder order = (InternalOrder) InternalOrder.KEY_ASC;
long interval = -1;
boolean assumeSorted = false;
@ -80,14 +80,14 @@ public class HistogramParser implements Aggregator.Parser {
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("interval".equals(currentFieldName)) {
interval = parser.longValue();
} else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
minDocCount = parser.longValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
if ("keyed".equals(currentFieldName)) {
keyed = parser.booleanValue();
} else if ("empty_buckets".equals(currentFieldName) || "emptyBuckets".equals(currentFieldName)) {
emptyBuckets = parser.booleanValue();
} else if ("script_values_sorted".equals(currentFieldName)) {
assumeSorted = parser.booleanValue();
} else {
@ -130,13 +130,13 @@ public class HistogramParser implements Aggregator.Parser {
}
if (field == null) {
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
}
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
if (mapper == null) {
config.unmapped(true);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
}
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
@ -146,7 +146,7 @@ public class HistogramParser implements Aggregator.Parser {
config.formatter(new ValueFormatter.Number.Pattern(format));
}
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, emptyBuckets, InternalHistogram.FACTORY);
return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, InternalHistogram.FACTORY);
}

View File

@ -76,8 +76,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
}
@Override
public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
return new InternalDateHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
public AbstractHistogramBase<?> create(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
return new InternalDateHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
}
@Override
@ -88,8 +88,8 @@ public class InternalDateHistogram extends AbstractHistogramBase<DateHistogram.B
InternalDateHistogram() {} // for serialization
InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
super(name, buckets, order, emptyBucketInfo, formatter, keyed);
InternalDateHistogram(String name, List<DateHistogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
}
@Override

View File

@ -65,8 +65,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
return TYPE.name();
}
public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
return new InternalHistogram(name, buckets, order, emptyBucketInfo, formatter, keyed);
public AbstractHistogramBase<?> create(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
}
public Bucket createBucket(long key, long docCount, InternalAggregations aggregations) {
@ -77,8 +77,8 @@ public class InternalHistogram extends AbstractHistogramBase<Histogram.Bucket> i
public InternalHistogram() {} // for serialization
public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
super(name, buckets, order, emptyBucketInfo, formatter, keyed);
public InternalHistogram(String name, List<Histogram.Bucket> buckets, InternalOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo, ValueFormatter formatter, boolean keyed) {
super(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed);
}
@Override

View File

@ -88,12 +88,12 @@ public class DoubleTerms extends InternalTerms {
DoubleTerms() {} // for serialization
public DoubleTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
this(name, order, null, requiredSize, buckets);
public DoubleTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
this(name, order, null, requiredSize, minDocCount, buckets);
}
public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, buckets);
public DoubleTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, minDocCount, buckets);
this.valueFormatter = valueFormatter;
}
@ -147,7 +147,10 @@ public class DoubleTerms extends InternalTerms {
for (int i = 0; i < states.length; i++) {
if (states[i]) {
List<DoubleTerms.Bucket> sameTermBuckets = (List<DoubleTerms.Bucket>) internalBuckets[i];
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
if (b.getDocCount() >= minDocCount) {
ordered.insertWithOverflow(b);
}
}
}
buckets.release();
@ -165,6 +168,7 @@ public class DoubleTerms extends InternalTerms {
this.order = InternalOrder.Streams.readOrder(in);
this.valueFormatter = ValueFormatterStreams.readOptional(in);
this.requiredSize = in.readVInt();
this.minDocCount = in.readVLong();
int size = in.readVInt();
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
for (int i = 0; i < size; i++) {
@ -180,6 +184,7 @@ public class DoubleTerms extends InternalTerms {
InternalOrder.Streams.writeOrder(order, out);
ValueFormatterStreams.writeOptional(valueFormatter, out);
out.writeVInt(requiredSize);
out.writeVLong(minDocCount);
out.writeVInt(buckets.size());
for (InternalTerms.Bucket bucket : buckets) {
out.writeDouble(((Bucket) bucket).term);

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.search.aggregations.Aggregator;
@ -40,16 +41,18 @@ public class DoubleTermsAggregator extends BucketsAggregator {
private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final long minDocCount;
private final NumericValuesSource valuesSource;
private final LongHash bucketOrds;
public DoubleTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
this.valuesSource = valuesSource;
this.order = InternalOrder.validate(order, this);
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
}
@ -78,6 +81,21 @@ public class DoubleTermsAggregator extends BucketsAggregator {
@Override
public DoubleTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
// we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
final DoubleValues values = valuesSource.doubleValues();
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
final int valueCount = values.setDocument(docId);
for (int i = 0; i < valueCount; ++i) {
bucketOrds.add(Double.doubleToLongBits(values.nextValue()));
}
}
}
}
final int size = (int) Math.min(bucketOrds.size(), shardSize);
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@ -104,12 +122,12 @@ public class DoubleTermsAggregator extends BucketsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
}
@Override
public DoubleTerms buildEmptyAggregation() {
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
return new DoubleTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
}
@Override

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.common.io.stream.Streamable;
@ -78,15 +79,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
protected InternalOrder order;
protected int requiredSize;
protected long minDocCount;
protected Collection<Bucket> buckets;
protected Map<String, Bucket> bucketMap;
protected InternalTerms() {} // for serialization
protected InternalTerms(String name, InternalOrder order, int requiredSize, Collection<Bucket> buckets) {
protected InternalTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<Bucket> buckets) {
super(name);
this.order = order;
this.requiredSize = requiredSize;
this.minDocCount = minDocCount;
this.buckets = buckets;
}
@ -156,7 +159,10 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(null));
for (Map.Entry<Text, List<Bucket>> entry : buckets.entrySet()) {
List<Bucket> sameTermBuckets = entry.getValue();
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
final Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
if (b.docCount >= minDocCount) {
ordered.insertWithOverflow(b);
}
}
Bucket[] list = new Bucket[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; i--) {
@ -166,23 +172,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
return reduced;
}
protected void trimExcessEntries() {
if (requiredSize >= buckets.size()) {
return;
}
if (buckets instanceof List) {
buckets = ((List) buckets).subList(0, requiredSize);
return;
}
int i = 0;
for (Iterator<Bucket> iter = buckets.iterator(); iter.hasNext();) {
iter.next();
if (i++ >= requiredSize) {
iter.remove();
final void trimExcessEntries() {
final List<Bucket> newBuckets = Lists.newArrayList();
for (Bucket b : buckets) {
if (newBuckets.size() >= requiredSize) {
break;
}
if (b.docCount >= minDocCount) {
newBuckets.add(b);
}
}
buckets = newBuckets;
}
}

View File

@ -89,8 +89,8 @@ public class LongTerms extends InternalTerms {
LongTerms() {} // for serialization
public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, buckets);
public LongTerms(String name, InternalOrder order, ValueFormatter valueFormatter, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, minDocCount, buckets);
this.valueFormatter = valueFormatter;
}
@ -144,7 +144,10 @@ public class LongTerms extends InternalTerms {
for (int i = 0; i < states.length; i++) {
if (states[i]) {
List<LongTerms.Bucket> sameTermBuckets = (List<LongTerms.Bucket>) internalBuckets[i];
ordered.insertWithOverflow(sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler()));
final InternalTerms.Bucket b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext.cacheRecycler());
if (b.getDocCount() >= minDocCount) {
ordered.insertWithOverflow(b);
}
}
}
buckets.release();
@ -162,6 +165,7 @@ public class LongTerms extends InternalTerms {
this.order = InternalOrder.Streams.readOrder(in);
this.valueFormatter = ValueFormatterStreams.readOptional(in);
this.requiredSize = in.readVInt();
this.minDocCount = in.readVLong();
int size = in.readVInt();
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
for (int i = 0; i < size; i++) {
@ -177,6 +181,7 @@ public class LongTerms extends InternalTerms {
InternalOrder.Streams.writeOrder(order, out);
ValueFormatterStreams.writeOptional(valueFormatter, out);
out.writeVInt(requiredSize);
out.writeVLong(minDocCount);
out.writeVInt(buckets.size());
for (InternalTerms.Bucket bucket : buckets) {
out.writeLong(((Bucket) bucket).term);

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.search.aggregations.Aggregator;
@ -40,16 +41,18 @@ public class LongTermsAggregator extends BucketsAggregator {
private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final long minDocCount;
private final NumericValuesSource valuesSource;
private final LongHash bucketOrds;
public LongTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
this.valuesSource = valuesSource;
this.order = InternalOrder.validate(order, this);
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
}
@ -77,6 +80,21 @@ public class LongTermsAggregator extends BucketsAggregator {
@Override
public LongTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
// we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
final LongValues values = valuesSource.longValues();
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
final int valueCount = values.setDocument(docId);
for (int i = 0; i < valueCount; ++i) {
bucketOrds.add(values.nextValue());
}
}
}
}
final int size = (int) Math.min(bucketOrds.size(), shardSize);
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@ -103,12 +121,12 @@ public class LongTermsAggregator extends BucketsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Arrays.asList(list));
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Arrays.asList(list));
}
@Override
public LongTerms buildEmptyAggregation() {
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, Collections.<InternalTerms.Bucket>emptyList());
return new LongTerms(name, order, valuesSource.formatter(), requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
}
@Override

View File

@ -83,8 +83,8 @@ public class StringTerms extends InternalTerms {
StringTerms() {} // for serialization
public StringTerms(String name, InternalOrder order, int requiredSize, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, buckets);
public StringTerms(String name, InternalOrder order, int requiredSize, long minDocCount, Collection<InternalTerms.Bucket> buckets) {
super(name, order, requiredSize, minDocCount, buckets);
}
@Override
@ -97,6 +97,7 @@ public class StringTerms extends InternalTerms {
this.name = in.readString();
this.order = InternalOrder.Streams.readOrder(in);
this.requiredSize = in.readVInt();
this.minDocCount = in.readVLong();
int size = in.readVInt();
List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
for (int i = 0; i < size; i++) {
@ -111,6 +112,7 @@ public class StringTerms extends InternalTerms {
out.writeString(name);
InternalOrder.Streams.writeOrder(order, out);
out.writeVInt(requiredSize);
out.writeVLong(minDocCount);
out.writeVInt(buckets.size());
for (InternalTerms.Bucket bucket : buckets) {
out.writeBytesRef(((Bucket) bucket).termBytes);

View File

@ -18,30 +18,31 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import com.sun.corba.se.impl.naming.cosnaming.InterOperableNamingImpl;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.UnmodifiableIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.collect.Iterators2;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.ReaderContextAware;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.BytesRefHash;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.metrics.MetricsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.*;
/**
* An aggregator of string values.
@ -52,11 +53,12 @@ public class StringTermsAggregator extends BucketsAggregator {
private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final long minDocCount;
protected final BytesRefHash bucketOrds;
private final IncludeExclude includeExclude;
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
InternalOrder order, int requiredSize, int shardSize,
InternalOrder order, int requiredSize, int shardSize, long minDocCount,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
@ -64,6 +66,7 @@ public class StringTermsAggregator extends BucketsAggregator {
this.order = InternalOrder.validate(order, this);
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
this.includeExclude = includeExclude;
bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.pageCacheRecycler());
}
@ -94,9 +97,122 @@ public class StringTermsAggregator extends BucketsAggregator {
}
}
/** Returns an iterator over the field data terms. */
private static Iterator<BytesRef> terms(final BytesValues.WithOrdinals bytesValues, boolean reverse) {
final Ordinals.Docs ordinals = bytesValues.ordinals();
if (reverse) {
return new UnmodifiableIterator<BytesRef>() {
long i = ordinals.getMaxOrd() - 1;
@Override
public boolean hasNext() {
return i >= Ordinals.MIN_ORDINAL;
}
@Override
public BytesRef next() {
bytesValues.getValueByOrd(i--);
return bytesValues.copyShared();
}
};
} else {
return new UnmodifiableIterator<BytesRef>() {
long i = Ordinals.MIN_ORDINAL;
@Override
public boolean hasNext() {
return i < ordinals.getMaxOrd();
}
@Override
public BytesRef next() {
bytesValues.getValueByOrd(i++);
return bytesValues.copyShared();
}
};
}
}
@Override
public StringTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
// we need to fill-in the blanks
List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
final BytesValues values = valuesSource.bytesValues();
if (values instanceof BytesValues.WithOrdinals) {
valuesWithOrdinals.add((BytesValues.WithOrdinals) values);
} else {
// brute force
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
final int valueCount = values.setDocument(docId);
for (int i = 0; i < valueCount; ++i) {
final BytesRef term = values.nextValue();
if (includeExclude == null || includeExclude.accept(term)) {
bucketOrds.add(term, values.currentValueHash());
}
}
}
}
}
// With ordinals we can be smarter and add just as many terms as necessary to the hash table
// For instance, if sorting by term asc, we only need to get the first `requiredSize` terms as other terms would
// either be excluded by the priority queue or at reduce time.
if (valuesWithOrdinals.size() > 0) {
final boolean reverse = order == InternalOrder.TERM_DESC;
Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
if (reverse) {
comparator = Collections.reverseOrder(comparator);
}
Iterator<? extends BytesRef>[] iterators = new Iterator[valuesWithOrdinals.size()];
for (int i = 0; i < valuesWithOrdinals.size(); ++i) {
iterators[i] = terms(valuesWithOrdinals.get(i), reverse);
}
Iterator<BytesRef> terms = Iterators2.mergeSorted(Arrays.asList(iterators), comparator, true);
if (includeExclude != null) {
terms = Iterators.filter(terms, new Predicate<BytesRef>() {
@Override
public boolean apply(BytesRef input) {
return includeExclude.accept(input);
}
});
}
if (order == InternalOrder.COUNT_ASC) {
// let's try to find `shardSize` terms that matched no hit
// this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
// they might have higher counts on other shards
for (int added = 0; added < shardSize && terms.hasNext(); ) {
if (bucketOrds.add(terms.next()) >= 0) {
++added;
}
}
} else if (order == InternalOrder.COUNT_DESC) {
// add terms until there are enough buckets
while (bucketOrds.size() < requiredSize && terms.hasNext()) {
bucketOrds.add(terms.next());
}
} else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
// add the `requiredSize` least terms
for (int i = 0; i < requiredSize && terms.hasNext(); ++i) {
bucketOrds.add(terms.next());
}
} else {
// other orders (aggregations) are not optimizable
while (terms.hasNext()) {
bucketOrds.add(terms.next());
}
}
}
}
final int size = (int) Math.min(bucketOrds.size(), shardSize);
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
@ -117,12 +233,13 @@ public class StringTermsAggregator extends BucketsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
return new StringTerms(name, order, requiredSize, Arrays.asList(list));
return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
}
@Override
public StringTerms buildEmptyAggregation() {
return new StringTerms(name, order, requiredSize, Collections.<InternalTerms.Bucket>emptyList());
return new StringTerms(name, order, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
}
@Override
@ -141,8 +258,8 @@ public class StringTermsAggregator extends BucketsAggregator {
private LongArray ordinalToBucket;
public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, long esitmatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, null, aggregationContext, parent);
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
this.valuesSource = valuesSource;
}

View File

@ -41,21 +41,23 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final long minDocCount;
private final IncludeExclude includeExclude;
private final String executionHint;
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, String executionHint) {
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, String executionHint) {
super(name, StringTerms.TYPE.name(), valueSourceConfig);
this.order = order;
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
this.includeExclude = includeExclude;
this.executionHint = executionHint;
}
@Override
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
return new UnmappedTermsAggregator(name, order, requiredSize, aggregationContext, parent);
return new UnmappedTermsAggregator(name, order, requiredSize, minDocCount, aggregationContext, parent);
}
private static boolean hasParentBucketAggregator(Aggregator parent) {
@ -107,11 +109,11 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
assert includeExclude == null;
final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name,
factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
aggregationContext.registerReaderContextAware(aggregator);
return aggregator;
} else {
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
}
}
@ -122,9 +124,9 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
if (valuesSource instanceof NumericValuesSource) {
if (((NumericValuesSource) valuesSource).isFloatingPoint()) {
return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
}
return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
}
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + valuesSourceConfig.fieldContext().field() +

View File

@ -33,6 +33,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
private int size = -1;
private int shardSize = -1;
private long minDocCount = -1;
private Terms.ValueType valueType;
private Terms.Order order;
private String includePattern;
@ -62,6 +63,14 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
return this;
}
/**
* Set the minimum document count terms should have in order to appear in the response.
*/
public TermsBuilder minDocCount(long minDocCount) {
this.minDocCount = minDocCount;
return this;
}
/**
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based
* on the {@link java.util.regex.Pattern} class.
@ -135,6 +144,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
if (shardSize >= 0) {
builder.field("shard_size", shardSize);
}
if (minDocCount >= 0) {
builder.field("min_doc_count", minDocCount);
}
if (valueType != null) {
builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT));
}
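A client-side sketch of the new builder option (assumes an existing `Client` named `client`, an illustrative `posts` index with a `tag` field, and `terms` statically imported from `AggregationBuilders`; values are illustrative):
[source,java]
--------------------------------------------------
SearchResponse response = client.prepareSearch("posts")
        .addAggregation(terms("tags")
                .field("tag")
                .size(10)
                .minDocCount(10)) // drop tags matching fewer than 10 hits
        .execute().actionGet();
--------------------------------------------------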

View File

@ -72,6 +72,7 @@ public class TermsParser implements Aggregator.Parser {
String exclude = null;
int excludeFlags = 0; // 0 means no flags
String executionHint = null;
long minDocCount = 1;
XContentParser.Token token;
@ -110,6 +111,8 @@ public class TermsParser implements Aggregator.Parser {
requiredSize = parser.intValue();
} else if ("shard_size".equals(currentFieldName) || "shardSize".equals(currentFieldName)) {
shardSize = parser.intValue();
} else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) {
minDocCount = parser.intValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
}
@ -206,14 +209,14 @@ public class TermsParser implements Aggregator.Parser {
if (!assumeUnique) {
config.ensureUnique(true);
}
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
}
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
if (mapper == null) {
ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
config.unmapped(true);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
}
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
@ -255,7 +258,7 @@ public class TermsParser implements Aggregator.Parser {
config.ensureUnique(true);
}
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude, executionHint);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, minDocCount, includeExclude, executionHint);
}
static InternalOrder resolveOrder(String key, boolean asc) {

View File

@ -53,8 +53,8 @@ public class UnmappedTerms extends InternalTerms {
UnmappedTerms() {} // for serialization
public UnmappedTerms(String name, InternalOrder order, int requiredSize) {
super(name, order, requiredSize, BUCKETS);
public UnmappedTerms(String name, InternalOrder order, int requiredSize, long minDocCount) {
super(name, order, requiredSize, minDocCount, BUCKETS);
}
@Override
@ -67,6 +67,7 @@ public class UnmappedTerms extends InternalTerms {
this.name = in.readString();
this.order = InternalOrder.Streams.readOrder(in);
this.requiredSize = in.readVInt();
this.minDocCount = in.readVLong();
this.buckets = BUCKETS;
this.bucketMap = BUCKETS_MAP;
}
@ -76,6 +77,7 @@ public class UnmappedTerms extends InternalTerms {
out.writeString(name);
InternalOrder.Streams.writeOrder(order, out);
out.writeVInt(requiredSize);
out.writeVLong(minDocCount);
}
@Override

View File

@ -32,11 +32,13 @@ public class UnmappedTermsAggregator extends Aggregator {
private final InternalOrder order;
private final int requiredSize;
private final long minDocCount;
public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, AggregationContext aggregationContext, Aggregator parent) {
public UnmappedTermsAggregator(String name, InternalOrder order, int requiredSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, AggregatorFactories.EMPTY, 0, aggregationContext, parent);
this.order = order;
this.requiredSize = requiredSize;
this.minDocCount = minDocCount;
}
@Override
@ -51,11 +53,11 @@ public class UnmappedTermsAggregator extends Aggregator {
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
return new UnmappedTerms(name, order, requiredSize);
return new UnmappedTerms(name, order, requiredSize, minDocCount);
}
@Override
public InternalAggregation buildEmptyAggregation() {
return new UnmappedTerms(name, order, requiredSize);
return new UnmappedTerms(name, order, requiredSize, minDocCount);
}
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.collect;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.test.ElasticsearchTestCase;
import java.util.Iterator;
import java.util.List;
public class Iterators2Tests extends ElasticsearchTestCase {
public void testDeduplicateSorted() {
final List<String> list = Lists.newArrayList();
for (int i = randomInt(100); i >= 0; --i) {
final int frequency = randomIntBetween(1, 10);
final String s = randomAsciiOfLength(randomIntBetween(2, 20));
for (int j = 0; j < frequency; ++j) {
list.add(s);
}
}
CollectionUtil.introSort(list);
final List<String> deduplicated = Lists.newArrayList();
for (Iterator<String> it = Iterators2.deduplicateSorted(list.iterator(), Ordering.natural()); it.hasNext(); ) {
deduplicated.add(it.next());
}
assertEquals(Lists.newArrayList(Sets.newTreeSet(list)), deduplicated);
}
}

View File

@ -888,7 +888,7 @@ public class DateHistogramTests extends ElasticsearchIntegrationTest {
SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
.setQuery(matchAllQuery())
.addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateHistogram("date_histo").interval(1)))
.addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateHistogram("date_histo").interval(1)))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));

View File

@@ -994,7 +994,7 @@ public class DateRangeTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(dateRange("date_range").addRange("0-1", 0, 1)))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));


@@ -600,7 +600,7 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();


@@ -23,8 +23,8 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.filter.Filter;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.metrics.avg.Avg;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.hamcrest.Matchers;
@@ -159,7 +159,7 @@ public class FilterTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(filter("filter").filter(matchAllFilter())))
                 .execute().actionGet();


@@ -371,7 +371,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(geoDistance("geo_dist").field("location").point("52.3760, 4.894").addRange("0-100", 0.0, 100.0)))
                 .execute().actionGet();


@@ -756,7 +756,7 @@ public class HistogramTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(histogram("sub_histo").interval(1l)))
                 .execute().actionGet();


@@ -842,7 +842,7 @@ public class IPv4RangeTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(ipRange("ip_range").field("ip").addRange("r1", "10.0.0.1", "10.0.0.10")))
                 .execute().actionGet();


@@ -594,7 +594,7 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();


@@ -0,0 +1,322 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;
import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.LongSet;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Before;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
public class MinDocCountTests extends ElasticsearchIntegrationTest {

    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);

    @Override
    public Settings indexSettings() {
        return ImmutableSettings.builder()
                .put("index.number_of_shards", between(1, 5))
                .put("index.number_of_replicas", between(0, 1))
                .build();
    }

    private int cardinality;

    @Before
    public void indexData() throws Exception {
        createIndex("idx");

        cardinality = randomIntBetween(8, 30);
        final List<IndexRequestBuilder> indexRequests = new ArrayList<IndexRequestBuilder>();
        final Set<String> stringTerms = new HashSet<String>();
        final LongSet longTerms = new LongOpenHashSet();
        for (int i = 0; i < cardinality; ++i) {
            String stringTerm;
            do {
                stringTerm = RandomStrings.randomAsciiOfLength(getRandom(), 8);
            } while (!stringTerms.add(stringTerm));
            long longTerm;
            do {
                longTerm = randomInt(cardinality * 2);
            } while (!longTerms.add(longTerm));
            double doubleTerm = longTerm * Math.PI;
            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
            for (int j = 0; j < frequency; ++j) {
                indexRequests.add(client().prepareIndex("idx", "type").setSource(jsonBuilder().startObject().field("s", stringTerm).field("l", longTerm).field("d", doubleTerm).field("match", randomBoolean()).endObject()));
            }
        }
        cardinality = stringTerms.size();

        indexRandom(true, indexRequests);
        ensureSearchable();
    }

    private enum Script {
        NO {
            @Override
            TermsBuilder apply(TermsBuilder builder, String field) {
                return builder.field(field);
            }
        },
        YES {
            @Override
            TermsBuilder apply(TermsBuilder builder, String field) {
                return builder.script("doc['" + field + "'].values");
            }
        };

        abstract TermsBuilder apply(TermsBuilder builder, String field);
    }
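    // For example, applied to the string field "s" indexed above, the two
    // enum constants build requests equivalent to:
    //     terms("terms").field("s")                   // Script.NO
    //     terms("terms").script("doc['s'].values")    // Script.YES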
    // check that terms2 is a subset of terms1
    private void assertSubset(Terms terms1, Terms terms2, long minDocCount, int size, String include) {
        final Matcher matcher = include == null ? null : Pattern.compile(include).matcher("");
        final Iterator<Terms.Bucket> it1 = terms1.iterator();
        final Iterator<Terms.Bucket> it2 = terms2.iterator();
        int size2 = 0;
        while (it1.hasNext()) {
            final Terms.Bucket bucket1 = it1.next();
            if (bucket1.getDocCount() >= minDocCount && (matcher == null || matcher.reset(bucket1.getKey().string()).matches())) {
                if (size2++ == size) {
                    break;
                }
                assertTrue(it2.hasNext());
                final Terms.Bucket bucket2 = it2.next();
                assertEquals(bucket1.getKey(), bucket2.getKey());
                assertEquals(bucket1.getDocCount(), bucket2.getDocCount());
            }
        }
        assertFalse(it2.hasNext());
    }
    private void assertSubset(Histogram histo1, Histogram histo2, long minDocCount) {
        final Iterator<Histogram.Bucket> it2 = histo2.iterator();
        for (Histogram.Bucket b1 : histo1) {
            if (b1.getDocCount() >= minDocCount) {
                final Histogram.Bucket b2 = it2.next();
                assertEquals(b1.getKey(), b2.getKey());
                assertEquals(b1.getDocCount(), b2.getDocCount());
            }
        }
    }
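    // Example of the property checked by these two helpers, with hypothetical
    // buckets: given a min_doc_count=0 response of
    //     [ {key: 0, doc_count: 2}, {key: 50, doc_count: 0}, {key: 100, doc_count: 3} ]
    // the min_doc_count=1 response must be exactly
    //     [ {key: 0, doc_count: 2}, {key: 100, doc_count: 3} ]
    // that is, the same keys and counts in the same order, with buckets below
    // the threshold removed. Raising min_doc_count may only filter buckets,
    // never reorder or recount them.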
    public void testStringTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(true));
    }

    public void testStringScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(true));
    }

    public void testStringTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.term(false));
    }

    public void testStringScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.term(false));
    }

    public void testStringCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true));
    }

    public void testStringScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true));
    }

    public void testStringCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false));
    }

    public void testStringScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false));
    }

    public void testStringCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(true), ".*a.*");
    }

    public void testStringScriptCountAscWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(true), ".*a.*");
    }

    public void testStringCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.NO, Terms.Order.count(false), ".*a.*");
    }

    public void testStringScriptCountDescWithInclude() throws Exception {
        testMinDocCountOnTerms("s", Script.YES, Terms.Order.count(false), ".*a.*");
    }

    public void testLongTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(true));
    }

    public void testLongScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(true));
    }

    public void testLongTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.term(false));
    }

    public void testLongScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.term(false));
    }

    public void testLongCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(true));
    }

    public void testLongScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(true));
    }

    public void testLongCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.NO, Terms.Order.count(false));
    }

    public void testLongScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("l", Script.YES, Terms.Order.count(false));
    }

    public void testDoubleTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(true));
    }

    public void testDoubleScriptTermAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(true));
    }

    public void testDoubleTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.term(false));
    }

    public void testDoubleScriptTermDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.term(false));
    }

    public void testDoubleCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(true));
    }

    public void testDoubleScriptCountAsc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(true));
    }

    public void testDoubleCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.NO, Terms.Order.count(false));
    }

    public void testDoubleScriptCountDesc() throws Exception {
        testMinDocCountOnTerms("d", Script.YES, Terms.Order.count(false));
    }
    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order) throws Exception {
        testMinDocCountOnTerms(field, script, order, null);
    }

    private void testMinDocCountOnTerms(String field, Script script, Terms.Order order, String include) throws Exception {
        // all terms
        final SearchResponse allTermsResponse = client().prepareSearch("idx").setTypes("type")
                .setSearchType(SearchType.COUNT)
                .setQuery(QUERY)
                .addAggregation(script.apply(terms("terms"), field)
                        .executionHint(StringTermsTests.randomExecutionHint())
                        .order(order)
                        .size(cardinality + randomInt(10))
                        .minDocCount(0))
                .execute().actionGet();
        final Terms allTerms = allTermsResponse.getAggregations().get("terms");
        assertEquals(cardinality, allTerms.buckets().size());

        for (long minDocCount = 0; minDocCount < 20; ++minDocCount) {
            final int size = randomIntBetween(1, cardinality + 2);
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSearchType(SearchType.COUNT)
                    .setQuery(QUERY)
                    .addAggregation(script.apply(terms("terms"), field)
                            .executionHint(StringTermsTests.randomExecutionHint())
                            .order(order)
                            .size(size)
                            .include(include)
                            .shardSize(cardinality + randomInt(10))
                            .minDocCount(minDocCount))
                    .execute().actionGet();
            assertSubset(allTerms, (Terms) response.getAggregations().get("terms"), minDocCount, size, include);
        }
    }
    public void testHistogramCountAsc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.COUNT_ASC);
    }

    public void testHistogramCountDesc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.COUNT_DESC);
    }

    public void testHistogramKeyAsc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.KEY_ASC);
    }

    public void testHistogramKeyDesc() throws Exception {
        testMinDocCountOnHistogram(Histogram.Order.KEY_DESC);
    }

    private void testMinDocCountOnHistogram(Histogram.Order order) throws Exception {
        final int interval = randomIntBetween(1, 3);
        final SearchResponse allResponse = client().prepareSearch("idx").setTypes("type")
                .setSearchType(SearchType.COUNT)
                .setQuery(QUERY)
                .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(0))
                .execute().actionGet();
        final Histogram allHisto = allResponse.getAggregations().get("histo");

        for (long minDocCount = 0; minDocCount < 50; ++minDocCount) {
            final SearchResponse response = client().prepareSearch("idx").setTypes("type")
                    .setSearchType(SearchType.COUNT)
                    .setQuery(QUERY)
                    .addAggregation(histogram("histo").field("d").interval(interval).order(order).minDocCount(minDocCount))
                    .execute().actionGet();
            assertSubset(allHisto, (Histogram) response.getAggregations().get("histo"), minDocCount);
        }
    }
}


@@ -202,7 +202,7 @@ public class MissingTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(missing("missing")))
                 .execute().actionGet();


@@ -251,7 +251,7 @@ public class NestedTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(nested("nested").path("nested")))
                 .execute().actionGet();


@@ -921,7 +921,7 @@ public class RangeTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(range("range").addRange("0-2", 0.0, 2.0)))
                 .execute().actionGet();


@@ -66,7 +66,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                 .build();
     }

-    private String randomExecutionHint() {
+    public static String randomExecutionHint() {
         return randomFrom(Arrays.asList(null, TermsAggregatorFactory.EXECUTION_HINT_VALUE_MAP, TermsAggregatorFactory.EXECUTION_HINT_VALUE_ORDINALS));
     }
@@ -758,7 +758,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true)
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0)
                         .subAggregation(terms("terms")))
                 .execute().actionGet();


@@ -39,7 +39,7 @@ public class AvgTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(avg("avg")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(avg("avg")))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));


@@ -53,7 +53,7 @@ public class ExtendedStatsTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(extendedStats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(extendedStats("stats")))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));


@@ -39,7 +39,7 @@ public class MaxTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(max("max")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(max("max")))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));


@@ -39,7 +39,7 @@ public class MinTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(min("min")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(min("min")))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));


@@ -40,7 +40,7 @@ public class StatsTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(stats("stats")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(stats("stats")))
                 .execute().actionGet();
         assertShardExecutionState(searchResponse, 0);


@@ -39,7 +39,7 @@ public class SumTests extends AbstractNumericTests {
         SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx")
                 .setQuery(matchAllQuery())
-                .addAggregation(histogram("histo").field("value").interval(1l).emptyBuckets(true).subAggregation(sum("sum")))
+                .addAggregation(histogram("histo").field("value").interval(1l).minDocCount(0).subAggregation(sum("sum")))
                 .execute().actionGet();
         assertThat(searchResponse.getHits().getTotalHits(), equalTo(2l));