mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 10:25:15 +00:00
Aggregations: Return the sum of the doc counts of other buckets.
This commit adds a new field to the response of the terms aggregation called `sum_other_doc_count`, which is equal to the sum of the doc counts of the buckets that did not make it into the list of top buckets. This is typically useful for rendering an `other` sector when using terms aggregations to build pie charts. Example query and response:

```json
GET test/_search?search_type=count
{
  "aggs": {
    "colors": {
      "terms": {
        "field": "color",
        "size": 3
      }
    }
  }
}
```

```json
{
  [...],
  "aggregations": {
    "colors": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 4,
      "buckets": [
        {
          "key": "blue",
          "doc_count": 65
        },
        {
          "key": "red",
          "doc_count": 14
        },
        {
          "key": "brown",
          "doc_count": 3
        }
      ]
    }
  }
}
```

Close #8213
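For Java API callers, the new value is exposed through the `Terms#getSumOfOtherDocCounts()` getter added below. The following sketch (not part of the commit) illustrates the invariant that the new `AbstractTermsTests` verifies: the doc counts of the returned buckets plus `sum_other_doc_count` add up to the same total regardless of `size`. It assumes the `test` index and `color` field from the example above and a `client` obtained elsewhere, e.g. the `client()` handle of the integration-test scaffolding:

```java
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;

// Same query as the REST example above, issued through the Java API.
SearchResponse resp = client.prepareSearch("test")
        .setSearchType(SearchType.COUNT)
        .addAggregation(AggregationBuilders.terms("colors").field("color").size(3))
        .get();

Terms colors = resp.getAggregations().get("colors");
long total = colors.getSumOfOtherDocCounts();  // 4 in the example response
for (Terms.Bucket bucket : colors.getBuckets()) {
    total += bucket.getDocCount();             // 65 + 14 + 3
}
// total == 86: every value occurrence is accounted for, either by a
// top bucket or by sum_other_doc_count.
```

Note that the accounting spans both phases: each shard reports the doc counts of all its candidate terms minus the buckets it returns, and the reduce phase adds back the counts of buckets dropped while merging shard responses (see the `insertWithOverflow` change in `InternalTerms.reduce` below).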
parent 96e62b3c1b
commit 7ea490dfd1
@@ -25,7 +25,9 @@ Response:
     "aggregations" : {
         "genders" : {
-            "buckets" : [
+            "doc_count_error_upper_bound": 0, <1>
+            "sum_other_doc_count": 0, <2>
+            "buckets" : [ <3>
                 {
                     "key" : "male",
                     "doc_count" : 10
@@ -40,6 +42,10 @@ Response:
 }
 --------------------------------------------------
 
+<1> an upper bound of the error on the document counts for each term, see <<search-aggregations-bucket-terms-aggregation-approximate-counts,below>>
+<2> when there are lots of unique terms, elasticsearch only returns the top terms; this number is the sum of the document counts for all buckets that are not part of the response
+<3> the list of the top buckets, the meaning of `top` being defined by the <<search-aggregations-bucket-terms-aggregation-order,order>>
+
 By default, the `terms` aggregation will return the buckets for the top ten terms ordered by the `doc_count`. One can
 change this default behaviour by setting the `size` parameter.
 
@@ -52,6 +58,7 @@ This means that if the number of unique terms is greater than `size`, the return
 (it could be that the term counts are slightly off and it could even be that a term that should have been in the top
 size buckets was not returned). If set to `0`, the `size` will be set to `Integer.MAX_VALUE`.
 
+[[search-aggregations-bucket-terms-aggregation-approximate-counts]]
 ==== Document counts are approximate
 
 As described above, the document counts (and the results of any sub aggregations) in the terms aggregation are not always
@@ -224,6 +231,7 @@ does not return a particular term which appears in the results from another shar
 aggregation is either sorted by a sub aggregation or in order of ascending document count, the error in the document counts cannot be
 determined and is given a value of -1 to indicate this.
 
+[[search-aggregations-bucket-terms-aggregation-order]]
 ==== Order
 
 The order of the buckets can be customized by setting the `order` parameter. By default, the buckets are ordered by
@@ -45,7 +45,7 @@ abstract class AbstractStringTermsAggregator extends TermsAggregator {
 
     @Override
     public InternalAggregation buildEmptyAggregation() {
-        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
+        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0, 0);
     }
 
 }
@@ -163,8 +163,8 @@ public class DoubleTerms extends InternalTerms {
 
     DoubleTerms() {} // for serialization
 
-    public DoubleTerms(String name, Terms.Order order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    public DoubleTerms(String name, Terms.Order order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
         this.formatter = formatter;
     }
 
@@ -174,8 +174,8 @@ public class DoubleTerms extends InternalTerms {
     }
 
     @Override
-    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        return new DoubleTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        return new DoubleTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
     }
 
     @Override
@@ -197,6 +197,9 @@ public class DoubleTerms extends InternalTerms {
             this.showTermDocCountError = false;
         }
         this.minDocCount = in.readVLong();
+        if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+            this.otherDocCount = in.readVLong();
+        }
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
         for (int i = 0; i < size; i++) {
@@ -222,6 +225,9 @@ public class DoubleTerms extends InternalTerms {
             out.writeBoolean(showTermDocCountError);
         }
         out.writeVLong(minDocCount);
+        if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+            out.writeVLong(otherDocCount);
+        }
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.writeTo(out);
@@ -231,6 +237,7 @@ public class DoubleTerms extends InternalTerms {
     @Override
     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
+        builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
         builder.startArray(CommonFields.BUCKETS);
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.toXContent(builder, params);
@@ -70,7 +70,7 @@ public class DoubleTermsAggregator extends LongTermsAggregator {
         for (int i = 0; i < buckets.length; ++i) {
             buckets[i] = convertToDouble(buckets[i]);
         }
-        return new DoubleTerms(terms.getName(), terms.order, terms.formatter, terms.requiredSize, terms.shardSize, terms.minDocCount, Arrays.asList(buckets), terms.showTermDocCountError, terms.docCountError);
+        return new DoubleTerms(terms.getName(), terms.order, terms.formatter, terms.requiredSize, terms.shardSize, terms.minDocCount, Arrays.asList(buckets), terms.showTermDocCountError, terms.docCountError, terms.otherDocCount);
     }
 
 }
@@ -154,6 +154,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         } else {
             size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
         }
+        long otherDocCount = 0;
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
         OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
         for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
@@ -165,6 +166,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
                 continue;
             }
+            otherDocCount += bucketDocCount;
             spare.globalOrd = globalTermOrd;
             spare.bucketOrd = bucketOrd;
             spare.docCount = bucketDocCount;
@@ -186,6 +188,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
             list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0);
             list[i].bucketOrd = bucket.bucketOrd;
+            otherDocCount -= list[i].docCount;
         }
         //replay any deferred collections
         runDeferredCollections(survivingBucketOrds);
@@ -197,7 +200,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             bucket.docCountError = 0;
         }
 
-        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
+        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount);
     }
 
     /** This is used internally only, just for compare using global ordinal instead of term bytes in the PQ */
@@ -39,6 +39,7 @@ import java.util.*;
 public abstract class InternalTerms extends InternalAggregation implements Terms, ToXContent, Streamable {
 
     protected static final String DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME = "doc_count_error_upper_bound";
+    protected static final String SUM_OF_OTHER_DOC_COUNTS = "sum_other_doc_count";
 
     public static abstract class Bucket extends Terms.Bucket {
 
@@ -112,10 +113,11 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
     protected Map<String, Bucket> bucketMap;
     protected long docCountError;
     protected boolean showTermDocCountError;
+    protected long otherDocCount;
 
     protected InternalTerms() {} // for serialization
 
-    protected InternalTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount, List<Bucket> buckets, boolean showTermDocCountError, long docCountError) {
+    protected InternalTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount, List<Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
         super(name);
         this.order = order;
         this.requiredSize = requiredSize;
@@ -124,6 +126,7 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         this.buckets = buckets;
         this.showTermDocCountError = showTermDocCountError;
         this.docCountError = docCountError;
+        this.otherDocCount = otherDocCount;
     }
 
     @Override
@@ -147,14 +150,21 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         return docCountError;
     }
 
+    @Override
+    public long getSumOfOtherDocCounts() {
+        return otherDocCount;
+    }
+
     @Override
     public InternalAggregation reduce(ReduceContext reduceContext) {
         List<InternalAggregation> aggregations = reduceContext.aggregations();
 
         Multimap<Object, InternalTerms.Bucket> buckets = ArrayListMultimap.create();
         long sumDocCountError = 0;
+        long otherDocCount = 0;
         for (InternalAggregation aggregation : aggregations) {
             InternalTerms terms = (InternalTerms) aggregation;
+            otherDocCount += terms.getSumOfOtherDocCounts();
             final long thisAggDocCountError;
             if (terms.buckets.size() < this.shardSize || this.order == InternalOrder.TERM_ASC || this.order == InternalOrder.TERM_DESC) {
                 thisAggDocCountError = 0;
@@ -190,7 +200,10 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
                 }
             }
             if (b.docCount >= minDocCount) {
-                ordered.insertWithOverflow(b);
+                Terms.Bucket removed = ordered.insertWithOverflow(b);
+                if (removed != null) {
+                    otherDocCount += removed.getDocCount();
+                }
             }
         }
         Bucket[] list = new Bucket[ordered.size()];
@@ -203,9 +216,9 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         } else {
             docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
         }
-        return newAggregation(name, Arrays.asList(list), showTermDocCountError, docCountError);
+        return newAggregation(name, Arrays.asList(list), showTermDocCountError, docCountError, otherDocCount);
     }
 
-    protected abstract InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError);
+    protected abstract InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount);
 
 }
@@ -163,8 +163,8 @@ public class LongTerms extends InternalTerms {
 
     LongTerms() {} // for serialization
 
-    public LongTerms(String name, Terms.Order order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    public LongTerms(String name, Terms.Order order, @Nullable ValueFormatter formatter, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
         this.formatter = formatter;
     }
 
@@ -174,8 +174,8 @@ public class LongTerms extends InternalTerms {
     }
 
     @Override
-    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        return new LongTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        return new LongTerms(name, order, formatter, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
     }
 
     @Override
@@ -197,6 +197,9 @@ public class LongTerms extends InternalTerms {
             this.showTermDocCountError = false;
         }
         this.minDocCount = in.readVLong();
+        if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+            this.otherDocCount = in.readVLong();
+        }
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
         for (int i = 0; i < size; i++) {
@@ -222,6 +225,9 @@ public class LongTerms extends InternalTerms {
             out.writeBoolean(showTermDocCountError);
         }
         out.writeVLong(minDocCount);
+        if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+            out.writeVLong(otherDocCount);
+        }
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.writeTo(out);
@@ -231,6 +237,7 @@ public class LongTerms extends InternalTerms {
     @Override
     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
+        builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
         builder.startArray(CommonFields.BUCKETS);
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.toXContent(builder, params);
@@ -122,6 +122,7 @@ public class LongTermsAggregator extends TermsAggregator {
 
         final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
 
+        long otherDocCount = 0;
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
         LongTerms.Bucket spare = null;
         for (long i = 0; i < bucketOrds.size(); i++) {
@@ -130,6 +131,7 @@ public class LongTermsAggregator extends TermsAggregator {
             }
             spare.term = bucketOrds.get(i);
             spare.docCount = bucketDocCount(i);
+            otherDocCount += spare.docCount;
             spare.bucketOrd = i;
             if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
                 spare = (LongTerms.Bucket) ordered.insertWithOverflow(spare);
@@ -143,6 +145,7 @@ public class LongTermsAggregator extends TermsAggregator {
             final LongTerms.Bucket bucket = (LongTerms.Bucket) ordered.pop();
             survivingBucketOrds[i] = bucket.bucketOrd;
             list[i] = bucket;
+            otherDocCount -= bucket.docCount;
         }
 
         runDeferredCollections(survivingBucketOrds);
@@ -153,13 +156,13 @@ public class LongTermsAggregator extends TermsAggregator {
             list[i].docCountError = 0;
         }
 
-        return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
+        return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount);
     }
 
 
     @Override
     public InternalAggregation buildEmptyAggregation() {
-        return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
+        return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0, 0);
     }
 
     @Override
@@ -159,8 +159,8 @@ public class StringTerms extends InternalTerms {
 
     StringTerms() {} // for serialization
 
-    public StringTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    public StringTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        super(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
     }
 
     @Override
@@ -169,8 +169,8 @@ public class StringTerms extends InternalTerms {
     }
 
     @Override
-    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError) {
-        return new StringTerms(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError);
+    protected InternalTerms newAggregation(String name, List<InternalTerms.Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
+        return new StringTerms(name, order, requiredSize, shardSize, minDocCount, buckets, showTermDocCountError, docCountError, otherDocCount);
     }
 
    @Override
@@ -191,6 +191,9 @@ public class StringTerms extends InternalTerms {
             this.showTermDocCountError = false;
         }
         this.minDocCount = in.readVLong();
+        if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+            this.otherDocCount = in.readVLong();
+        }
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<>(size);
         for (int i = 0; i < size; i++) {
@@ -215,6 +218,9 @@ public class StringTerms extends InternalTerms {
             out.writeBoolean(showTermDocCountError);
         }
         out.writeVLong(minDocCount);
+        if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+            out.writeVLong(otherDocCount);
+        }
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.writeTo(out);
@@ -224,6 +230,7 @@ public class StringTerms extends InternalTerms {
     @Override
     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
+        builder.field(SUM_OF_OTHER_DOC_COUNTS, otherDocCount);
         builder.startArray(CommonFields.BUCKETS);
         for (InternalTerms.Bucket bucket : buckets) {
             bucket.toXContent(builder, params);
@@ -120,6 +120,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
 
         final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
 
+        long otherDocCount = 0;
         BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
         StringTerms.Bucket spare = null;
         for (int i = 0; i < bucketOrds.size(); i++) {
@@ -128,6 +129,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
             }
             bucketOrds.get(i, spare.termBytes);
             spare.docCount = bucketDocCount(i);
+            otherDocCount += spare.docCount;
             spare.bucketOrd = i;
             if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
                 spare = (StringTerms.Bucket) ordered.insertWithOverflow(spare);
@@ -141,6 +143,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
             final StringTerms.Bucket bucket = (StringTerms.Bucket) ordered.pop();
             survivingBucketOrds[i] = bucket.bucketOrd;
             list[i] = bucket;
+            otherDocCount -= bucket.docCount;
         }
         // replay any deferred collections
         runDeferredCollections(survivingBucketOrds);
@@ -153,12 +156,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
             bucket.docCountError = 0;
         }
 
-        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
-    }
-
-    @Override
-    public InternalAggregation buildEmptyAggregation() {
-        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
+        return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0, otherDocCount);
     }
 
     @Override
@@ -86,6 +86,12 @@ public interface Terms extends MultiBucketsAggregation {
      */
     long getDocCountError();
 
+    /**
+     * Return the sum of the document counts of all buckets that did not make
+     * it to the top buckets.
+     */
+    long getSumOfOtherDocCounts();
+
     /**
      * Determines the order by which the term buckets will be sorted
      */
@@ -55,7 +55,7 @@ public class UnmappedTerms extends InternalTerms {
     UnmappedTerms() {} // for serialization
 
     public UnmappedTerms(String name, Terms.Order order, int requiredSize, int shardSize, long minDocCount) {
-        super(name, order, requiredSize, shardSize, minDocCount, BUCKETS, false, 0);
+        super(name, order, requiredSize, shardSize, minDocCount, BUCKETS, false, 0, 0);
     }
 
     @Override
@@ -93,13 +93,14 @@ public class UnmappedTerms extends InternalTerms {
     }
 
     @Override
-    protected InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError) {
+    protected InternalTerms newAggregation(String name, List<Bucket> buckets, boolean showTermDocCountError, long docCountError, long otherDocCount) {
         throw new UnsupportedOperationException("How did you get there?");
     }
 
     @Override
     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(InternalTerms.DOC_COUNT_ERROR_UPPER_BOUND_FIELD_NAME, docCountError);
+        builder.field(SUM_OF_OTHER_DOC_COUNTS, 0);
         builder.startArray(CommonFields.BUCKETS).endArray();
         return builder;
     }
@@ -0,0 +1,82 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory.ExecutionMode;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Ignore;
+
+import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
+
+@Ignore
+public abstract class AbstractTermsTests extends ElasticsearchIntegrationTest {
+
+    public String randomExecutionHint() {
+        return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
+    }
+
+    private static long sumOfDocCounts(Terms terms) {
+        long sumOfDocCounts = terms.getSumOfOtherDocCounts();
+        for (Terms.Bucket b : terms.getBuckets()) {
+            sumOfDocCounts += b.getDocCount();
+        }
+        return sumOfDocCounts;
+    }
+
+    public void testOtherDocCount(String... fieldNames) {
+        for (String fieldName : fieldNames) {
+            SearchResponse allTerms = client().prepareSearch("idx")
+                    .addAggregation(terms("terms")
+                            .executionHint(randomExecutionHint())
+                            .field(fieldName)
+                            .size(0)
+                            .collectMode(randomFrom(SubAggCollectionMode.values())))
+                    .get();
+            assertSearchResponse(allTerms);
+
+            Terms terms = allTerms.getAggregations().get("terms");
+            assertEquals(0, terms.getSumOfOtherDocCounts()); // size is 0
+            final long sumOfDocCounts = sumOfDocCounts(terms);
+            final int totalNumTerms = terms.getBuckets().size();
+
+            for (int size = 1; size < totalNumTerms + 2; size += randomIntBetween(1, 5)) {
+                for (int shardSize = size; shardSize <= totalNumTerms + 2; shardSize += randomIntBetween(1, 5)) {
+                    SearchResponse resp = client().prepareSearch("idx")
+                            .addAggregation(terms("terms")
+                                    .executionHint(randomExecutionHint())
+                                    .field(fieldName)
+                                    .size(size)
+                                    .shardSize(shardSize)
+                                    .collectMode(randomFrom(SubAggCollectionMode.values())))
+                            .get();
+                    assertSearchResponse(resp);
+                    terms = resp.getAggregations().get("terms");
+                    assertEquals(Math.min(size, totalNumTerms), terms.getBuckets().size());
+                    assertEquals(sumOfDocCounts, sumOfDocCounts(terms));
+                }
+            }
+        }
+    }
+
+}
@@ -53,7 +53,7 @@ import static org.hamcrest.core.IsNull.notNullValue;
  *
  */
 @ElasticsearchIntegrationTest.SuiteScopeTest
-public class DoubleTermsTests extends ElasticsearchIntegrationTest {
+public class DoubleTermsTests extends AbstractTermsTests {
 
     private static final int NUM_DOCS = 5; // TODO: randomize the size?
     private static final String SINGLE_VALUED_FIELD_NAME = "d_value";
@@ -1265,4 +1265,8 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void otherDocCount() {
+        testOtherDocCount(SINGLE_VALUED_FIELD_NAME, MULTI_VALUED_FIELD_NAME);
+    }
 }
@@ -51,7 +51,7 @@ import static org.hamcrest.core.IsNull.notNullValue;
  *
  */
 @ElasticsearchIntegrationTest.SuiteScopeTest
-public class LongTermsTests extends ElasticsearchIntegrationTest {
+public class LongTermsTests extends AbstractTermsTests {
 
     private static final int NUM_DOCS = 5; // TODO randomize the size?
     private static final String SINGLE_VALUED_FIELD_NAME = "l_value";
@@ -1238,4 +1238,8 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {
         }
     }
 
+    @Test
+    public void otherDocCount() {
+        testOtherDocCount(SINGLE_VALUED_FIELD_NAME, MULTI_VALUED_FIELD_NAME);
+    }
 }
@@ -49,7 +49,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllS
 
 @ElasticsearchIntegrationTest.SuiteScopeTest
 @TestLogging("action.admin.indices.refresh:TRACE,action.search.type:TRACE,cluster.service:TRACE")
-public class MinDocCountTests extends ElasticsearchIntegrationTest {
+public class MinDocCountTests extends AbstractTermsTests {
 
     private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);
 
@@ -275,7 +275,7 @@ public class MinDocCountTests extends ElasticsearchIntegrationTest {
                 .setQuery(QUERY)
                 .addAggregation(script.apply(terms("terms"), field)
                         .collectMode(randomFrom(SubAggCollectionMode.values()))
-                        .executionHint(StringTermsTests.randomExecutionHint())
+                        .executionHint(randomExecutionHint())
                         .order(order)
                         .size(cardinality + randomInt(10))
                         .minDocCount(0))
@@ -292,7 +292,7 @@ public class MinDocCountTests extends ElasticsearchIntegrationTest {
                 .setQuery(QUERY)
                 .addAggregation(script.apply(terms("terms"), field)
                         .collectMode(randomFrom(SubAggCollectionMode.values()))
-                        .executionHint(StringTermsTests.randomExecutionHint())
+                        .executionHint(randomExecutionHint())
                         .order(order)
                         .size(size)
                         .include(include)
@@ -255,9 +255,9 @@ public class SignificantTermsSignificanceScoreTests extends ElasticsearchIntegra
         classes.toXContent(responseBuilder, null);
         String result = null;
         if (type.equals("long")) {
-            result = "\"class\"{\"doc_count_error_upper_bound\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":0,\"key_as_string\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":1,\"key_as_string\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
+            result = "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":0,\"key_as_string\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":1,\"key_as_string\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
         } else {
-            result = "\"class\"{\"doc_count_error_upper_bound\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
+            result = "\"class\"{\"doc_count_error_upper_bound\":0,\"sum_other_doc_count\":0,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"sig_terms\":{\"doc_count\":4,\"buckets\":[{\"key\":\"0\",\"doc_count\":4,\"score\":0.39999999999999997,\"bg_count\":5}]}},{\"key\":\"1\",\"doc_count\":3,\"sig_terms\":{\"doc_count\":3,\"buckets\":[{\"key\":\"1\",\"doc_count\":3,\"score\":0.75,\"bg_count\":4}]}}]}";
         }
         assertThat(responseBuilder.string(), equalTo(result));
 
@@ -58,16 +58,12 @@ import static org.hamcrest.core.IsNull.nullValue;
  *
  */
 @ElasticsearchIntegrationTest.SuiteScopeTest
-public class StringTermsTests extends ElasticsearchIntegrationTest {
+public class StringTermsTests extends AbstractTermsTests {
 
     private static final String SINGLE_VALUED_FIELD_NAME = "s_value";
     private static final String MULTI_VALUED_FIELD_NAME = "s_values";
     private static Map<String, Map<String, Object>> expectedMultiSortBuckets;
 
-    public static String randomExecutionHint() {
-        return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
-    }
-
     @Override
     public void setupSuiteScopeCluster() throws Exception {
         createIndex("idx");
@@ -248,7 +244,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
                         .collectMode(randomFrom(SubAggCollectionMode.values())))
                 .execute().actionGet();
 
-        assertSearchResponse(response);System.out.println(response);
+        assertSearchResponse(response);
 
         Terms terms = response.getAggregations().get("terms");
         assertThat(terms, notNullValue());
@@ -1678,4 +1674,9 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         terms = response.getAggregations().get("terms");
         assertEquals(5L, terms.getBucketByKey("i").getDocCount());
     }
 
+    @Test
+    public void otherDocCount() {
+        testOtherDocCount(SINGLE_VALUED_FIELD_NAME, MULTI_VALUED_FIELD_NAME);
+    }
 }