Aggregations: Adds other bucket to filters aggregation

The filters aggregation now has an option to add an 'other' bucket which, when turned on, will contain all documents that do not match any of the defined filters. There is also an option to change the name of the 'other' bucket from the default of '_other_'.

Closes #11289
Colin Goodheart-Smithe 2015-06-30 13:54:35 +01:00
parent ab80130c10
commit e366d0380d
5 changed files with 343 additions and 17 deletions
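
As a rough usage sketch of the new options (the `client` instance, index name and field names here are illustrative, not part of this change), the Java API added below could be exercised like this:

[source,java]
--------------------------------------------------
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.filters;

// "logs", "body" and the Client instance are assumptions made for this sketch.
SearchResponse response = client.prepareSearch("logs")
        .addAggregation(
                filters("messages")
                        .filter("errors", termQuery("body", "error"))
                        .filter("warnings", termQuery("body", "warning"))
                        .otherBucket(true))   // adds a bucket, keyed "_other_" by default
        .execute().actionGet();

// otherBucketKey("other_messages") would rename that bucket and implicitly enable it.
--------------------------------------------------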

View File

@ -37,6 +37,8 @@ public class FiltersAggregationBuilder extends AggregationBuilder<FiltersAggrega
private Map<String, QueryBuilder> keyedFilters = null;
private List<QueryBuilder> nonKeyedFilters = null;
private Boolean otherBucket;
private String otherBucketKey;
/**
* Sole constructor.
@ -70,6 +72,22 @@ public class FiltersAggregationBuilder extends AggregationBuilder<FiltersAggrega
return this;
}
/**
* Include a bucket for documents not matching any filter
*/
public FiltersAggregationBuilder otherBucket(boolean otherBucket) {
this.otherBucket = otherBucket;
return this;
}
/**
* The key to use for the bucket for documents not matching any filter. Will
* implicitly enable the other bucket if set.
*/
public FiltersAggregationBuilder otherBucketKey(String otherBucketKey) {
this.otherBucketKey = otherBucketKey;
return this;
}
@Override
protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
@ -82,7 +100,7 @@ public class FiltersAggregationBuilder extends AggregationBuilder<FiltersAggrega
}
if (keyedFilters != null) {
builder.startObject("filters");
builder.startObject(FiltersParser.FILTERS_FIELD.getPreferredName());
for (Map.Entry<String, QueryBuilder> entry : keyedFilters.entrySet()) {
builder.field(entry.getKey());
entry.getValue().toXContent(builder, params);
@ -90,13 +108,19 @@ public class FiltersAggregationBuilder extends AggregationBuilder<FiltersAggrega
builder.endObject();
}
if (nonKeyedFilters != null) {
builder.startArray("filters");
builder.startArray(FiltersParser.FILTERS_FIELD.getPreferredName());
for (QueryBuilder filterBuilder : nonKeyedFilters) {
filterBuilder.toXContent(builder, params);
}
builder.endArray();
}
if (otherBucketKey != null) {
builder.field(FiltersParser.OTHER_BUCKET_KEY_FIELD.getPreferredName(), otherBucketKey);
}
if (otherBucket != null) {
builder.field(FiltersParser.OTHER_BUCKET_FIELD.getPreferredName(), otherBucket);
}
return builder.endObject();
}
}
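
Per the internalXContent changes above, a builder with keyed filters and otherBucketKey set would serialize its aggregation body roughly as follows (a sketch; the filter contents are illustrative):

[source,js]
--------------------------------------------------
{
    "filters" : {
        "errors" : { "term" : { "body" : "error" } },
        "warnings" : { "term" : { "body" : "warning" } }
    },
    "other_bucket_key" : "other_messages"
}
--------------------------------------------------

If otherBucket were also set explicitly, an "other_bucket" boolean field would be written after the key.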

View File

@ -60,14 +60,25 @@ public class FiltersAggregator extends BucketsAggregator {
private final String[] keys;
private final Weight[] filters;
private final boolean keyed;
private final boolean showOtherBucket;
private final String otherBucketKey;
private final int totalNumKeys;
public FiltersAggregator(String name, AggregatorFactories factories, List<KeyedFilter> filters, boolean keyed, AggregationContext aggregationContext,
public FiltersAggregator(String name, AggregatorFactories factories, List<KeyedFilter> filters, boolean keyed, String otherBucketKey,
AggregationContext aggregationContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
this.keyed = keyed;
this.keys = new String[filters.size()];
this.filters = new Weight[filters.size()];
this.showOtherBucket = otherBucketKey != null;
this.otherBucketKey = otherBucketKey;
if (showOtherBucket) {
this.totalNumKeys = filters.size() + 1;
} else {
this.totalNumKeys = filters.size();
}
for (int i = 0; i < filters.size(); ++i) {
KeyedFilter keyedFilter = filters.get(i);
this.keys[i] = keyedFilter.key;
@ -86,11 +97,16 @@ public class FiltersAggregator extends BucketsAggregator {
return new LeafBucketCollectorBase(sub, null) {
@Override
public void collect(int doc, long bucket) throws IOException {
boolean matched = false;
for (int i = 0; i < bits.length; i++) {
if (bits[i].get(doc)) {
collectBucket(sub, doc, bucketOrd(bucket, i));
matched = true;
}
}
if (showOtherBucket && !matched) {
collectBucket(sub, doc, bucketOrd(bucket, bits.length));
}
}
};
}
@ -103,6 +119,13 @@ public class FiltersAggregator extends BucketsAggregator {
InternalFilters.Bucket bucket = new InternalFilters.Bucket(keys[i], bucketDocCount(bucketOrd), bucketAggregations(bucketOrd), keyed);
buckets.add(bucket);
}
// other bucket
if (showOtherBucket) {
long bucketOrd = bucketOrd(owningBucketOrdinal, keys.length);
InternalFilters.Bucket bucket = new InternalFilters.Bucket(otherBucketKey, bucketDocCount(bucketOrd),
bucketAggregations(bucketOrd), keyed);
buckets.add(bucket);
}
return new InternalFilters(name, buckets, keyed, pipelineAggregators(), metaData());
}
@ -118,24 +141,26 @@ public class FiltersAggregator extends BucketsAggregator {
}
final long bucketOrd(long owningBucketOrdinal, int filterOrd) {
return owningBucketOrdinal * filters.length + filterOrd;
return owningBucketOrdinal * totalNumKeys + filterOrd;
}
public static class Factory extends AggregatorFactory {
private final List<KeyedFilter> filters;
private boolean keyed;
private String otherBucketKey;
public Factory(String name, List<KeyedFilter> filters, boolean keyed) {
public Factory(String name, List<KeyedFilter> filters, boolean keyed, String otherBucketKey) {
super(name, InternalFilters.TYPE.name());
this.filters = filters;
this.keyed = keyed;
this.otherBucketKey = otherBucketKey;
}
@Override
public Aggregator createInternal(AggregationContext context, Aggregator parent, boolean collectsFromSingleBucket,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
return new FiltersAggregator(name, factories, filters, keyed, context, parent, pipelineAggregators, metaData);
return new FiltersAggregator(name, factories, filters, keyed, otherBucketKey, context, parent, pipelineAggregators, metaData);
}
}
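
The bucketOrd change above gives every owning bucket a contiguous block of totalNumKeys ordinals, with the other bucket (when enabled) occupying the last slot of each block. A standalone sketch of that layout (class and variable names here are illustrative, not the aggregator's actual fields):

[source,java]
--------------------------------------------------
public class BucketOrdLayoutSketch {

    // Mirrors FiltersAggregator.bucketOrd(owningBucketOrdinal, filterOrd).
    static long bucketOrd(long owningBucketOrdinal, int filterOrd, int totalNumKeys) {
        return owningBucketOrdinal * totalNumKeys + filterOrd;
    }

    public static void main(String[] args) {
        int numFilters = 2;
        int totalNumKeys = numFilters + 1; // other bucket enabled, as in the aggregator above
        for (long owning = 0; owning < 2; owning++) {
            for (int i = 0; i < numFilters; i++) {
                System.out.println("owning " + owning + ", filter " + i
                        + " -> ordinal " + bucketOrd(owning, i, totalNumKeys));
            }
            // The other bucket reuses the same arithmetic with filterOrd == numFilters.
            System.out.println("owning " + owning + ", other    -> ordinal "
                    + bucketOrd(owning, numFilters, totalNumKeys));
        }
        // Prints ordinals 0, 1, 2 for owning bucket 0 and 3, 4, 5 for owning bucket 1.
    }
}
--------------------------------------------------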

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.aggregations.bucket.filters;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.ParsedQuery;
@ -36,6 +37,10 @@ import java.util.List;
*/
public class FiltersParser implements Aggregator.Parser {
public static final ParseField FILTERS_FIELD = new ParseField("filters");
public static final ParseField OTHER_BUCKET_FIELD = new ParseField("other_bucket");
public static final ParseField OTHER_BUCKET_KEY_FIELD = new ParseField("other_bucket_key");
@Override
public String type() {
return InternalFilters.TYPE.name();
@ -49,11 +54,28 @@ public class FiltersParser implements Aggregator.Parser {
XContentParser.Token token = null;
String currentFieldName = null;
Boolean keyed = null;
String otherBucketKey = null;
boolean otherBucket = false;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
if (OTHER_BUCKET_FIELD.match(currentFieldName)) {
otherBucket = parser.booleanValue();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: ["
+ currentFieldName + "].", parser.getTokenLocation());
}
} else if (token == XContentParser.Token.VALUE_STRING) {
if (OTHER_BUCKET_KEY_FIELD.match(currentFieldName)) {
otherBucketKey = parser.text();
otherBucket = true;
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: ["
+ currentFieldName + "].", parser.getTokenLocation());
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("filters".equals(currentFieldName)) {
if (FILTERS_FIELD.match(currentFieldName)) {
keyed = true;
String key = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@ -69,7 +91,7 @@ public class FiltersParser implements Aggregator.Parser {
+ currentFieldName + "].", parser.getTokenLocation());
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("filters".equals(currentFieldName)) {
if (FILTERS_FIELD.match(currentFieldName)) {
keyed = false;
int idx = 0;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
@ -88,7 +110,11 @@ public class FiltersParser implements Aggregator.Parser {
}
}
return new FiltersAggregator.Factory(aggregationName, filters, keyed);
if (otherBucket && otherBucketKey == null) {
otherBucketKey = "_other_";
}
return new FiltersAggregator.Factory(aggregationName, filters, keyed, otherBucketKey);
}
}
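
Given the parsing logic above, the three aggregation bodies below (filter contents illustrative) would be handled as noted in the comments:

[source,js]
--------------------------------------------------
"filters" : {
    "filters" : { "errors" : { "term" : { "body" : "error" } } }
}                                          // no other bucket

"filters" : {
    "filters" : { "errors" : { "term" : { "body" : "error" } } },
    "other_bucket" : true                  // other bucket enabled, keyed "_other_"
}

"filters" : {
    "filters" : { "errors" : { "term" : { "body" : "error" } } },
    "other_bucket_key" : "misc"            // implicitly enables the other bucket, keyed "misc"
}
--------------------------------------------------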

View File

@ -40,7 +40,6 @@ import java.util.List;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.avg;
import static org.elasticsearch.search.aggregations.AggregationBuilders.filters;
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
@ -55,7 +54,7 @@ import static org.hamcrest.core.IsNull.notNullValue;
@ElasticsearchIntegrationTest.SuiteScopeTest
public class FiltersTests extends ElasticsearchIntegrationTest {
static int numDocs, numTag1Docs, numTag2Docs;
static int numDocs, numTag1Docs, numTag2Docs, numOtherDocs;
@Override
public void setupSuiteScopeCluster() throws Exception {
@ -63,6 +62,7 @@ public class FiltersTests extends ElasticsearchIntegrationTest {
createIndex("idx2");
numDocs = randomIntBetween(5, 20);
numTag1Docs = randomIntBetween(1, numDocs - 1);
numTag2Docs = randomIntBetween(1, numDocs - numTag1Docs);
List<IndexRequestBuilder> builders = new ArrayList<>();
for (int i = 0; i < numTag1Docs; i++) {
XContentBuilder source = jsonBuilder()
@ -76,8 +76,7 @@ public class FiltersTests extends ElasticsearchIntegrationTest {
builders.add(client().prepareIndex("idx", "type", ""+i).setSource(source));
}
}
for (int i = numTag1Docs; i < numDocs; i++) {
numTag2Docs++;
for (int i = numTag1Docs; i < (numTag1Docs + numTag2Docs); i++) {
XContentBuilder source = jsonBuilder()
.startObject()
.field("value", i)
@ -89,6 +88,15 @@ public class FiltersTests extends ElasticsearchIntegrationTest {
builders.add(client().prepareIndex("idx", "type", ""+i).setSource(source));
}
}
for (int i = numTag1Docs + numTag2Docs; i < numDocs; i++) {
numOtherDocs++;
XContentBuilder source = jsonBuilder().startObject().field("value", i).field("tag", "tag3").field("name", "name" + i)
.endObject();
builders.add(client().prepareIndex("idx", "type", "" + i).setSource(source));
if (randomBoolean()) {
builders.add(client().prepareIndex("idx", "type", "" + i).setSource(source));
}
}
prepareCreate("empty_bucket_idx").addMapping("type", "value", "type=integer").execute().actionGet();
for (int i = 0; i < 2; i++) {
builders.add(client().prepareIndex("empty_bucket_idx", "type", ""+i).setSource(jsonBuilder()
@ -188,7 +196,7 @@ public class FiltersTests extends ElasticsearchIntegrationTest {
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
sum = 0;
for (int i = numTag1Docs; i < numDocs; ++i) {
for (int i = numTag1Docs; i < (numTag1Docs + numTag2Docs); ++i) {
sum += i;
}
assertThat(bucket.getAggregations().asList().isEmpty(), is(false));
@ -272,4 +280,169 @@ public class FiltersTests extends ElasticsearchIntegrationTest {
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
}
@Test
public void otherBucket() throws Exception {
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(
filters("tags").otherBucket(true)
.filter("tag1", termQuery("tag", "tag1"))
.filter("tag2", termQuery("tag", "tag2")))
.execute().actionGet();
assertSearchResponse(response);
Filters filters = response.getAggregations().get("tags");
assertThat(filters, notNullValue());
assertThat(filters.getName(), equalTo("tags"));
assertThat(filters.getBuckets().size(), equalTo(3));
Filters.Bucket bucket = filters.getBucketByKey("tag1");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag1Docs));
bucket = filters.getBucketByKey("tag2");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
bucket = filters.getBucketByKey("_other_");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numOtherDocs));
}
@Test
public void otherNamedBucket() throws Exception {
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(
filters("tags").otherBucketKey("foobar")
.filter("tag1", termQuery("tag", "tag1"))
.filter("tag2", termQuery("tag", "tag2")))
.execute().actionGet();
assertSearchResponse(response);
Filters filters = response.getAggregations().get("tags");
assertThat(filters, notNullValue());
assertThat(filters.getName(), equalTo("tags"));
assertThat(filters.getBuckets().size(), equalTo(3));
Filters.Bucket bucket = filters.getBucketByKey("tag1");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag1Docs));
bucket = filters.getBucketByKey("tag2");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
bucket = filters.getBucketByKey("foobar");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numOtherDocs));
}
@Test
public void other_nonKeyed() throws Exception {
SearchResponse response = client().prepareSearch("idx")
.addAggregation(
filters("tags").otherBucket(true)
.filter(termQuery("tag", "tag1"))
.filter(termQuery("tag", "tag2")))
.execute().actionGet();
assertSearchResponse(response);
Filters filters = response.getAggregations().get("tags");
assertThat(filters, notNullValue());
assertThat(filters.getName(), equalTo("tags"));
assertThat(filters.getBuckets().size(), equalTo(3));
Collection<? extends Filters.Bucket> buckets = filters.getBuckets();
Iterator<? extends Filters.Bucket> itr = buckets.iterator();
Filters.Bucket bucket = itr.next();
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag1Docs));
bucket = itr.next();
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
bucket = itr.next();
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numOtherDocs));
}
@Test
public void otherWithSubAggregation() throws Exception {
SearchResponse response = client().prepareSearch("idx")
.addAggregation(
filters("tags").otherBucket(true)
.filter("tag1", termQuery("tag", "tag1"))
.filter("tag2", termQuery("tag", "tag2"))
.subAggregation(avg("avg_value").field("value")))
.execute().actionGet();
assertSearchResponse(response);
Filters filters = response.getAggregations().get("tags");
assertThat(filters, notNullValue());
assertThat(filters.getName(), equalTo("tags"));
assertThat(filters.getBuckets().size(), equalTo(3));
Object[] propertiesKeys = (Object[]) filters.getProperty("_key");
Object[] propertiesDocCounts = (Object[]) filters.getProperty("_count");
Object[] propertiesCounts = (Object[]) filters.getProperty("avg_value.value");
Filters.Bucket bucket = filters.getBucketByKey("tag1");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag1Docs));
long sum = 0;
for (int i = 0; i < numTag1Docs; ++i) {
sum += i + 1;
}
assertThat(bucket.getAggregations().asList().isEmpty(), is(false));
Avg avgValue = bucket.getAggregations().get("avg_value");
assertThat(avgValue, notNullValue());
assertThat(avgValue.getName(), equalTo("avg_value"));
assertThat(avgValue.getValue(), equalTo((double) sum / numTag1Docs));
assertThat((String) propertiesKeys[0], equalTo("tag1"));
assertThat((long) propertiesDocCounts[0], equalTo((long) numTag1Docs));
assertThat((double) propertiesCounts[0], equalTo((double) sum / numTag1Docs));
bucket = filters.getBucketByKey("tag2");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numTag2Docs));
sum = 0;
for (int i = numTag1Docs; i < (numTag1Docs + numTag2Docs); ++i) {
sum += i;
}
assertThat(bucket.getAggregations().asList().isEmpty(), is(false));
avgValue = bucket.getAggregations().get("avg_value");
assertThat(avgValue, notNullValue());
assertThat(avgValue.getName(), equalTo("avg_value"));
assertThat(avgValue.getValue(), equalTo((double) sum / numTag2Docs));
assertThat((String) propertiesKeys[1], equalTo("tag2"));
assertThat((long) propertiesDocCounts[1], equalTo((long) numTag2Docs));
assertThat((double) propertiesCounts[1], equalTo((double) sum / numTag2Docs));
bucket = filters.getBucketByKey("_other_");
assertThat(bucket, Matchers.notNullValue());
assertThat(bucket.getDocCount(), equalTo((long) numOtherDocs));
sum = 0;
for (int i = numTag1Docs + numTag2Docs; i < numDocs; ++i) {
sum += i;
}
assertThat(bucket.getAggregations().asList().isEmpty(), is(false));
avgValue = bucket.getAggregations().get("avg_value");
assertThat(avgValue, notNullValue());
assertThat(avgValue.getName(), equalTo("avg_value"));
assertThat(avgValue.getValue(), equalTo((double) sum / numOtherDocs));
assertThat((String) propertiesKeys[2], equalTo("_other_"));
assertThat((long) propertiesDocCounts[2], equalTo((long) numOtherDocs));
assertThat((double) propertiesCounts[2], equalTo((double) sum / numOtherDocs));
}
}

View File

@ -126,3 +126,81 @@ request. The response for this example would be:
}
...
--------------------------------------------------
==== `Other` Bucket
The `other_bucket` parameter can be set to add a bucket to the response which will contain all documents that do
not match any of the given filters. The value of this parameter can be as follows:
`false`:: Does not compute the `other` bucket
`true`:: Returns the `other` bucket either in a bucket (named `_other_` by default) if named filters are being used,
or as the last bucket if anonymous filters are being used
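For example, a request like the following would add a bucket keyed `_other_` for all messages that are neither errors nor warnings:
[source,js]
--------------------------------------------------
{
    "aggs" : {
        "messages" : {
            "filters" : {
                "other_bucket" : true,
                "filters" : {
                    "errors" : { "term" : { "body" : "error" }},
                    "warnings" : { "term" : { "body" : "warning" }}
                }
            }
        }
    }
}
--------------------------------------------------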
The `other_bucket_key` parameter can be used to set the key for the `other` bucket to a value other than the default `_other_`. Setting
this parameter will implicitly set the `other_bucket` parameter to `true`.
The following snippet shows a request where the `other` bucket is requested to be named `other_messages`.
[source,js]
--------------------------------------------------
{
"aggs" : {
"messages" : {
"filters" : {
"other_bucket": "other_messages",
"filters" : {
"errors" : { "term" : { "body" : "error" }},
"warnings" : { "term" : { "body" : "warning" }}
}
},
"aggs" : {
"monthly" : {
"histogram" : {
"field" : "timestamp",
"interval" : "1M"
}
}
}
}
}
}
--------------------------------------------------
The response would be something like the following:
[source,js]
--------------------------------------------------
...
"aggs" : {
"messages" : {
"buckets" : {
"errors" : {
"doc_count" : 34,
"monthly" : {
"buckets" : [
... // the histogram monthly breakdown
]
}
},
"warnings" : {
"doc_count" : 439,
"monthly" : {
"buckets" : [
... // the histogram monthly breakdown
]
}
},
"other_messages" : {
"doc_count" : 237,
"monthly" : {
"buckets" : [
... // the histogram monthly breakdown
]
}
}
}
}
}
}
...
--------------------------------------------------