Filter(s) aggregation should create weights only once.

We have a performance bug: if a filter aggregation sits below a terms
aggregation with a cardinality of 1000, we call Query.createWeight
1000 times as well. However, Query.createWeight can be a costly operation;
for instance, in the case of a TermQuery it seeks the term in every
segment. Instead, we should create the Weight once, and then get as many
iterators as we need from this Weight.
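
For illustration only (not part of this commit), here is a minimal sketch of that
pattern against the Lucene 5.x API used by Elasticsearch 2.x; the WeightReuseSketch
class and countMatches method are invented for this example:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

class WeightReuseSketch {

    /** Counts matches of {@code filter} across all segments while creating the Weight only once. */
    static long countMatches(IndexSearcher searcher, Query filter) throws IOException {
        // Expensive step: for a TermQuery this is where the term is looked up in every segment.
        Weight weight = searcher.createNormalizedWeight(filter, false);
        long count = 0;
        for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
            // Cheap step: one iterator per segment, pulled from the already-built Weight.
            Scorer scorer = weight.scorer(leaf);
            if (scorer == null) {
                continue; // no matching documents in this segment
            }
            while (scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
        }
        return count;
    }
}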

I found this problem while diagnosing a performance regression after an
upgrade from 1.7 to 2.1[1]. The problem was not introduced in 2.x, but 1.7's
very aggressive filter caching had hidden it, since a cached TermFilter no
longer needs to seek the term.

Doing this work only once, rather than once per aggregator, is not easy with
the current API, but I discussed it with Colin: aggregator factories will need
to get an init method for other reasons anyway, and that is where we will be
able to put the steps that need to be performed only once, no matter how many
aggregators need to be created.
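
For context, the change below takes an interim approach: the factory caches the
Weight and rebuilds it only when the searcher changes. The eventual init hook
mentioned above might look roughly like the following sketch; the class name
InitOnceFilterFactory and the init/create signatures are hypothetical, not the
actual Elasticsearch API:

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import org.elasticsearch.search.aggregations.Aggregator;

// Hypothetical factory shape: one-time work happens in init(), while create()
// stays lightweight even though it may be called once per parent bucket,
// i.e. thousands of times.
abstract class InitOnceFilterFactory {

    private Weight weight;

    // Called once per search request, before any aggregator is created.
    public void init(IndexSearcher searcher, Query filter) throws IOException {
        this.weight = searcher.createNormalizedWeight(filter, false);
    }

    // May be called for every bucket of the parent aggregation; must only reuse the cached Weight.
    public abstract Aggregator create() throws IOException;

    protected Weight weight() {
        return weight;
    }
}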

[1] https://discuss.elastic.co/t/aggregations-in-2-1-0-much-slower-than-1-6-0/38056/26
Adrien Grand 2016-01-14 19:01:54 +01:00
parent dc51dd0056
commit cc41e6e7fe
4 changed files with 88 additions and 15 deletions

FilterAggregator.java

@@ -19,6 +19,7 @@
 package org.elasticsearch.search.aggregations.bucket.filter;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
@@ -45,13 +46,13 @@ public class FilterAggregator extends SingleBucketAggregator {
     private final Weight filter;
 
     public FilterAggregator(String name,
-                            Query filter,
+                            Weight filter,
                             AggregatorFactories factories,
                             AggregationContext aggregationContext,
                             Aggregator parent, List<PipelineAggregator> pipelineAggregators,
                             Map<String, Object> metaData) throws IOException {
         super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
-        this.filter = aggregationContext.searchContext().searcher().createNormalizedWeight(filter, false);
+        this.filter = filter;
     }
 
     @Override
@@ -89,10 +90,22 @@ public class FilterAggregator extends SingleBucketAggregator {
             this.filter = filter;
         }
 
+        // TODO: refactor in order to initialize the factory once with its parent,
+        // the context, etc. and then have a no-arg lightweight create method
+        // (since create may be called thousands of times)
+
+        private IndexSearcher searcher;
+        private Weight weight;
+
         @Override
         public Aggregator createInternal(AggregationContext context, Aggregator parent, boolean collectsFromSingleBucket,
                 List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
-            return new FilterAggregator(name, filter, factories, context, parent, pipelineAggregators, metaData);
+            IndexSearcher contextSearcher = context.searchContext().searcher();
+            if (searcher != contextSearcher) {
+                searcher = contextSearcher;
+                weight = contextSearcher.createNormalizedWeight(filter, false);
+            }
+            return new FilterAggregator(name, weight, factories, context, parent, pipelineAggregators, metaData);
         }
     }

FiltersAggregator.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.search.aggregations.bucket.filters;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
@@ -57,31 +58,26 @@ public class FiltersAggregator extends BucketsAggregator {
     }
 
     private final String[] keys;
-    private final Weight[] filters;
+    private Weight[] filters;
     private final boolean keyed;
     private final boolean showOtherBucket;
    private final String otherBucketKey;
     private final int totalNumKeys;
 
-    public FiltersAggregator(String name, AggregatorFactories factories, List<KeyedFilter> filters, boolean keyed, String otherBucketKey,
+    public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Weight[] filters, boolean keyed, String otherBucketKey,
             AggregationContext aggregationContext,
             Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
             throws IOException {
         super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
         this.keyed = keyed;
-        this.keys = new String[filters.size()];
-        this.filters = new Weight[filters.size()];
+        this.keys = keys;
+        this.filters = filters;
         this.showOtherBucket = otherBucketKey != null;
         this.otherBucketKey = otherBucketKey;
         if (showOtherBucket) {
-            this.totalNumKeys = filters.size() + 1;
+            this.totalNumKeys = keys.length + 1;
         } else {
-            this.totalNumKeys = filters.size();
-        }
-        for (int i = 0; i < filters.size(); ++i) {
-            KeyedFilter keyedFilter = filters.get(i);
-            this.keys[i] = keyedFilter.key;
-            this.filters[i] = aggregationContext.searchContext().searcher().createNormalizedWeight(keyedFilter.filter, false);
+            this.totalNumKeys = keys.length;
         }
     }
@@ -146,6 +142,7 @@ public class FiltersAggregator extends BucketsAggregator {
     public static class Factory extends AggregatorFactory {
 
         private final List<KeyedFilter> filters;
+        private final String[] keys;
         private boolean keyed;
         private String otherBucketKey;
@@ -154,12 +151,33 @@ public class FiltersAggregator extends BucketsAggregator {
             this.filters = filters;
             this.keyed = keyed;
             this.otherBucketKey = otherBucketKey;
+            this.keys = new String[filters.size()];
+            for (int i = 0; i < filters.size(); ++i) {
+                KeyedFilter keyedFilter = filters.get(i);
+                this.keys[i] = keyedFilter.key;
+            }
         }
 
+        // TODO: refactor in order to initialize the factory once with its parent,
+        // the context, etc. and then have a no-arg lightweight create method
+        // (since create may be called thousands of times)
+
+        private IndexSearcher searcher;
+        private Weight[] weights;
+
         @Override
         public Aggregator createInternal(AggregationContext context, Aggregator parent, boolean collectsFromSingleBucket,
                 List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
-            return new FiltersAggregator(name, factories, filters, keyed, otherBucketKey, context, parent, pipelineAggregators, metaData);
+            IndexSearcher contextSearcher = context.searchContext().searcher();
+            if (searcher != contextSearcher) {
+                searcher = contextSearcher;
+                weights = new Weight[filters.size()];
+                for (int i = 0; i < filters.size(); ++i) {
+                    KeyedFilter keyedFilter = filters.get(i);
+                    this.weights[i] = contextSearcher.createNormalizedWeight(keyedFilter.filter, false);
+                }
+            }
+            return new FiltersAggregator(name, factories, keys, weights, keyed, otherBucketKey, context, parent, pipelineAggregators, metaData);
         }
     }

FilterIT.java

@@ -42,6 +42,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.histogra
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.core.IsNull.notNullValue;
 
 /**
@@ -145,6 +146,25 @@ public class FilterIT extends ESIntegTestCase {
         assertThat((double) filter.getProperty("avg_value.value"), equalTo((double) sum / numTag1Docs));
     }
 
+    public void testAsSubAggregation() {
+        SearchResponse response = client().prepareSearch("idx")
+                .addAggregation(
+                        histogram("histo").field("value").interval(2L).subAggregation(
+                                filter("filter").filter(matchAllQuery()))).get();
+
+        assertSearchResponse(response);
+
+        Histogram histo = response.getAggregations().get("histo");
+        assertThat(histo, notNullValue());
+        assertThat(histo.getBuckets().size(), greaterThanOrEqualTo(1));
+
+        for (Histogram.Bucket bucket : histo.getBuckets()) {
+            Filter filter = bucket.getAggregations().get("filter");
+            assertThat(filter, notNullValue());
+            assertEquals(bucket.getDocCount(), filter.getDocCount());
+        }
+    }
+
     public void testWithContextBasedSubAggregation() throws Exception {
         try {
             client().prepareSearch("idx")

FiltersIT.java

@@ -44,6 +44,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.filters;
 import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.core.IsNull.notNullValue;
@@ -205,6 +206,27 @@ public class FiltersIT extends ESIntegTestCase {
         assertThat((double) propertiesCounts[1], equalTo((double) sum / numTag2Docs));
     }
 
+    public void testAsSubAggregation() {
+        SearchResponse response = client().prepareSearch("idx")
+                .addAggregation(
+                        histogram("histo").field("value").interval(2L).subAggregation(
+                                filters("filters").filter(matchAllQuery()))).get();
+
+        assertSearchResponse(response);
+
+        Histogram histo = response.getAggregations().get("histo");
+        assertThat(histo, notNullValue());
+        assertThat(histo.getBuckets().size(), greaterThanOrEqualTo(1));
+
+        for (Histogram.Bucket bucket : histo.getBuckets()) {
+            Filters filters = bucket.getAggregations().get("filters");
+            assertThat(filters, notNullValue());
+            assertThat(filters.getBuckets().size(), equalTo(1));
+            Filters.Bucket filterBucket = filters.getBuckets().get(0);
+            assertEquals(bucket.getDocCount(), filterBucket.getDocCount());
+        }
+    }
+
     public void testWithContextBasedSubAggregation() throws Exception {
         try {