From e1679bfe5e1b2be0cf9da24c71d36a059163486b Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Thu, 12 Oct 2017 14:56:32 +0100 Subject: [PATCH] Create weights lazily in filter and filters aggregation (#26983) Previous to this change the weights for the filter and filters aggregation were created in the `Filter(s)AggregatorFactory` which meant that they were created regardless of whether the aggregator actually collects any documents. This meant that for filters that are expensive to initialise, requests would not be quick when the query of the request was (or effectively was) a `match_none` query. This change maintains a single Weight instance for each filter across parent buckets but passes a weight supplier to the aggregator instances which will create the weight on first call and then return that instance for subsequent calls. --- .../bucket/filter/FilterAggregator.java | 7 ++-- .../filter/FilterAggregatorFactory.java | 31 +++++++++++++--- .../bucket/filter/FiltersAggregator.java | 10 +++--- .../filter/FiltersAggregatorFactory.java | 36 +++++++++++++++---- .../bucket/nested/NestedAggregator.java | 1 + .../bucket/filter/FilterAggregatorTests.java | 5 +-- .../bucket/filter/FiltersAggregatorTests.java | 2 +- 7 files changed, 69 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregator.java index c5385c68839..fc4ac58fb15 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregator.java @@ -35,16 +35,17 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.function.Supplier; /** * Aggregate all docs that match a filter. */ public class FilterAggregator extends BucketsAggregator implements SingleBucketAggregator { - private final Weight filter; + private final Supplier filter; public FilterAggregator(String name, - Weight filter, + Supplier filter, AggregatorFactories factories, SearchContext context, Aggregator parent, List pipelineAggregators, @@ -57,7 +58,7 @@ public class FilterAggregator extends BucketsAggregator implements SingleBucketA public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { // no need to provide deleted docs to the filter - final Bits bits = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filter.scorerSupplier(ctx)); + final Bits bits = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filter.get().scorerSupplier(ctx)); return new LeafBucketCollectorBase(sub, null) { @Override public void collect(int doc, long bucket) throws IOException { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorFactory.java index 482bcb3d009..4b54dccbf96 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorFactory.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.aggregations.AggregationInitializationException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -35,20 +36,40 @@ import java.util.Map; public class FilterAggregatorFactory extends AggregatorFactory { - final Weight weight; + private Weight weight; + private Query filter; public FilterAggregatorFactory(String name, QueryBuilder filterBuilder, SearchContext context, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { super(name, context, parent, subFactoriesBuilder, metaData); - IndexSearcher contextSearcher = context.searcher(); - Query filter = filterBuilder.toFilter(context.getQueryShardContext()); - weight = contextSearcher.createNormalizedWeight(filter, false); + filter = filterBuilder.toFilter(context.getQueryShardContext()); + } + + /** + * Returns the {@link Weight} for this filter aggregation, creating it if + * necessary. This is done lazily so that the {@link Weight} is only created + * if the aggregation collects documents reducing the overhead of the + * aggregation in teh case where no documents are collected. + * + * Note that as aggregations are initialsed and executed in a serial manner, + * no concurrency considerations are necessary here. + */ + public Weight getWeight() { + if (weight == null) { + IndexSearcher contextSearcher = context.searcher(); + try { + weight = contextSearcher.createNormalizedWeight(filter, false); + } catch (IOException e) { + throw new AggregationInitializationException("Failed to initialse filter", e); + } + } + return weight; } @Override public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket, List pipelineAggregators, Map metaData) throws IOException { - return new FilterAggregator(name, weight, factories, context, parent, pipelineAggregators, metaData); + return new FilterAggregator(name, () -> this.getWeight(), factories, context, parent, pipelineAggregators, metaData); } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index d488d092360..97724aa8b97 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -45,6 +45,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; public class FiltersAggregator extends BucketsAggregator { @@ -115,13 +116,13 @@ public class FiltersAggregator extends BucketsAggregator { } private final String[] keys; - private Weight[] filters; + private Supplier filters; private final boolean keyed; private final boolean showOtherBucket; private final String otherBucketKey; private final int totalNumKeys; - public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Weight[] filters, boolean keyed, + public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier filters, boolean keyed, String otherBucketKey, SearchContext context, Aggregator parent, List pipelineAggregators, Map metaData) throws IOException { super(name, factories, context, parent, pipelineAggregators, metaData); @@ -141,6 +142,7 @@ public class FiltersAggregator extends BucketsAggregator { public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { // no need to provide deleted docs to the filter + Weight[] filters = this.filters.get(); final Bits[] bits = new Bits[filters.length]; for (int i = 0; i < filters.length; ++i) { bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx)); @@ -164,7 +166,7 @@ public class FiltersAggregator extends BucketsAggregator { @Override public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { - List buckets = new ArrayList<>(filters.length); + List buckets = new ArrayList<>(keys.length); for (int i = 0; i < keys.length; i++) { long bucketOrd = bucketOrd(owningBucketOrdinal, i); InternalFilters.InternalBucket bucket = new InternalFilters.InternalBucket(keys[i], bucketDocCount(bucketOrd), @@ -184,7 +186,7 @@ public class FiltersAggregator extends BucketsAggregator { @Override public InternalAggregation buildEmptyAggregation() { InternalAggregations subAggs = buildEmptySubAggregations(); - List buckets = new ArrayList<>(filters.length); + List buckets = new ArrayList<>(keys.length); for (int i = 0; i < keys.length; i++) { InternalFilters.InternalBucket bucket = new InternalFilters.InternalBucket(keys[i], 0, subAggs, keyed); buckets.add(bucket); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index 07c7af1d19d..048042f05ff 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; +import org.elasticsearch.search.aggregations.AggregationInitializationException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -36,7 +37,8 @@ import java.util.Map; public class FiltersAggregatorFactory extends AggregatorFactory { private final String[] keys; - final Weight[] weights; + private final Query[] filters; + private Weight[] weights; private final boolean keyed; private final boolean otherBucket; private final String otherBucketKey; @@ -48,21 +50,43 @@ public class FiltersAggregatorFactory extends AggregatorFactory pipelineAggregators, Map metaData) throws IOException { - return new FiltersAggregator(name, factories, keys, weights, keyed, otherBucket ? otherBucketKey : null, context, parent, + return new FiltersAggregator(name, factories, keys, () -> getWeights(), keyed, otherBucket ? otherBucketKey : null, context, parent, pipelineAggregators, metaData); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index b39bf864ad2..c7d721baa67 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.nested; import com.carrotsearch.hppc.LongArrayList; + import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.ReaderUtil; diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorTests.java index fb615e66dfb..f3d057d8e8c 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FilterAggregatorTests.java @@ -36,9 +36,6 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.AggregatorTestCase; -import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregatorFactory; -import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter; import org.hamcrest.Matchers; import org.junit.Before; @@ -121,7 +118,7 @@ public class FilterAggregatorTests extends AggregatorTestCase { AggregatorFactory factory = createAggregatorFactory(builder, indexSearcher, fieldType); assertThat(factory, Matchers.instanceOf(FilterAggregatorFactory.class)); FilterAggregatorFactory filterFactory = (FilterAggregatorFactory) factory; - Query parsedQuery = filterFactory.weight.getQuery(); + Query parsedQuery = filterFactory.getWeight().getQuery(); assertThat(parsedQuery, Matchers.instanceOf(BooleanQuery.class)); assertEquals(2, ((BooleanQuery) parsedQuery).clauses().size()); // means the bool query has been parsed as a filter, if it was a query minShouldMatch would diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 0420e9d5b9b..6fdf207249f 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -214,7 +214,7 @@ public class FiltersAggregatorTests extends AggregatorTestCase { AggregatorFactory factory = createAggregatorFactory(builder, indexSearcher, fieldType); assertThat(factory, Matchers.instanceOf(FiltersAggregatorFactory.class)); FiltersAggregatorFactory filtersFactory = (FiltersAggregatorFactory) factory; - Query parsedQuery = filtersFactory.weights[0].getQuery(); + Query parsedQuery = filtersFactory.getWeights()[0].getQuery(); assertThat(parsedQuery, Matchers.instanceOf(BooleanQuery.class)); assertEquals(2, ((BooleanQuery) parsedQuery).clauses().size()); // means the bool query has been parsed as a filter, if it was a query minShouldMatch would