From f9c0e0d4c7536a37b638fc804b229709f8bb72d3 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 16 Jan 2015 16:04:17 +0100 Subject: [PATCH] aggs: The `nested` aggregator's parent filter is n't resolved properly in the case the nested agg gets created on the fly for buckets that are constructed during query execution. The fix is the move the parent filter resolving from the nextReader(...) method to the collect(...) method, because only then any parent nested filter's parent filter is then properly instantiated. Closes #9280 Closes #9335 --- .../bucket/nested/NestedAggregator.java | 48 ++++---- .../aggregations/bucket/NestedTests.java | 106 +++++++++++++++++- 2 files changed, 133 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 23d04a8ea88..dbbf05fd465 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -34,7 +34,6 @@ import org.elasticsearch.index.search.nested.NonNestedDocsFilter; import org.elasticsearch.search.aggregations.*; import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; @@ -51,6 +50,8 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo private DocIdSetIterator childDocs; private BitSet parentDocs; + private LeafReaderContext reader; + public NestedAggregator(String name, AggregatorFactories factories, ObjectMapper objectMapper, AggregationContext aggregationContext, Aggregator parentAggregator, Map metaData, FilterCachingPolicy filterCachingPolicy) throws IOException { super(name, factories, aggregationContext, parentAggregator, metaData); this.parentAggregator = parentAggregator; @@ -59,25 +60,10 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo @Override public void setNextReader(LeafReaderContext reader) { - if (parentFilter == null) { - // The aggs are instantiated in reverse, first the most inner nested aggs and lastly the top level aggs - // So at the time a nested 'nested' aggs is parsed its closest parent nested aggs hasn't been constructed. - // So the trick to set at the last moment just before needed and we can use its child filter as the - // parent filter. - Filter parentFilterNotCached = findClosestNestedPath(parentAggregator); - if (parentFilterNotCached == null) { - parentFilterNotCached = NonNestedDocsFilter.INSTANCE; - } - parentFilter = SearchContext.current().bitsetFilterCache().getBitDocIdSetFilter(parentFilterNotCached); - } - + // Reset parentFilter, so we resolve the parentDocs for each new segment being searched + this.parentFilter = null; + this.reader = reader; try { - BitDocIdSet parentSet = parentFilter.getDocIdSet(reader); - if (DocIdSets.isEmpty(parentSet)) { - parentDocs = null; - } else { - parentDocs = parentSet.bits(); - } // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also null here. DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null); if (DocIdSets.isEmpty(childDocIdSet)) { @@ -98,6 +84,30 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo if (parentDoc == 0 || childDocs == null) { return; } + if (parentFilter == null) { + // The aggs are instantiated in reverse, first the most inner nested aggs and lastly the top level aggs + // So at the time a nested 'nested' aggs is parsed its closest parent nested aggs hasn't been constructed. + // So the trick is to set at the last moment just before needed and we can use its child filter as the + // parent filter. + + // Additional NOTE: Before this logic was performed in the setNextReader(...) method, but the the assumption + // that aggs instances are constructed in reverse doesn't hold when buckets are constructed lazily during + // aggs execution + Filter parentFilterNotCached = findClosestNestedPath(parentAggregator); + if (parentFilterNotCached == null) { + parentFilterNotCached = NonNestedDocsFilter.INSTANCE; + } + parentFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(parentFilterNotCached); + BitDocIdSet parentSet = parentFilter.getDocIdSet(reader); + if (DocIdSets.isEmpty(parentSet)) { + // There are no parentDocs in the segment, so return and set childDocs to null, so we exit early for future invocations. + childDocs = null; + return; + } else { + parentDocs = parentSet.bits(); + } + } + int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); int childDocId; if (childDocs.docID() > prevParentDoc) { diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java index 405391cf86b..4e2a255bf31 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/NestedTests.java @@ -21,8 +21,10 @@ package org.elasticsearch.search.aggregations.bucket; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; +import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.nested.Nested; import org.elasticsearch.search.aggregations.bucket.terms.LongTerms; @@ -39,11 +41,13 @@ import org.junit.Test; import java.util.ArrayList; import java.util.List; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.FilterBuilders.termFilter; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.search.aggregations.AggregationBuilders.*; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.hamcrest.Matchers.*; import static org.hamcrest.core.IsNull.notNullValue; @@ -349,4 +353,102 @@ public class NestedTests extends ElasticsearchIntegrationTest { assertThat(e.getMessage(), containsString("[nested] nested path [incorrect] is not nested")); } } + + @Test + // Test based on: https://github.com/elasticsearch/elasticsearch/issues/9280 + public void testParentFilterResolvedCorrectly() throws Exception { + XContentBuilder mapping = jsonBuilder().startObject().startObject("provider").startObject("properties") + .startObject("comments") + .field("type", "nested") + .startObject("properties") + .startObject("cid").field("type", "long").endObject() + .startObject("identifier").field("type", "string").field("index", "not_analyzed").endObject() + .startObject("tags") + .field("type", "nested") + .startObject("properties") + .startObject("tid").field("type", "long").endObject() + .startObject("name").field("type", "string").field("index", "not_analyzed").endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .startObject("dates") + .field("type", "object") + .startObject("properties") + .startObject("day").field("type", "date").field("format", "dateOptionalTime").endObject() + .startObject("month") + .field("type", "object") + .startObject("properties") + .startObject("end").field("type", "date").field("format", "dateOptionalTime").endObject() + .startObject("start").field("type", "date").field("format", "dateOptionalTime").endObject() + .startObject("label").field("type", "string").field("index", "not_analyzed").endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject().endObject().endObject(); + assertAcked(prepareCreate("idx2") + .setSettings(ImmutableSettings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0)) + .addMapping("provider", mapping)); + + List indexRequests = new ArrayList<>(2); + indexRequests.add(client().prepareIndex("idx2", "provider", "1").setSource("{\"dates\": {\"month\": {\"label\": \"2014-11\", \"end\": \"2014-11-30\", \"start\": \"2014-11-01\"}, \"day\": \"2014-11-30\"}, \"comments\": [{\"cid\": 3,\"identifier\": \"29111\"}, {\"cid\": 4,\"tags\": [{\"tid\" :44,\"name\": \"Roles\"}], \"identifier\": \"29101\"}]}")); + indexRequests.add(client().prepareIndex("idx2", "provider", "2").setSource("{\"dates\": {\"month\": {\"label\": \"2014-12\", \"end\": \"2014-12-31\", \"start\": \"2014-12-01\"}, \"day\": \"2014-12-03\"}, \"comments\": [{\"cid\": 1, \"identifier\": \"29111\"}, {\"cid\": 2,\"tags\": [{\"tid\" : 22, \"name\": \"DataChannels\"}], \"identifier\": \"29101\"}]}")); + indexRandom(true, indexRequests); + + SearchResponse response = client().prepareSearch("idx2").setTypes("provider") + .addAggregation( + terms("startDate").field("dates.month.start").subAggregation( + terms("endDate").field("dates.month.end").subAggregation( + terms("period").field("dates.month.label").subAggregation( + nested("ctxt_idfier_nested").path("comments").subAggregation( + filter("comment_filter").filter(termFilter("comments.identifier", "29111")).subAggregation( + nested("nested_tags").path("comments.tags").subAggregation( + terms("tag").field("comments.tags.name") + ) + ) + ) + ) + ) + ) + ).get(); + assertNoFailures(response); + assertHitCount(response, 2); + + Terms startDate = response.getAggregations().get("startDate"); + assertThat(startDate.getBuckets().size(), equalTo(2)); + Terms.Bucket bucket = startDate.getBucketByKey("1414800000000"); // 2014-11-01T00:00:00.000Z + assertThat(bucket.getDocCount(), equalTo(1l)); + Terms endDate = bucket.getAggregations().get("endDate"); + bucket = endDate.getBucketByKey("1417305600000"); // 2014-11-30T00:00:00.000Z + assertThat(bucket.getDocCount(), equalTo(1l)); + Terms period = bucket.getAggregations().get("period"); + bucket = period.getBucketByKey("2014-11"); + assertThat(bucket.getDocCount(), equalTo(1l)); + Nested comments = bucket.getAggregations().get("ctxt_idfier_nested"); + assertThat(comments.getDocCount(), equalTo(2l)); + Filter filter = comments.getAggregations().get("comment_filter"); + assertThat(filter.getDocCount(), equalTo(1l)); + Nested nestedTags = filter.getAggregations().get("nested_tags"); + assertThat(nestedTags.getDocCount(), equalTo(0l)); // This must be 0 + Terms tags = nestedTags.getAggregations().get("tag"); + assertThat(tags.getBuckets().size(), equalTo(0)); // and this must be empty + + bucket = startDate.getBucketByKey("1417392000000"); // 2014-12-01T00:00:00.000Z + assertThat(bucket.getDocCount(), equalTo(1l)); + endDate = bucket.getAggregations().get("endDate"); + bucket = endDate.getBucketByKey("1419984000000"); // 2014-12-31T00:00:00.000Z + assertThat(bucket.getDocCount(), equalTo(1l)); + period = bucket.getAggregations().get("period"); + bucket = period.getBucketByKey("2014-12"); + assertThat(bucket.getDocCount(), equalTo(1l)); + comments = bucket.getAggregations().get("ctxt_idfier_nested"); + assertThat(comments.getDocCount(), equalTo(2l)); + filter = comments.getAggregations().get("comment_filter"); + assertThat(filter.getDocCount(), equalTo(1l)); + nestedTags = filter.getAggregations().get("nested_tags"); + assertThat(nestedTags.getDocCount(), equalTo(0l)); // This must be 0 + tags = nestedTags.getAggregations().get("tag"); + assertThat(tags.getBuckets().size(), equalTo(0)); // and this must be empty + } }