aggs: The `nested` aggregator's parent filter is n't resolved properly in the case the nested agg gets created on the fly for buckets that are constructed during query execution.

The fix is the move the parent filter resolving from the nextReader(...) method to the collect(...) method, because only then any parent nested filter's parent filter is then properly instantiated.

Closes #9280
Closes #9335
This commit is contained in:
Martijn van Groningen 2015-01-16 16:04:17 +01:00
parent c3f1982f21
commit f9c0e0d4c7
2 changed files with 133 additions and 21 deletions

View File

@ -34,7 +34,6 @@ import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
import org.elasticsearch.search.aggregations.*;
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Map;
@ -51,6 +50,8 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo
private DocIdSetIterator childDocs;
private BitSet parentDocs;
private LeafReaderContext reader;
public NestedAggregator(String name, AggregatorFactories factories, ObjectMapper objectMapper, AggregationContext aggregationContext, Aggregator parentAggregator, Map<String, Object> metaData, FilterCachingPolicy filterCachingPolicy) throws IOException {
super(name, factories, aggregationContext, parentAggregator, metaData);
this.parentAggregator = parentAggregator;
@ -59,25 +60,10 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo
@Override
public void setNextReader(LeafReaderContext reader) {
if (parentFilter == null) {
// The aggs are instantiated in reverse, first the most inner nested aggs and lastly the top level aggs
// So at the time a nested 'nested' aggs is parsed its closest parent nested aggs hasn't been constructed.
// So the trick to set at the last moment just before needed and we can use its child filter as the
// parent filter.
Filter parentFilterNotCached = findClosestNestedPath(parentAggregator);
if (parentFilterNotCached == null) {
parentFilterNotCached = NonNestedDocsFilter.INSTANCE;
}
parentFilter = SearchContext.current().bitsetFilterCache().getBitDocIdSetFilter(parentFilterNotCached);
}
// Reset parentFilter, so we resolve the parentDocs for each new segment being searched
this.parentFilter = null;
this.reader = reader;
try {
BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
if (DocIdSets.isEmpty(parentSet)) {
parentDocs = null;
} else {
parentDocs = parentSet.bits();
}
// In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also null here.
DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
if (DocIdSets.isEmpty(childDocIdSet)) {
@ -98,6 +84,30 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo
if (parentDoc == 0 || childDocs == null) {
return;
}
if (parentFilter == null) {
// The aggs are instantiated in reverse, first the most inner nested aggs and lastly the top level aggs
// So at the time a nested 'nested' aggs is parsed its closest parent nested aggs hasn't been constructed.
// So the trick is to set at the last moment just before needed and we can use its child filter as the
// parent filter.
// Additional NOTE: Before this logic was performed in the setNextReader(...) method, but the the assumption
// that aggs instances are constructed in reverse doesn't hold when buckets are constructed lazily during
// aggs execution
Filter parentFilterNotCached = findClosestNestedPath(parentAggregator);
if (parentFilterNotCached == null) {
parentFilterNotCached = NonNestedDocsFilter.INSTANCE;
}
parentFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(parentFilterNotCached);
BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
if (DocIdSets.isEmpty(parentSet)) {
// There are no parentDocs in the segment, so return and set childDocs to null, so we exit early for future invocations.
childDocs = null;
return;
} else {
parentDocs = parentSet.bits();
}
}
int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
int childDocId;
if (childDocs.docID() > prevParentDoc) {

View File

@ -21,8 +21,10 @@ package org.elasticsearch.search.aggregations.bucket;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
@ -39,11 +41,13 @@ import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.*;
import static org.hamcrest.core.IsNull.notNullValue;
@ -349,4 +353,102 @@ public class NestedTests extends ElasticsearchIntegrationTest {
assertThat(e.getMessage(), containsString("[nested] nested path [incorrect] is not nested"));
}
}
@Test
// Test based on: https://github.com/elasticsearch/elasticsearch/issues/9280
public void testParentFilterResolvedCorrectly() throws Exception {
XContentBuilder mapping = jsonBuilder().startObject().startObject("provider").startObject("properties")
.startObject("comments")
.field("type", "nested")
.startObject("properties")
.startObject("cid").field("type", "long").endObject()
.startObject("identifier").field("type", "string").field("index", "not_analyzed").endObject()
.startObject("tags")
.field("type", "nested")
.startObject("properties")
.startObject("tid").field("type", "long").endObject()
.startObject("name").field("type", "string").field("index", "not_analyzed").endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.startObject("dates")
.field("type", "object")
.startObject("properties")
.startObject("day").field("type", "date").field("format", "dateOptionalTime").endObject()
.startObject("month")
.field("type", "object")
.startObject("properties")
.startObject("end").field("type", "date").field("format", "dateOptionalTime").endObject()
.startObject("start").field("type", "date").field("format", "dateOptionalTime").endObject()
.startObject("label").field("type", "string").field("index", "not_analyzed").endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject().endObject().endObject();
assertAcked(prepareCreate("idx2")
.setSettings(ImmutableSettings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0))
.addMapping("provider", mapping));
List<IndexRequestBuilder> indexRequests = new ArrayList<>(2);
indexRequests.add(client().prepareIndex("idx2", "provider", "1").setSource("{\"dates\": {\"month\": {\"label\": \"2014-11\", \"end\": \"2014-11-30\", \"start\": \"2014-11-01\"}, \"day\": \"2014-11-30\"}, \"comments\": [{\"cid\": 3,\"identifier\": \"29111\"}, {\"cid\": 4,\"tags\": [{\"tid\" :44,\"name\": \"Roles\"}], \"identifier\": \"29101\"}]}"));
indexRequests.add(client().prepareIndex("idx2", "provider", "2").setSource("{\"dates\": {\"month\": {\"label\": \"2014-12\", \"end\": \"2014-12-31\", \"start\": \"2014-12-01\"}, \"day\": \"2014-12-03\"}, \"comments\": [{\"cid\": 1, \"identifier\": \"29111\"}, {\"cid\": 2,\"tags\": [{\"tid\" : 22, \"name\": \"DataChannels\"}], \"identifier\": \"29101\"}]}"));
indexRandom(true, indexRequests);
SearchResponse response = client().prepareSearch("idx2").setTypes("provider")
.addAggregation(
terms("startDate").field("dates.month.start").subAggregation(
terms("endDate").field("dates.month.end").subAggregation(
terms("period").field("dates.month.label").subAggregation(
nested("ctxt_idfier_nested").path("comments").subAggregation(
filter("comment_filter").filter(termFilter("comments.identifier", "29111")).subAggregation(
nested("nested_tags").path("comments.tags").subAggregation(
terms("tag").field("comments.tags.name")
)
)
)
)
)
)
).get();
assertNoFailures(response);
assertHitCount(response, 2);
Terms startDate = response.getAggregations().get("startDate");
assertThat(startDate.getBuckets().size(), equalTo(2));
Terms.Bucket bucket = startDate.getBucketByKey("1414800000000"); // 2014-11-01T00:00:00.000Z
assertThat(bucket.getDocCount(), equalTo(1l));
Terms endDate = bucket.getAggregations().get("endDate");
bucket = endDate.getBucketByKey("1417305600000"); // 2014-11-30T00:00:00.000Z
assertThat(bucket.getDocCount(), equalTo(1l));
Terms period = bucket.getAggregations().get("period");
bucket = period.getBucketByKey("2014-11");
assertThat(bucket.getDocCount(), equalTo(1l));
Nested comments = bucket.getAggregations().get("ctxt_idfier_nested");
assertThat(comments.getDocCount(), equalTo(2l));
Filter filter = comments.getAggregations().get("comment_filter");
assertThat(filter.getDocCount(), equalTo(1l));
Nested nestedTags = filter.getAggregations().get("nested_tags");
assertThat(nestedTags.getDocCount(), equalTo(0l)); // This must be 0
Terms tags = nestedTags.getAggregations().get("tag");
assertThat(tags.getBuckets().size(), equalTo(0)); // and this must be empty
bucket = startDate.getBucketByKey("1417392000000"); // 2014-12-01T00:00:00.000Z
assertThat(bucket.getDocCount(), equalTo(1l));
endDate = bucket.getAggregations().get("endDate");
bucket = endDate.getBucketByKey("1419984000000"); // 2014-12-31T00:00:00.000Z
assertThat(bucket.getDocCount(), equalTo(1l));
period = bucket.getAggregations().get("period");
bucket = period.getBucketByKey("2014-12");
assertThat(bucket.getDocCount(), equalTo(1l));
comments = bucket.getAggregations().get("ctxt_idfier_nested");
assertThat(comments.getDocCount(), equalTo(2l));
filter = comments.getAggregations().get("comment_filter");
assertThat(filter.getDocCount(), equalTo(1l));
nestedTags = filter.getAggregations().get("nested_tags");
assertThat(nestedTags.getDocCount(), equalTo(0l)); // This must be 0
tags = nestedTags.getAggregations().get("tag");
assertThat(tags.getBuckets().size(), equalTo(0)); // and this must be empty
}
}