From 7e6e9dbb9640b2ced7a6d5dae6a904a632155c4f Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 27 Jan 2015 11:54:11 +0100 Subject: [PATCH] Aggs: nested agg needs to reset root doc between segments. Closes #9437 Closes #9436 --- .../bucket/nested/NestedAggregator.java | 3 + .../bucket/nested/NestedAggregatorTest.java | 137 ++++++++++++++++++ .../elasticsearch/test/TestSearchContext.java | 2 +- 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregatorTest.java diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 7e1d229662d..384e01fc8c0 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -79,6 +79,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo BitDocIdSetFilter rootDocsFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(NonNestedDocsFilter.INSTANCE); BitDocIdSet rootDocIdSet = rootDocsFilter.getDocIdSet(reader); rootDocs = rootDocIdSet.bits(); + // We need to reset the current root doc, otherwise we may emit incorrect child docs if the next segment happen to start with the same root doc id value + currentRootDoc = -1; + childDocIdBuffers.clear(); } catch (IOException ioe) { throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe); } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregatorTest.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregatorTest.java new file mode 100644 index 00000000000..d17fd241e10 --- /dev/null +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregatorTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.nested; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.*; +import org.apache.lucene.queries.TermFilter; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FilterCachingPolicy; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.common.compress.CompressedString; +import org.elasticsearch.common.lucene.search.AndFilter; +import org.elasticsearch.common.lucene.search.NotFilter; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.mapper.internal.TypeFieldMapper; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.search.nested.NonNestedDocsFilter; +import org.elasticsearch.search.aggregations.AggregationPhase; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.test.ElasticsearchSingleNodeLuceneTestCase; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +/** + */ +public class NestedAggregatorTest extends ElasticsearchSingleNodeLuceneTestCase { + + @Test + public void testResetRootDocId() throws Exception { + Directory directory = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(null); + iwc.setMergePolicy(NoMergePolicy.INSTANCE); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc); + + List documents = new ArrayList<>(); + + // 1 segment with, 1 root document, with 3 nested sub docs + Document document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + indexWriter.addDocuments(documents); + indexWriter.commit(); + + documents.clear(); + // 1 segment with: + // 1 document, with 1 nested subdoc + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + indexWriter.addDocuments(documents); + documents.clear(); + // and 1 document, with 1 nested subdoc + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + document = new Document(); + document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE)); + documents.add(document); + indexWriter.addDocuments(documents); + + indexWriter.commit(); + indexWriter.close(); + + DirectoryReader directoryReader = DirectoryReader.open(directory); + IndexSearcher searcher = new IndexSearcher(directoryReader); + + IndexService indexService = createIndex("test"); + indexService.mapperService().merge("test", new CompressedString(PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()), true); + AggregationContext context = new AggregationContext(createSearchContext(indexService)); + + AggregatorFactories.Builder builder = AggregatorFactories.builder(); + builder.add(new NestedAggregator.Factory("test", "nested_field", FilterCachingPolicy.ALWAYS_CACHE)); + AggregatorFactories factories = builder.build(); + Aggregator[] aggs = factories.createTopLevelAggregators(context); + AggregationPhase.AggregationsCollector collector = new AggregationPhase.AggregationsCollector(Arrays.asList(aggs), context); + // A regular search always exclude nested docs, so we use NonNestedDocsFilter.INSTANCE here (otherwise MatchAllDocsQuery would be sufficient) + // We exclude root doc with uid type#2, this will trigger the bug if we don't reset the root doc when we process a new segment, because + // root doc type#3 and root doc type#1 have the same segment docid + searcher.search(new ConstantScoreQuery(new AndFilter(Arrays.asList(NonNestedDocsFilter.INSTANCE, new NotFilter(new TermFilter(new Term(UidFieldMapper.NAME, "type#2")))))), collector); + collector.postCollection(); + + Nested nested = (Nested) aggs[0].buildAggregation(0); + // The bug manifests if 6 docs are returned, because currentRootDoc isn't reset the previous child docs from the first segment are emitted as hits. + assertThat(nested.getDocCount(), equalTo(4l)); + + directoryReader.close(); + directory.close(); + } + +} diff --git a/src/test/java/org/elasticsearch/test/TestSearchContext.java b/src/test/java/org/elasticsearch/test/TestSearchContext.java index 55fc2ccb1aa..3bab21cdaa4 100644 --- a/src/test/java/org/elasticsearch/test/TestSearchContext.java +++ b/src/test/java/org/elasticsearch/test/TestSearchContext.java @@ -572,7 +572,7 @@ public class TestSearchContext extends SearchContext { @Override public MapperService.SmartNameObjectMapper smartNameObjectMapper(String name) { - return null; + return mapperService().smartNameObjectMapper(name, types); } @Override