From 13b64cc362772f378a2c87258c3ca8c9e71aa62c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 3 Feb 2015 16:40:49 +0100 Subject: [PATCH] Aggs: Make the nested aggregation call sub aggregators with doc IDs in order. Close #9547 --- .../bucket/nested/NestedAggregator.java | 75 ++++--------------- .../bucket/ReverseNestedTests.java | 1 - 2 files changed, 13 insertions(+), 63 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 384e01fc8c0..39c9df903ee 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -18,8 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.nested; -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.IntObjectOpenHashMap; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -53,10 +51,6 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo private BitSet parentDocs; private LeafReaderContext reader; - private BitSet rootDocs; - private int currentRootDoc = -1; - private final IntObjectOpenHashMap childDocIdBuffers = new IntObjectOpenHashMap<>(); - public NestedAggregator(String name, AggregatorFactories factories, ObjectMapper objectMapper, AggregationContext aggregationContext, Aggregator parentAggregator, Map metaData, FilterCachingPolicy filterCachingPolicy) throws IOException { super(name, factories, aggregationContext, parentAggregator, metaData); this.parentAggregator = parentAggregator; @@ -76,12 +70,6 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo } else { childDocs = childDocIdSet.iterator(); } - BitDocIdSetFilter rootDocsFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(NonNestedDocsFilter.INSTANCE); - BitDocIdSet rootDocIdSet = rootDocsFilter.getDocIdSet(reader); - rootDocs = rootDocIdSet.bits(); - // We need to reset the current root doc, otherwise we may emit incorrect child docs if the next segment happen to start with the same root doc id value - currentRootDoc = -1; - childDocIdBuffers.clear(); } catch (IOException ioe) { throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe); } @@ -89,6 +77,7 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo @Override public void collect(int parentDoc, long bucketOrd) throws IOException { + assert bucketOrd == 0; // here we translate the parent doc to a list of its nested docs, and then call super.collect for evey one of them so they'll be collected // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent doc), so we can skip: @@ -119,21 +108,19 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo } } + final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); + int childDocId = childDocs.docID(); + if (childDocId <= prevParentDoc) { + childDocId = childDocs.advance(prevParentDoc + 1); + } + int numChildren = 0; - IntArrayList iterator = getChildren(parentDoc); - final int[] buffer = iterator.buffer; - final int size = iterator.size(); - for (int i = 0; i < size; i++) { - numChildren++; - collectBucketNoCounts(buffer[i], bucketOrd); + for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) { + collectBucketNoCounts(childDocId, bucketOrd); + numChildren += 1; } incrementBucketDocCount(bucketOrd, numChildren); } - - @Override - protected void doClose() { - childDocIdBuffers.clear(); - } @Override public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { @@ -169,6 +156,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo @Override public Aggregator createInternal(AggregationContext context, Aggregator parent, boolean collectsFromSingleBucket, Map metaData) throws IOException { + if (collectsFromSingleBucket == false) { + return asMultiBucketAggregator(this, context, parent); + } MapperService.SmartNameObjectMapper mapper = context.searchContext().smartNameObjectMapper(path); if (mapper == null) { return new Unmapped(name, context, parent, metaData); @@ -196,43 +186,4 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo } } - // The aggs framework can collect buckets for the same parent doc id more than once and because the children docs - // can only be consumed once we need to buffer the child docs. We only need to buffer child docs in the scope - // of the current root doc. - - // Examples: - // 1) nested agg wrapped is by terms agg and multiple buckets per document are emitted - // 2) Multiple nested fields are defined. A nested agg joins back to another nested agg via the reverse_nested agg. - // For each child in the first nested agg the second nested agg gets invoked with the same buckets / docids - private IntArrayList getChildren(final int parentDocId) throws IOException { - int rootDocId = rootDocs.nextSetBit(parentDocId); - if (currentRootDoc == rootDocId) { - final IntArrayList childDocIdBuffer = childDocIdBuffers.get(parentDocId); - if (childDocIdBuffer != null) { - return childDocIdBuffer; - } else { - // here we translate the parent doc to a list of its nested docs, - // and then collect buckets for every one of them so they'll be collected - final IntArrayList newChildDocIdBuffer = new IntArrayList(); - childDocIdBuffers.put(parentDocId, newChildDocIdBuffer); - int prevParentDoc = parentDocs.prevSetBit(parentDocId - 1); - int childDocId; - if (childDocs.docID() > prevParentDoc) { - childDocId = childDocs.docID(); - } else { - childDocId = childDocs.advance(prevParentDoc + 1); - } - for (; childDocId < parentDocId; childDocId = childDocs.nextDoc()) { - newChildDocIdBuffer.add(childDocId); - } - return newChildDocIdBuffer; - } - } else { - this.currentRootDoc = rootDocId; - childDocIdBuffers.clear(); - return getChildren(parentDocId); - } - } - - } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java index 91c345d4dc8..5b8d674b881 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java @@ -483,7 +483,6 @@ public class ReverseNestedTests extends ElasticsearchIntegrationTest { } @Test - @AwaitsFix(bugUrl="http://github.com/elasticsearch/elasticsearch/issues/9547") public void testSameParentDocHavingMultipleBuckets() throws Exception { XContentBuilder mapping = jsonBuilder().startObject().startObject("product").field("dynamic", "strict").startObject("properties") .startObject("id").field("type", "long").endObject()