diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index cfa0aac56b6..0aa764538da 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -19,11 +19,12 @@ package org.elasticsearch.search.aggregations.bucket.nested; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.search.join.BitDocIdSetFilter; import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.ReaderContextAware; import org.elasticsearch.common.lucene.docset.DocIdSets; import org.elasticsearch.index.mapper.MapperService; @@ -45,9 +46,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo private final String nestedPath; private final Aggregator parentAggregator; private BitDocIdSetFilter parentFilter; - private final BitDocIdSetFilter childFilter; + private final Filter childFilter; - private Bits childDocs; + private DocIdSetIterator childDocs; private BitSet parentDocs; public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator, Map metaData) { @@ -66,7 +67,16 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested"); } - childFilter = aggregationContext.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter()); + // TODO: Revise the cache usage for childFilter + // Typical usage of the childFilter in this agg is that not all parent docs match and because this agg executes + // in order we are maybe better off not caching? We can then iterate over the posting list and benefit from skip pointers. + // Even if caching does make sense it is likely that it shouldn't be forced as is today, but based on heuristics that + // the filter cache maintains that the childFilter should be cached. + + // By caching the childFilter we're consistent with other features and previous versions. + childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter()); + // The childDocs need to be consumed in docId order, this ensures that: + aggregationContext.ensureScoreDocsInOrder(); } @Override @@ -87,16 +97,15 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo BitDocIdSet parentSet = parentFilter.getDocIdSet(reader); if (DocIdSets.isEmpty(parentSet)) { parentDocs = null; - childDocs = null; } else { parentDocs = parentSet.bits(); - // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also null here. - BitDocIdSet childSet = childFilter.getDocIdSet(reader); - if (DocIdSets.isEmpty(childSet)) { - childDocs = new Bits.MatchAllBits(reader.reader().maxDoc()); - } else { - childDocs = childSet.bits(); - } + } + // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also null here. + DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null); + if (DocIdSets.isEmpty(childDocIdSet)) { + childDocs = null; + } else { + childDocs = childDocIdSet.iterator(); } } catch (IOException ioe) { throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe); @@ -105,18 +114,24 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo @Override public void collect(int parentDoc, long bucketOrd) throws IOException { - // here we translate the parent doc to a list of its nested docs, and then call super.collect for evey one of them - // so they'll be collected - if (parentDoc == 0 || parentDocs == null) { + // here we translate the parent doc to a list of its nested docs, and then call super.collect for evey one of them so they'll be collected + + // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent doc), so we can skip: + if (parentDoc == 0 || childDocs == null) { return; } int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); + int childDocId; + if (childDocs.docID() > prevParentDoc) { + childDocId = childDocs.docID(); + } else { + childDocId = childDocs.advance(prevParentDoc + 1); + } + int numChildren = 0; - for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) { - if (childDocs.get(childDocId)) { - ++numChildren; - collectBucketNoCounts(childDocId, bucketOrd); - } + for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) { + numChildren++; + collectBucketNoCounts(childDocId, bucketOrd); } incrementBucketDocCount(bucketOrd, numChildren); } @@ -126,7 +141,7 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderCo return new InternalNested(name, bucketDocCount(owningBucketOrdinal), bucketAggregations(owningBucketOrdinal), getMetaData()); } - @Override + @Override public InternalAggregation buildEmptyAggregation() { return new InternalNested(name, 0, buildEmptySubAggregations(), getMetaData()); }