Aggregations: Changed child filters to not require a random access based bitset in `nested` agg.

Also, the `nested` agg now requires docs to be consumed / scored in order.

Closes #8454
Martijn van Groningen 2014-11-12 10:45:51 +01:00
parent 284491d874
commit 5714b0a7ad
1 changed file with 36 additions and 21 deletions
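In effect, the change swaps a per-docID random-access probe (Bits.get) for a forward-only DocIdSetIterator that can exploit skip pointers. A minimal, self-contained sketch of the new access pattern, using plain-Java stand-ins (IntArrayIterator here is a hypothetical mock of Lucene's DocIdSetIterator, not the real class):

    import java.util.BitSet;

    public class NestedCollectSketch {

        // Hypothetical stand-in for Lucene's DocIdSetIterator: forward-only over sorted docIDs.
        static final class IntArrayIterator {
            static final int NO_MORE_DOCS = Integer.MAX_VALUE;
            private final int[] docs; // sorted ascending
            private int idx = -1;

            IntArrayIterator(int... docs) { this.docs = docs; }

            int docID() { return idx < 0 ? -1 : idx >= docs.length ? NO_MORE_DOCS : docs[idx]; }

            int nextDoc() { idx++; return docID(); }

            // Returns the first doc >= target; forward-only, like DocIdSetIterator.advance().
            int advance(int target) {
                int doc;
                while ((doc = nextDoc()) < target) { /* skip */ }
                return doc;
            }
        }

        public static void main(String[] args) {
            // Block-indexed segment: children precede their parent,
            // so docs 0..2 belong to parent 3 and docs 4..5 to parent 6.
            BitSet parentDocs = new BitSet();
            parentDocs.set(3);
            parentDocs.set(6);
            IntArrayIterator childDocs = new IntArrayIterator(0, 1, 2, 4, 5);

            int parentDoc = 6;
            int prevParentDoc = parentDocs.previousSetBit(parentDoc - 1); // -> 3

            // New pattern: position the iterator just past the previous parent,
            // then walk it up to (but not including) the current parent.
            int childDocId = childDocs.docID() > prevParentDoc
                    ? childDocs.docID()
                    : childDocs.advance(prevParentDoc + 1);
            int numChildren = 0;
            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                numChildren++;
                System.out.println("collect child doc " + childDocId);
            }
            System.out.println("children of parent 6: " + numChildren); // 2
        }
    }

The old code instead called childDocs.get(childDocId) for every docID in (prevParentDoc, parentDoc), which requires a materialized random-access bitset; the iterator version only touches docIDs that actually exist in the child filter.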

@@ -19,11 +19,12 @@
 package org.elasticsearch.search.aggregations.bucket.nested;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.join.BitDocIdSetFilter;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.docset.DocIdSets;
 import org.elasticsearch.index.mapper.MapperService;
@@ -45,9 +46,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
     private final String nestedPath;
     private final Aggregator parentAggregator;
 
     private BitDocIdSetFilter parentFilter;
-    private final BitDocIdSetFilter childFilter;
+    private final Filter childFilter;
-    private Bits childDocs;
+    private DocIdSetIterator childDocs;
     private BitSet parentDocs;
 
     public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator, Map<String, Object> metaData) {
@@ -66,7 +67,16 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested");
         }
-        childFilter = aggregationContext.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
+        // TODO: Revise the cache usage for childFilter
+        // Typical usage of the childFilter in this agg is that not all parent docs match, and because this agg executes
+        // in order, we may be better off not caching: we could then iterate over the postings list and benefit from skip pointers.
+        // Even if caching does make sense, it likely shouldn't be forced as it is today, but decided by the heuristics the
+        // filter cache maintains for whether the childFilter should be cached.
+        // By caching the childFilter we stay consistent with other features and previous versions.
+        childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter());
+        // The childDocs need to be consumed in docId order; the following ensures that:
+        aggregationContext.ensureScoreDocsInOrder();
     }
 
     @Override
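The ensureScoreDocsInOrder() call above matters because a DocIdSetIterator is a forward-only cursor: advance() and nextDoc() never move backwards. A tiny illustration of what would go wrong otherwise (plain Java, hypothetical demo, not Elasticsearch code):

    public class OutOfOrderDemo {
        public static void main(String[] args) {
            int[] childDocs = {0, 1, 2, 4, 5}; // children of parent 3 (0..2) and parent 6 (4..5)
            int pos = 0; // cursor of a forward-only iterator

            // Collecting parent 6 first advances the cursor to its first child, doc 4...
            while (pos < childDocs.length && childDocs[pos] < 4) pos++; // advance(4)
            System.out.println("cursor now at doc " + childDocs[pos]); // 4

            // ...so a later collect(3) would need docs 0..2, but the cursor cannot
            // move back: those children would be silently missed. Consuming parent
            // docs in docID order rules this out.
        }
    }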
@@ -87,16 +97,15 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
             if (DocIdSets.isEmpty(parentSet)) {
                 parentDocs = null;
+                childDocs = null;
             } else {
                 parentDocs = parentSet.bits();
-            }
-            // In ES, if the parent doc is deleted then its children are deleted as well. Therefore acceptedDocs can also be null here.
-            BitDocIdSet childSet = childFilter.getDocIdSet(reader);
-            if (DocIdSets.isEmpty(childSet)) {
-                childDocs = new Bits.MatchAllBits(reader.reader().maxDoc());
-            } else {
-                childDocs = childSet.bits();
-            }
+                // In ES, if the parent doc is deleted then its children are deleted as well. Therefore acceptedDocs can also be null here.
+                DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
+                if (DocIdSets.isEmpty(childDocIdSet)) {
+                    childDocs = null;
+                } else {
+                    childDocs = childDocIdSet.iterator();
+                }
+            }
         } catch (IOException ioe) {
             throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
@@ -105,18 +114,24 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
     @Override
     public void collect(int parentDoc, long bucketOrd) throws IOException {
-        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them
-        // so they'll be collected
-        if (parentDoc == 0 || parentDocs == null) {
+        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them so they'll be collected
+        // if parentDoc is 0 then this parent doesn't have child docs (b/c these always appear before the parent doc), so we can skip:
+        if (parentDoc == 0 || childDocs == null) {
             return;
         }
         int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
-        int numChildren = 0;
-        for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) {
-            if (childDocs.get(childDocId)) {
-                ++numChildren;
-                collectBucketNoCounts(childDocId, bucketOrd);
-            }
-        }
+        int childDocId;
+        if (childDocs.docID() > prevParentDoc) {
+            childDocId = childDocs.docID();
+        } else {
+            childDocId = childDocs.advance(prevParentDoc + 1);
+        }
+        int numChildren = 0;
+        for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
+            numChildren++;
+            collectBucketNoCounts(childDocId, bucketOrd);
+        }
         incrementBucketDocCount(bucketOrd, numChildren);
     }
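To see when the childDocs.docID() > prevParentDoc branch is taken, here is a hand-trace under an assumed block layout (children are indexed immediately before their parent, so docs 0..2 belong to parent 3 and docs 4..5 to parent 6):

collect(3): prevParentDoc = parentDocs.prevSetBit(2) = -1. The iterator is unpositioned (docID() == -1), so childDocId = advance(0) = 0, and the loop collects docs 0, 1, 2, exiting with the iterator parked on doc 4.

collect(6): prevParentDoc = parentDocs.prevSetBit(5) = 3. Now docID() == 4 > 3, so the current position is reused without another advance(), and the loop collects docs 4 and 5.

Because parents arrive in increasing docID order, each child posting is visited exactly once per segment.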