Aggregations: Changed child filters to not require a random-access bitset in the `nested` agg.

Also the nested agg now requires docs to be consumed / scored in order.

Closes #8454
Martijn van Groningen 2014-11-12 10:45:51 +01:00
parent 284491d874
commit 5714b0a7ad
1 changed file with 36 additions and 21 deletions
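The gist of the change: the `nested` agg used to materialize the child-filter matches as a random-access bitset per segment and probe every doc id between two parent docs; it now consumes the matches through a forward-only `DocIdSetIterator`, which can jump ahead via `advance()` (benefiting from skip pointers) and needs no up-front bitset. A minimal sketch of the two access patterns, with hand-rolled stand-ins for Lucene's `Bits` and `DocIdSetIterator` (all names below are illustrative, not the real API):

// Illustrative stand-ins, not the real Lucene API.

// Old: random access -- the child matches must be materialized as a bitset.
interface RandomAccessBits {
    boolean get(int docId);
}

// New: forward-only iteration -- nothing is materialized, and advance() can use skip pointers.
interface ForwardDocIterator {
    int docID();              // current match, -1 before iteration starts
    int nextDoc();            // next match, or Integer.MAX_VALUE when exhausted
    int advance(int target);  // first match >= target
}

class AccessPatterns {

    // Old approach: probe every doc id strictly between the previous parent and this parent.
    static int countChildrenOld(RandomAccessBits childDocs, int prevParent, int parent) {
        int n = 0;
        for (int d = prevParent + 1; d < parent; d++) {
            if (childDocs.get(d)) {
                n++;
            }
        }
        return n;
    }

    // New approach: jump to the first match after the previous parent, then step
    // through matches only, stopping at this parent.
    static int countChildrenNew(ForwardDocIterator childDocs, int prevParent, int parent) {
        int n = 0;
        for (int d = childDocs.advance(prevParent + 1); d < parent; d = childDocs.nextDoc()) {
            n++;
        }
        return n;
    }
}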

org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java

@@ -19,11 +19,12 @@
 package org.elasticsearch.search.aggregations.bucket.nested;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.join.BitDocIdSetFilter;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.docset.DocIdSets;
 import org.elasticsearch.index.mapper.MapperService;
@@ -45,9 +46,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
 
     private final String nestedPath;
     private final Aggregator parentAggregator;
     private BitDocIdSetFilter parentFilter;
-    private final BitDocIdSetFilter childFilter;
-    private Bits childDocs;
+    private final Filter childFilter;
+    private DocIdSetIterator childDocs;
     private BitSet parentDocs;
 
     public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator, Map<String, Object> metaData) {
@@ -66,7 +67,16 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested");
         }
-        childFilter = aggregationContext.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
+        // TODO: Revise the cache usage for childFilter
+        // Typical usage of the childFilter in this agg is that not all parent docs match, and because this agg
+        // executes in order we may be better off not caching: we could then iterate over the posting list and
+        // benefit from skip pointers.
+        // Even if caching does make sense, it likely shouldn't be forced as it is today, but instead be based on
+        // the heuristics the filter cache maintains for deciding whether the childFilter should be cached.
+        // By caching the childFilter we're consistent with other features and previous versions.
+        childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter());
+        // The childDocs need to be consumed in docId order; the following call ensures that:
+        aggregationContext.ensureScoreDocsInOrder();
     }
 
     @Override
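The new `ensureScoreDocsInOrder()` call exists because one forward-only iterator is shared across all `collect()` calls in a segment: once it has advanced past a doc id it can never go back. A small illustration of why out-of-order parent collection would silently drop children, using a hypothetical array-backed iterator (`SortedDocsIterator` is not a real Lucene class, just a stand-in for the `DocIdSetIterator` contract):

// Hypothetical array-backed iterator mirroring the forward-only
// DocIdSetIterator contract; not a real Lucene class.
class SortedDocsIterator {
    private final int[] docs; // sorted doc ids matching the child filter
    private int idx = -1;

    SortedDocsIterator(int... docs) {
        this.docs = docs;
    }

    int docID() {
        if (idx < 0) return -1;
        return idx < docs.length ? docs[idx] : Integer.MAX_VALUE;
    }

    int nextDoc() {
        idx++;
        return docID();
    }

    int advance(int target) { // only moves forward; Lucene implementations can skip
        while (docID() < target) {
            nextDoc();
        }
        return docID();
    }
}

class OutOfOrderDemo {
    public static void main(String[] args) {
        // Children 5..7 precede their parent at doc 8; children 20..21 precede doc 22.
        SortedDocsIterator childDocs = new SortedDocsIterator(5, 6, 7, 20, 21);

        childDocs.advance(9);                  // collecting the later parent (doc 22) first
        System.out.println(childDocs.docID()); // 20 -- docs 5..7 have been skipped for good

        // A subsequent collect() for the parent at doc 8 would find the iterator
        // already at doc 20 and count zero children, silently dropping them.
    }
}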
@@ -87,16 +97,15 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
             if (DocIdSets.isEmpty(parentSet)) {
                 parentDocs = null;
-                childDocs = null;
             } else {
                 parentDocs = parentSet.bits();
-                // In ES if a parent is deleted, its children are deleted too. Therefore acceptedDocs can also be null here.
-                BitDocIdSet childSet = childFilter.getDocIdSet(reader);
-                if (DocIdSets.isEmpty(childSet)) {
-                    childDocs = new Bits.MatchAllBits(reader.reader().maxDoc());
-                } else {
-                    childDocs = childSet.bits();
-                }
             }
+            // In ES if a parent is deleted, its children are deleted too. Therefore acceptedDocs can also be null here.
+            DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
+            if (DocIdSets.isEmpty(childDocIdSet)) {
+                childDocs = null;
+            } else {
+                childDocs = childDocIdSet.iterator();
+            }
         } catch (IOException ioe) {
             throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
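Note the per-segment lifecycle above: this is the `ReaderContextAware` hook (`setNextReader`), so both `parentDocs` and `childDocs` are re-resolved for every `LeafReaderContext`. `childDocs == null` becomes the "no nested docs in this segment" signal that `collect()` checks, where the old code fell back to a `Bits.MatchAllBits`; and the `null` second argument to `getDocIdSet(reader, null)` is the acceptDocs parameter, safe here because deleting a parent in ES also deletes its children. A hedged sketch of the surrounding per-segment driver (the `driveSegments` helper is hypothetical; only `setNextReader` comes from the diff):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;

class SegmentDriverSketch {
    // Hypothetical driver: each leaf's state is rebuilt before any of its docs
    // are collected, and docs within a leaf must arrive in doc-id order.
    static void driveSegments(IndexReader topReader, NestedAggregator aggregator) {
        for (LeafReaderContext leaf : topReader.leaves()) {
            aggregator.setNextReader(leaf); // re-resolves parentDocs + childDocs
            // ... collect this segment's matching parent docs, in increasing doc id ...
        }
    }
}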
@@ -105,18 +114,24 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
 
     @Override
     public void collect(int parentDoc, long bucketOrd) throws IOException {
-        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them
-        // so they'll be collected
-        if (parentDoc == 0 || parentDocs == null) {
+        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them so they'll be collected
+        // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these always appear before the parent doc), so we can skip:
+        if (parentDoc == 0 || childDocs == null) {
             return;
         }
         int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
-        int numChildren = 0;
-        for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) {
-            if (childDocs.get(childDocId)) {
-                ++numChildren;
-                collectBucketNoCounts(childDocId, bucketOrd);
-            }
+
+        int childDocId;
+        if (childDocs.docID() > prevParentDoc) {
+            childDocId = childDocs.docID();
+        } else {
+            childDocId = childDocs.advance(prevParentDoc + 1);
+        }
+
+        int numChildren = 0;
+        for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
+            numChildren++;
+            collectBucketNoCounts(childDocId, bucketOrd);
         }
         incrementBucketDocCount(bucketOrd, numChildren);
     }
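A worked example of the new `collect()` walk under the block-join layout (nested child docs are indexed immediately before their parent doc, which is why a parent at doc 0 can have no children). Self-contained sketch: `java.util.BitSet.previousSetBit` stands in for Lucene's `BitSet.prevSetBit`, and the tiny inner class stands in for `DocIdSetIterator`; this is illustrative, not the actual ES code:

import java.util.BitSet;

class CollectWalkDemo {

    // Minimal forward-only iterator, as in the earlier sketch.
    static class Iter {
        final int[] docs;
        int idx = -1;
        Iter(int... docs) { this.docs = docs; }
        int docID() { return idx < 0 ? -1 : idx < docs.length ? docs[idx] : Integer.MAX_VALUE; }
        int nextDoc() { idx++; return docID(); }
        int advance(int target) { while (docID() < target) nextDoc(); return docID(); }
    }

    public static void main(String[] args) {
        // Block-join segment layout: docs 0..3 are children of the parent at doc 4,
        // docs 5..6 are children of the parent at doc 7.
        BitSet parentDocs = new BitSet();
        parentDocs.set(4);
        parentDocs.set(7);
        Iter childDocs = new Iter(0, 1, 2, 3, 5, 6);

        int parentDoc = 7; // collect() is called for the second parent
        int prevParentDoc = parentDocs.previousSetBit(parentDoc - 1); // -> 4

        // Same positioning logic as the diff: reuse the iterator's position if it is
        // already past the previous parent, otherwise advance to just after it.
        int childDocId = childDocs.docID() > prevParentDoc
                ? childDocs.docID()
                : childDocs.advance(prevParentDoc + 1); // -> 5

        int numChildren = 0;
        for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
            numChildren++; // the real code calls collectBucketNoCounts(childDocId, bucketOrd)
        }
        System.out.println(numChildren); // 2 (docs 5 and 6)
    }
}

The `docID() > prevParentDoc` branch matters when consecutive parents are collected: after one parent's children are consumed, the iterator is already parked on the first child of the next block, and `advance()` must not be asked to move to a target the iterator has already passed.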