Aggregations: Changed child filters to not require a random access based bitset in `nested` agg.

Also, the `nested` agg now requires docs to be consumed / scored in order.

Closes #8454
Martijn van Groningen 2014-11-12 10:45:51 +01:00
parent 284491d874
commit 5714b0a7ad
1 changed file with 36 additions and 21 deletions
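In effect, the change swaps a per-docID random-access probe (Bits.get) for a forward-only DocIdSetIterator that can exploit skip pointers. A minimal, self-contained sketch of the new access pattern, using plain-Java stand-ins (IntArrayIterator here is a hypothetical mock of Lucene's DocIdSetIterator, not the real class):

    import java.util.BitSet;

    public class NestedCollectSketch {

        // Hypothetical stand-in for Lucene's DocIdSetIterator: forward-only over sorted docIDs.
        static final class IntArrayIterator {
            static final int NO_MORE_DOCS = Integer.MAX_VALUE;
            private final int[] docs; // sorted ascending
            private int idx = -1;

            IntArrayIterator(int... docs) { this.docs = docs; }

            int docID() { return idx < 0 ? -1 : idx >= docs.length ? NO_MORE_DOCS : docs[idx]; }

            int nextDoc() { idx++; return docID(); }

            // Returns the first doc >= target; forward-only, like DocIdSetIterator.advance().
            int advance(int target) {
                int doc;
                while ((doc = nextDoc()) < target) { /* skip */ }
                return doc;
            }
        }

        public static void main(String[] args) {
            // Block-indexed segment: children precede their parent,
            // so docs 0..2 belong to parent 3 and docs 4..5 to parent 6.
            BitSet parentDocs = new BitSet();
            parentDocs.set(3);
            parentDocs.set(6);
            IntArrayIterator childDocs = new IntArrayIterator(0, 1, 2, 4, 5);

            int parentDoc = 6;
            int prevParentDoc = parentDocs.previousSetBit(parentDoc - 1); // -> 3

            // New pattern: position the iterator just past the previous parent,
            // then walk it up to (but not including) the current parent.
            int childDocId = childDocs.docID() > prevParentDoc
                    ? childDocs.docID()
                    : childDocs.advance(prevParentDoc + 1);
            int numChildren = 0;
            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                numChildren++;
                System.out.println("collect child doc " + childDocId);
            }
            System.out.println("children of parent 6: " + numChildren); // 2
        }
    }

The old code instead called childDocs.get(childDocId) for every docID in (prevParentDoc, parentDoc), which requires a materialized random-access bitset; the iterator version only touches docIDs that actually exist in the child filter.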

@@ -19,11 +19,12 @@
 package org.elasticsearch.search.aggregations.bucket.nested;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.join.BitDocIdSetFilter;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.docset.DocIdSets;
 import org.elasticsearch.index.mapper.MapperService;
@@ -45,9 +46,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
     private final String nestedPath;
     private final Aggregator parentAggregator;
 
     private BitDocIdSetFilter parentFilter;
-    private final BitDocIdSetFilter childFilter;
+    private final Filter childFilter;
-    private Bits childDocs;
+    private DocIdSetIterator childDocs;
     private BitSet parentDocs;
 
     public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator, Map<String, Object> metaData) {
@@ -66,7 +67,16 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested");
         }
-        childFilter = aggregationContext.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
+        // TODO: Revise the cache usage for childFilter
+        // Typical usage of the childFilter in this agg is that not all parent docs match, and because this agg executes
+        // in order, we may be better off not caching: we could then iterate over the postings list and benefit from skip pointers.
+        // Even if caching does make sense, it likely shouldn't be forced as it is today, but decided by the heuristics the
+        // filter cache maintains for whether the childFilter should be cached.
+        // By caching the childFilter we stay consistent with other features and previous versions.
+        childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter());
+        // The childDocs need to be consumed in docId order; the following ensures that:
+        aggregationContext.ensureScoreDocsInOrder();
     }
 
     @Override
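The ensureScoreDocsInOrder() call above matters because a DocIdSetIterator is a forward-only cursor: advance() and nextDoc() never move backwards. A tiny illustration of what would go wrong otherwise (plain Java, hypothetical demo, not Elasticsearch code):

    public class OutOfOrderDemo {
        public static void main(String[] args) {
            int[] childDocs = {0, 1, 2, 4, 5}; // children of parent 3 (0..2) and parent 6 (4..5)
            int pos = 0; // cursor of a forward-only iterator

            // Collecting parent 6 first advances the cursor to its first child, doc 4...
            while (pos < childDocs.length && childDocs[pos] < 4) pos++; // advance(4)
            System.out.println("cursor now at doc " + childDocs[pos]); // 4

            // ...so a later collect(3) would need docs 0..2, but the cursor cannot
            // move back: those children would be silently missed. Consuming parent
            // docs in docID order rules this out.
        }
    }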
@@ -87,16 +97,15 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
             if (DocIdSets.isEmpty(parentSet)) {
                 parentDocs = null;
+                childDocs = null;
             } else {
                 parentDocs = parentSet.bits();
-            }
-            // In ES, if the parent doc is deleted then its children are deleted as well. Therefore acceptedDocs can also be null here.
-            BitDocIdSet childSet = childFilter.getDocIdSet(reader);
-            if (DocIdSets.isEmpty(childSet)) {
-                childDocs = new Bits.MatchAllBits(reader.reader().maxDoc());
-            } else {
-                childDocs = childSet.bits();
-            }
+                // In ES, if the parent doc is deleted then its children are deleted as well. Therefore acceptedDocs can also be null here.
+                DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
+                if (DocIdSets.isEmpty(childDocIdSet)) {
+                    childDocs = null;
+                } else {
+                    childDocs = childDocIdSet.iterator();
+                }
+            }
         } catch (IOException ioe) {
             throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
@@ -105,18 +114,24 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
     @Override
     public void collect(int parentDoc, long bucketOrd) throws IOException {
-        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them
-        // so they'll be collected
-        if (parentDoc == 0 || parentDocs == null) {
+        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them so they'll be collected
+        // if parentDoc is 0 then this parent doesn't have child docs (b/c these always appear before the parent doc), so we can skip:
+        if (parentDoc == 0 || childDocs == null) {
             return;
         }
         int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
-        int numChildren = 0;
-        for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) {
-            if (childDocs.get(childDocId)) {
-                ++numChildren;
-                collectBucketNoCounts(childDocId, bucketOrd);
-            }
-        }
+        int childDocId;
+        if (childDocs.docID() > prevParentDoc) {
+            childDocId = childDocs.docID();
+        } else {
+            childDocId = childDocs.advance(prevParentDoc + 1);
+        }
+        int numChildren = 0;
+        for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
+            numChildren++;
+            collectBucketNoCounts(childDocId, bucketOrd);
+        }
         incrementBucketDocCount(bucketOrd, numChildren);
     }
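To see when the childDocs.docID() > prevParentDoc branch is taken, here is a hand-trace under an assumed block layout (children are indexed immediately before their parent, so docs 0..2 belong to parent 3 and docs 4..5 to parent 6):

collect(3): prevParentDoc = parentDocs.prevSetBit(2) = -1. The iterator is unpositioned (docID() == -1), so childDocId = advance(0) = 0, and the loop collects docs 0, 1, 2, exiting with the iterator parked on doc 4.

collect(6): prevParentDoc = parentDocs.prevSetBit(5) = 3. Now docID() == 4 > 3, so the current position is reused without another advance(), and the loop collects docs 4 and 5.

Because parents arrive in increasing docID order, each child posting is visited exactly once per segment.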