Aggregations: Changed child filters to not require a random access based bitset in `nested` agg.
Also the nested agg now requires docs to be consumed / scored in order. Closes #8454
This commit is contained in:
parent 284491d874
commit 5714b0a7ad
src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java

@@ -19,11 +19,12 @@
 package org.elasticsearch.search.aggregations.bucket.nested;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.join.BitDocIdSetFilter;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BitSet;
-import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lucene.ReaderContextAware;
 import org.elasticsearch.common.lucene.docset.DocIdSets;
 import org.elasticsearch.index.mapper.MapperService;
@@ -45,9 +46,9 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
     private final String nestedPath;
     private final Aggregator parentAggregator;
     private BitDocIdSetFilter parentFilter;
-    private final BitDocIdSetFilter childFilter;
+    private final Filter childFilter;
 
-    private Bits childDocs;
+    private DocIdSetIterator childDocs;
     private BitSet parentDocs;
 
     public NestedAggregator(String name, AggregatorFactories factories, String nestedPath, AggregationContext aggregationContext, Aggregator parentAggregator, Map<String, Object> metaData) {
@@ -66,7 +67,16 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             throw new AggregationExecutionException("[nested] nested path [" + nestedPath + "] is not nested");
         }
 
-        childFilter = aggregationContext.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
+        // TODO: Revise the cache usage for childFilter
+        // Typical usage of the childFilter in this agg is that not all parent docs match and because this agg executes
+        // in order we are maybe better off not caching? We can then iterate over the posting list and benefit from skip pointers.
+        // Even if caching does make sense it is likely that it shouldn't be forced as is today, but based on heuristics that
+        // the filter cache maintains that the childFilter should be cached.
+
+        // By caching the childFilter we're consistent with other features and previous versions.
+        childFilter = aggregationContext.searchContext().filterCache().cache(objectMapper.nestedTypeFilter());
+        // The childDocs need to be consumed in docId order, this ensures that:
+        aggregationContext.ensureScoreDocsInOrder();
     }
 
     @Override
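Switching from a cached BitDocIdSetFilter to a plain Filter means childDocs is now a DocIdSetIterator rather than a random-access Bits, and an iterator can only move forward. That is why the constructor now calls ensureScoreDocsInOrder(). A minimal illustration of the forward-only contract, assuming Lucene 5.x on the classpath (this demo class is not part of the commit):

    import java.io.IOException;

    import org.apache.lucene.search.DocIdSetIterator;

    public class ForwardOnlyDemo {
        public static void main(String[] args) throws IOException {
            // An iterator over all docs 0..5; a real child filter iterates a posting list.
            DocIdSetIterator it = DocIdSetIterator.all(6);

            it.advance(4);                   // jump forward, possibly via skip pointers
            System.out.println(it.docID());  // prints 4

            // There is no way back to docs 0..3, so a collector sharing this iterator
            // must see parent docs in ascending docId order.
        }
    }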
@@ -87,16 +97,15 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
             BitDocIdSet parentSet = parentFilter.getDocIdSet(reader);
             if (DocIdSets.isEmpty(parentSet)) {
                 parentDocs = null;
-                childDocs = null;
             } else {
                 parentDocs = parentSet.bits();
-                // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also be null here.
-                BitDocIdSet childSet = childFilter.getDocIdSet(reader);
-                if (DocIdSets.isEmpty(childSet)) {
-                    childDocs = new Bits.MatchAllBits(reader.reader().maxDoc());
-                } else {
-                    childDocs = childSet.bits();
-                }
             }
+            // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also be null here.
+            DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
+            if (DocIdSets.isEmpty(childDocIdSet)) {
+                childDocs = null;
+            } else {
+                childDocs = childDocIdSet.iterator();
+            }
         } catch (IOException ioe) {
             throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
@@ -105,18 +114,24 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
 
     @Override
     public void collect(int parentDoc, long bucketOrd) throws IOException {
-        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them
-        // so they'll be collected
-        if (parentDoc == 0 || parentDocs == null) {
+        // here we translate the parent doc to a list of its nested docs, and then call super.collect for every one of them so they'll be collected
+        // if parentDoc is 0 then this means that this parent doesn't have child docs (b/c these appear always before the parent doc), so we can skip:
+        if (parentDoc == 0 || childDocs == null) {
             return;
         }
         int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
+        int childDocId;
+        if (childDocs.docID() > prevParentDoc) {
+            childDocId = childDocs.docID();
+        } else {
+            childDocId = childDocs.advance(prevParentDoc + 1);
+        }
+
         int numChildren = 0;
-        for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) {
-            if (childDocs.get(childDocId)) {
-                numChildren++;
-                collectBucketNoCounts(childDocId, bucketOrd);
-            }
+        for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
+            ++numChildren;
+            collectBucketNoCounts(childDocId, bucketOrd);
         }
         incrementBucketDocCount(bucketOrd, numChildren);
     }
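The rewritten collect(...) replaces the old per-doc random access (childDocs.get(childDocId) on a Bits instance) with a single forward walk over the child iterator: position once with advance(prevParentDoc + 1), then step with nextDoc() until the parent doc is reached. The following standalone sketch shows the same positioning logic against a concrete doc layout; it is illustrative only, NestedWalkSketch and ChildIterator are hypothetical names, and ChildIterator is a minimal stand-in for Lucene's DocIdSetIterator.

    import java.util.BitSet;

    // Hypothetical, self-contained sketch of the new in-order child walk; not part
    // of this commit. Nested child docs are indexed immediately before their parent
    // doc, so the children of the parent at docId `parentDoc` are exactly the
    // matching docs between the previous parent and `parentDoc`.
    public class NestedWalkSketch {

        // Minimal stand-in for Lucene's DocIdSetIterator over a sorted doc id list.
        static final class ChildIterator {
            private final int[] docs; // sorted doc ids matching the child filter
            private int idx = -1;
            private int doc = -1;

            ChildIterator(int[] docs) {
                this.docs = docs;
            }

            int docID() {
                return doc;
            }

            int nextDoc() {
                idx++;
                return doc = idx < docs.length ? docs[idx] : Integer.MAX_VALUE;
            }

            int advance(int target) {
                // A real iterator would use skip pointers; a linear scan suffices here.
                while (doc < target) {
                    nextDoc();
                }
                return doc;
            }
        }

        // Counts the children of parentDoc, consuming childDocs strictly forward.
        static int countChildren(int parentDoc, BitSet parentDocs, ChildIterator childDocs) {
            if (parentDoc == 0) {
                return 0; // children always precede their parent, so doc 0 has none
            }
            int prevParentDoc = parentDocs.previousSetBit(parentDoc - 1);
            int childDocId;
            if (childDocs.docID() > prevParentDoc) {
                childDocId = childDocs.docID(); // already positioned past the previous parent
            } else {
                childDocId = childDocs.advance(prevParentDoc + 1); // skip forward, never backward
            }
            int numChildren = 0;
            for (; childDocId < parentDoc; childDocId = childDocs.nextDoc()) {
                numChildren++;
            }
            return numChildren;
        }

        public static void main(String[] args) {
            // Doc layout: children 0..2 belong to parent 3, children 4..5 to parent 6.
            BitSet parents = new BitSet();
            parents.set(3);
            parents.set(6);
            ChildIterator children = new ChildIterator(new int[]{0, 1, 2, 4, 5});

            System.out.println(countChildren(3, parents, children)); // prints 3
            System.out.println(countChildren(6, parents, children)); // prints 2
        }
    }

Because countChildren only ever moves the shared iterator forward, calling it with parent docs out of ascending order would silently miss children; that is exactly the invariant ensureScoreDocsInOrder() enforces.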
@@ -126,7 +141,7 @@ public class NestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
         return new InternalNested(name, bucketDocCount(owningBucketOrdinal), bucketAggregations(owningBucketOrdinal), getMetaData());
     }
 
     @Override
     public InternalAggregation buildEmptyAggregation() {
         return new InternalNested(name, 0, buildEmptySubAggregations(), getMetaData());
     }