Make parent and child aggregator more obvious (#57490) (#57553)

Pulls the way that the `ParentJoinAggregator` collects global ordinals
into a strategy object so it is a little simpler to reason about and
it'll be simpler to save memory by removing `asMultiBucketAggregator` in
the future.

Relates to #56487
This commit is contained in:
Nik Everett 2020-06-02 16:22:38 -04:00 committed by GitHub
parent 2a27c411fb
commit 928794cd61
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 79 additions and 25 deletions

View File

@ -28,8 +28,10 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.BitArray; import org.elasticsearch.common.util.BitArray;
import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
@ -52,16 +54,11 @@ public abstract class ParentJoinAggregator extends BucketsAggregator implements
private final Weight inFilter; private final Weight inFilter;
private final Weight outFilter; private final Weight outFilter;
private final ValuesSource.Bytes.WithOrdinals valuesSource; private final ValuesSource.Bytes.WithOrdinals valuesSource;
private final boolean singleAggregator;
/** /**
* If this aggregator is nested under another aggregator we allocate a long hash per bucket. * Strategy for collecting results.
*/ */
private final LongHash ordsHash; private final CollectionStrategy collectionStrategy;
/**
* Otherwise we use a dense bit array to record the global ordinals.
*/
private final BitArray ordsBit;
public ParentJoinAggregator(String name, public ParentJoinAggregator(String name,
AggregatorFactories factories, AggregatorFactories factories,
@ -83,21 +80,9 @@ public abstract class ParentJoinAggregator extends BucketsAggregator implements
this.inFilter = context.searcher().createWeight(context.searcher().rewrite(inFilter), ScoreMode.COMPLETE_NO_SCORES, 1f); this.inFilter = context.searcher().createWeight(context.searcher().rewrite(inFilter), ScoreMode.COMPLETE_NO_SCORES, 1f);
this.outFilter = context.searcher().createWeight(context.searcher().rewrite(outFilter), ScoreMode.COMPLETE_NO_SCORES, 1f); this.outFilter = context.searcher().createWeight(context.searcher().rewrite(outFilter), ScoreMode.COMPLETE_NO_SCORES, 1f);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
this.singleAggregator = parent == null; boolean singleAggregator = parent == null;
this.ordsBit = singleAggregator ? new BitArray((int) maxOrd, context.bigArrays()) : null; collectionStrategy = singleAggregator ?
this.ordsHash = singleAggregator ? null : new LongHash(1, context.bigArrays()); new DenseCollectionStrategy(maxOrd, context.bigArrays()) : new SparseCollectionStrategy(context.bigArrays());
}
private void addGlobalOrdinal(int globalOrdinal) {
if (singleAggregator) {
ordsBit.set(globalOrdinal);
} else {
ordsHash.add(globalOrdinal);
}
}
private boolean existsGlobalOrdinal(int globalOrdinal) {
return singleAggregator ? ordsBit.get(globalOrdinal): ordsHash.find(globalOrdinal) >= 0;
} }
@Override @Override
@ -115,7 +100,7 @@ public abstract class ParentJoinAggregator extends BucketsAggregator implements
if (parentDocs.get(docId) && globalOrdinals.advanceExact(docId)) { if (parentDocs.get(docId) && globalOrdinals.advanceExact(docId)) {
int globalOrdinal = (int) globalOrdinals.nextOrd(); int globalOrdinal = (int) globalOrdinals.nextOrd();
assert globalOrdinal != -1 && globalOrdinals.nextOrd() == SortedSetDocValues.NO_MORE_ORDS; assert globalOrdinal != -1 && globalOrdinals.nextOrd() == SortedSetDocValues.NO_MORE_ORDS;
addGlobalOrdinal(globalOrdinal); collectionStrategy.addGlobalOrdinal(globalOrdinal);
} }
} }
}; };
@ -155,7 +140,7 @@ public abstract class ParentJoinAggregator extends BucketsAggregator implements
if (globalOrdinals.advanceExact(docId)) { if (globalOrdinals.advanceExact(docId)) {
int globalOrdinal = (int) globalOrdinals.nextOrd(); int globalOrdinal = (int) globalOrdinals.nextOrd();
assert globalOrdinal != -1 && globalOrdinals.nextOrd() == SortedSetDocValues.NO_MORE_ORDS; assert globalOrdinal != -1 && globalOrdinals.nextOrd() == SortedSetDocValues.NO_MORE_ORDS;
if (existsGlobalOrdinal(globalOrdinal)) { if (collectionStrategy.existsGlobalOrdinal(globalOrdinal)) {
collectBucket(sub, docId, 0); collectBucket(sub, docId, 0);
} }
} }
@ -165,6 +150,75 @@ public abstract class ParentJoinAggregator extends BucketsAggregator implements
@Override @Override
protected void doClose() { protected void doClose() {
Releasables.close(ordsBit, ordsHash); Releasables.close(collectionStrategy);
}
/**
* Strategy for collecting the global ordinals of the join field in for all
* docs that match the {@code ParentJoinAggregator#inFilter} and then
* checking if which of the docs in the
* {@code ParentJoinAggregator#outFilter} also have the ordinal.
*/
protected interface CollectionStrategy extends Releasable {
void addGlobalOrdinal(int globalOrdinal);
boolean existsGlobalOrdinal(int globalOrdinal);
}
/**
* Uses a dense, bit per ordinal representation of the join field in the
* docs that match {@code ParentJoinAggregator#inFilter}. Its memory usage
* is proportional to the maximum ordinal so it is only a good choice if
* most docs match.
*/
protected class DenseCollectionStrategy implements CollectionStrategy {
private final BitArray ordsBits;
public DenseCollectionStrategy(long maxOrd, BigArrays bigArrays) {
ordsBits = new BitArray((int) maxOrd, context.bigArrays());
}
@Override
public void addGlobalOrdinal(int globalOrdinal) {
ordsBits.set(globalOrdinal);
}
@Override
public boolean existsGlobalOrdinal(int globalOrdinal) {
return ordsBits.get(globalOrdinal);
}
@Override
public void close() {
ordsBits.close();
}
}
/**
* Uses a hashed representation of whether of the join field in the docs
* that match {@code ParentJoinAggregator#inFilter}. Its memory usage is
* proportional to the number of matching global ordinals so it is used
* when only some docs might match.
*/
protected class SparseCollectionStrategy implements CollectionStrategy {
private final LongHash ordsHash;
public SparseCollectionStrategy(BigArrays bigArrays) {
ordsHash = new LongHash(1, bigArrays);
}
@Override
public void addGlobalOrdinal(int globalOrdinal) {
ordsHash.add(globalOrdinal);
}
@Override
public boolean existsGlobalOrdinal(int globalOrdinal) {
return ordsHash.find(globalOrdinal) >= 0;
}
@Override
public void close() {
ordsHash.close();
}
} }
} }