Terms aggregation should remap global ordinal buckets when a sub-aggregator is used to sort the terms (#24941)

`terms` aggregations at the root level use the `global_ordinals` execution hint by default.
When all sub-aggregators can be run in `breadth_first` mode the collected buckets for these sub-aggs are dense (remapped after the initial pruning).
But if a sub-aggregator is not deferrable and needs to collect all buckets before pruning we don't remap global ords and the aggregator needs to deal with sparse buckets.
Most (if not all) aggregators expect dense buckets and uses this information to allocate memories.
This change forces the remap of the global ordinals but only when there is at least one sub-aggregator that cannot be deferred.

Relates #24788
This commit is contained in:
Jim Ferenczi 2017-05-30 19:13:07 +02:00 committed by GitHub
parent 2a6e6866bd
commit ce7195d81a
8 changed files with 357 additions and 289 deletions

View File

@ -53,22 +53,28 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
protected final SignificantTermsAggregatorFactory termsAggFactory; protected final SignificantTermsAggregatorFactory termsAggFactory;
private final SignificanceHeuristic significanceHeuristic; private final SignificanceHeuristic significanceHeuristic;
public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, public GlobalOrdinalsSignificantTermsAggregator(String name,
ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, DocValueFormat format, AggregatorFactories factories,
BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
SearchContext context, Aggregator parent, DocValueFormat format,
SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory, BucketCountThresholds bucketCountThresholds,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException { IncludeExclude.OrdinalsFilter includeExclude,
SearchContext context,
Aggregator parent,
boolean forceRemapGlobalOrds,
SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggFactory,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent, super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent,
SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData); forceRemapGlobalOrds, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
this.significanceHeuristic = significanceHeuristic; this.significanceHeuristic = significanceHeuristic;
this.termsAggFactory = termsAggFactory; this.termsAggFactory = termsAggFactory;
this.numCollectedDocs = 0;
} }
@Override @Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
final LeafBucketCollector sub) throws IOException {
return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) { return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
@ -78,18 +84,17 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
}; };
} }
@Override @Override
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException { public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
assert owningBucketOrdinal == 0; assert owningBucketOrdinal == 0;
if (globalOrds == null) { // no context in this reader if (valueCount == 0) { // no context in this reader
return buildEmptyAggregation(); return buildEmptyAggregation();
} }
final int size; final int size;
if (bucketCountThresholds.getMinDocCount() == 0) { if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize()); size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
} else { } else {
size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
} }
@ -98,7 +103,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size); BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
SignificantStringTerms.Bucket spare = null; SignificantStringTerms.Bucket spare = null;
for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) { for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) { if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue; continue;
} }
@ -115,7 +120,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format); spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
} }
spare.bucketOrd = bucketOrd; spare.bucketOrd = bucketOrd;
copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes); copy(lookupGlobalOrd.apply(globalTermOrd), spare.termBytes);
spare.subsetDf = bucketDocCount; spare.subsetDf = bucketDocCount;
spare.subsetSize = subsetSize; spare.subsetSize = subsetSize;
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes); spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
@ -148,63 +153,13 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
IndexReader topReader = searcher.getIndexReader(); IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs(); int supersetSize = topReader.numDocs();
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList()); pipelineAggregators(), metaData(), format, numCollectedDocs, supersetSize, significanceHeuristic, emptyList());
} }
@Override @Override
protected void doClose() { protected void doClose() {
super.doClose();
Releasables.close(termsAggFactory); Releasables.close(termsAggFactory);
} }
public static class WithHash extends GlobalOrdinalsSignificantTermsAggregator {
private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
SearchContext context, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic,
termsAggFactory, pipelineAggregators, metaData);
bucketOrds = new LongHash(1, context.bigArrays());
}
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
@Override
public void collect(int doc, long bucket) throws IOException {
assert bucket == 0;
numCollectedDocs++;
if (globalOrds.advanceExact(doc)) {
for (long globalOrd = globalOrds.nextOrd();
globalOrd != SortedSetDocValues.NO_MORE_ORDS;
globalOrd = globalOrds.nextOrd()) {
long bucketOrd = bucketOrds.add(globalOrd);
if (bucketOrd < 0) {
bucketOrd = -1 - bucketOrd;
collectExistingBucket(sub, doc, bucketOrd);
} else {
collectBucket(sub, doc, bucketOrd);
}
}
}
}
};
}
@Override
protected long getBucketOrd(long termOrd) {
return bucketOrds.find(termOrd);
}
@Override
protected void doClose() {
Releasables.close(termsAggFactory, bucketOrds);
}
}
} }

View File

@ -70,10 +70,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private final SignificanceHeuristic significanceHeuristic; private final SignificanceHeuristic significanceHeuristic;
public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, IncludeExclude includeExclude, public SignificantTermsAggregatorFactory(String name,
String executionHint, QueryBuilder filterBuilder, TermsAggregator.BucketCountThresholds bucketCountThresholds, ValuesSourceConfig<ValuesSource> config,
SignificanceHeuristic significanceHeuristic, SearchContext context, AggregatorFactory<?> parent, IncludeExclude includeExclude,
AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException { String executionHint,
QueryBuilder filterBuilder,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
SignificanceHeuristic significanceHeuristic,
SearchContext context,
AggregatorFactory<?> parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData); super(name, config, context, parent, subFactoriesBuilder, metaData);
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
this.executionHint = executionHint; this.executionHint = executionHint;
@ -246,44 +253,71 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
MAP(new ParseField("map")) { MAP(new ParseField("map")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, Aggregator create(String name,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, ValuesSource valuesSource,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext aggregationContext,
Aggregator parent,
SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter, return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
} }
}, },
GLOBAL_ORDINALS(new ParseField("global_ordinals")) { GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, Aggregator create(String name,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, ValuesSource valuesSource,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext aggregationContext,
Aggregator parent,
SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator(name, factories, return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); aggregationContext, parent, false, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
} }
}, },
GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) { GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, Aggregator create(String name,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, ValuesSource valuesSource,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext aggregationContext,
Aggregator parent,
SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); true, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
} }
}; };
@ -302,10 +336,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
this.parseField = parseField; this.parseField = parseField;
} }
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, abstract Aggregator create(String name,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, ValuesSource valuesSource,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators, DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext aggregationContext,
Aggregator parent,
SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException; Map<String, Object> metaData) throws IOException;
@Override @Override

View File

@ -20,6 +20,7 @@
package org.elasticsearch.search.aggregations.bucket.terms; package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
@ -52,6 +53,8 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
/** /**
* An aggregator of string values that relies on global ordinals in order to build buckets. * An aggregator of string values that relies on global ordinals in order to build buckets.
*/ */
@ -66,67 +69,104 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
// first defined one. // first defined one.
// So currently for each instance of this aggregator the acceptedglobalValues will be computed, this is unnecessary // So currently for each instance of this aggregator the acceptedglobalValues will be computed, this is unnecessary
// especially if this agg is on a second layer or deeper. // especially if this agg is on a second layer or deeper.
protected LongBitSet acceptedGlobalOrdinals; protected final LongBitSet acceptedGlobalOrdinals;
protected final long valueCount;
protected final GlobalOrdLookupFunction lookupGlobalOrd;
protected SortedSetDocValues globalOrds; private final LongHash bucketOrds;
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, public interface GlobalOrdLookupFunction {
BucketOrder order, DocValueFormat format, BucketCountThresholds bucketCountThresholds, BytesRef apply(long ord) throws IOException;
IncludeExclude.OrdinalsFilter includeExclude, SearchContext context, Aggregator parent, }
SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories,
ValuesSource.Bytes.WithOrdinals valuesSource,
BucketOrder order,
DocValueFormat format,
BucketCountThresholds bucketCountThresholds,
IncludeExclude.OrdinalsFilter includeExclude,
SearchContext context,
Aggregator parent,
boolean forceRemapGlobalOrds,
SubAggCollectionMode collectionMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError, super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError,
pipelineAggregators, metaData); pipelineAggregators, metaData);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
final IndexReader reader = context.searcher().getIndexReader();
final SortedSetDocValues values = reader.leaves().size() > 0 ?
valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet();
this.valueCount = values.getValueCount();
this.lookupGlobalOrd = values::lookupOrd;
this.acceptedGlobalOrdinals = includeExclude != null ? includeExclude.acceptedGlobalOrdinals(values) : null;
/**
* Remap global ords to dense bucket ordinals if any sub-aggregator cannot be deferred.
* Sub-aggregators expect dense buckets and allocate memories based on this assumption.
* Deferred aggregators are safe because the selected ordinals are remapped when the buckets
* are replayed.
*/
boolean remapGlobalOrds = forceRemapGlobalOrds || Arrays.stream(subAggregators).anyMatch((a) -> shouldDefer(a) == false);
this.bucketOrds = remapGlobalOrds ? new LongHash(1, context.bigArrays()) : null;
} }
protected long getBucketOrd(long termOrd) {
return termOrd; boolean remapGlobalOrds() {
return bucketOrds != null;
}
protected final long getBucketOrd(long globalOrd) {
return bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd);
}
private void collectGlobalOrd(int doc, long globalOrd, LeafBucketCollector sub) throws IOException {
if (bucketOrds == null) {
collectExistingBucket(sub, doc, globalOrd);
} else {
long bucketOrd = bucketOrds.add(globalOrd);
if (bucketOrd < 0) {
bucketOrd = -1 - bucketOrd;
collectExistingBucket(sub, doc, bucketOrd);
} else {
collectBucket(sub, doc, bucketOrd);
}
}
}
private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException {
return acceptedGlobalOrdinals == null ?
valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals);
} }
@Override @Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
final LeafBucketCollector sub) throws IOException { final SortedSetDocValues globalOrds = getGlobalOrds(ctx);
if (bucketOrds == null) {
globalOrds = valuesSource.globalOrdinalsValues(ctx); grow(globalOrds.getValueCount());
if (acceptedGlobalOrdinals == null && includeExclude != null) {
acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrds);
} }
final SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
if (acceptedGlobalOrdinals != null) {
globalOrds = new FilteredOrdinals(globalOrds, acceptedGlobalOrdinals);
}
return newCollector(globalOrds, sub);
}
protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
final LeafBucketCollector sub) {
grow(ords.getValueCount());
final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
if (singleValues != null) { if (singleValues != null) {
return new LeafBucketCollectorBase(sub, ords) { return new LeafBucketCollectorBase(sub, globalOrds) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
assert bucket == 0; assert bucket == 0;
if (singleValues.advanceExact(doc)) { if (singleValues.advanceExact(doc)) {
final int ord = singleValues.ordValue(); final int ord = singleValues.ordValue();
collectExistingBucket(sub, doc, ord); collectGlobalOrd(doc, ord, sub);
} }
} }
}; };
} else { } else {
return new LeafBucketCollectorBase(sub, ords) { return new LeafBucketCollectorBase(sub, globalOrds) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
assert bucket == 0; assert bucket == 0;
if (ords.advanceExact(doc)) { if (globalOrds.advanceExact(doc)) {
for (long globalOrd = ords.nextOrd(); for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
globalOrd != SortedSetDocValues.NO_MORE_ORDS; collectGlobalOrd(doc, globalOrd, sub);
globalOrd = ords.nextOrd()) {
collectExistingBucket(sub, doc, globalOrd);
} }
} }
} }
@ -145,21 +185,21 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
@Override @Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
if (globalOrds == null) { // no context in this reader if (valueCount == 0) { // no context in this reader
return buildEmptyAggregation(); return buildEmptyAggregation();
} }
final int size; final int size;
if (bucketCountThresholds.getMinDocCount() == 0) { if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize()); size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
} else { } else {
size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
} }
long otherDocCount = 0; long otherDocCount = 0;
BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this)); BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0); OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) { for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) { if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue; continue;
} }
@ -184,10 +224,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()]; final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
long survivingBucketOrds[] = new long[ordered.size()]; long survivingBucketOrds[] = new long[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; --i) { for (int i = ordered.size() - 1; i >= 0; --i) {
final OrdBucket bucket = (OrdBucket) ordered.pop(); final OrdBucket bucket = ordered.pop();
survivingBucketOrds[i] = bucket.bucketOrd; survivingBucketOrds[i] = bucket.bucketOrd;
BytesRef scratch = new BytesRef(); BytesRef scratch = new BytesRef();
copy(globalOrds.lookupOrd(bucket.globalOrd), scratch); copy(lookupGlobalOrd.apply(bucket.globalOrd), scratch);
list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format); list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
list[i].bucketOrd = bucket.bucketOrd; list[i].bucketOrd = bucket.bucketOrd;
otherDocCount -= list[i].docCount; otherDocCount -= list[i].docCount;
@ -254,109 +294,54 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
} }
} }
/**
* Variant of {@link GlobalOrdinalsStringTermsAggregator} that rebases hashes in order to make them dense. Might be
* useful in case few hashes are visited.
*/
public static class WithHash extends GlobalOrdinalsStringTermsAggregator {
private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, BucketOrder order,
DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
SearchContext context, Aggregator parent, SubAggCollectionMode collectionMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
super(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, collectionMode,
showTermDocCountError, pipelineAggregators, metaData);
bucketOrds = new LongHash(1, context.bigArrays());
}
@Override
protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
final LeafBucketCollector sub) {
final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
if (singleValues != null) {
return new LeafBucketCollectorBase(sub, ords) {
@Override
public void collect(int doc, long bucket) throws IOException {
if (singleValues.advanceExact(doc)) {
final int globalOrd = singleValues.ordValue();
long bucketOrd = bucketOrds.add(globalOrd);
if (bucketOrd < 0) {
bucketOrd = -1 - bucketOrd;
collectExistingBucket(sub, doc, bucketOrd);
} else {
collectBucket(sub, doc, bucketOrd);
}
}
}
};
} else {
return new LeafBucketCollectorBase(sub, ords) {
@Override
public void collect(int doc, long bucket) throws IOException {
if (ords.advanceExact(doc)) {
for (long globalOrd = ords.nextOrd();
globalOrd != SortedSetDocValues.NO_MORE_ORDS;
globalOrd = ords.nextOrd()) {
long bucketOrd = bucketOrds.add(globalOrd);
if (bucketOrd < 0) {
bucketOrd = -1 - bucketOrd;
collectExistingBucket(sub, doc, bucketOrd);
} else {
collectBucket(sub, doc, bucketOrd);
}
}
}
}
};
}
}
@Override
protected long getBucketOrd(long termOrd) {
return bucketOrds.find(termOrd);
}
@Override @Override
protected void doClose() { protected void doClose() {
Releasables.close(bucketOrds); Releasables.close(bucketOrds);
} }
}
/** /**
* Variant of {@link GlobalOrdinalsStringTermsAggregator} that resolves global ordinals post segment collection * Variant of {@link GlobalOrdinalsStringTermsAggregator} that resolves global ordinals post segment collection
* instead of on the fly for each match.This is beneficial for low cardinality fields, because it can reduce * instead of on the fly for each match.This is beneficial for low cardinality fields, because it can reduce
* the amount of look-ups significantly. * the amount of look-ups significantly.
*/ */
public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
private IntArray segmentDocCounts; private IntArray segmentDocCounts;
private SortedSetDocValues globalOrds;
private SortedSetDocValues segmentOrds; private SortedSetDocValues segmentOrds;
public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, LowCardinality(String name,
BucketOrder order, DocValueFormat format, AggregatorFactories factories,
BucketCountThresholds bucketCountThresholds, SearchContext context, Aggregator parent, ValuesSource.Bytes.WithOrdinals valuesSource,
SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, BucketOrder order,
DocValueFormat format,
BucketCountThresholds bucketCountThresholds,
SearchContext context,
Aggregator parent,
boolean forceDenseMode,
SubAggCollectionMode collectionMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException { Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, order, format, bucketCountThresholds, null, context, parent, collectionMode, super(name, factories, valuesSource, order, format, bucketCountThresholds, null,
showTermDocCountError, pipelineAggregators, metaData); context, parent, forceDenseMode, collectionMode, showTermDocCountError, pipelineAggregators, metaData);
assert factories == null || factories.countAggregators() == 0; assert factories == null || factories.countAggregators() == 0;
this.segmentDocCounts = context.bigArrays().newIntArray(1, true); this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
} }
// bucketOrd is ord + 1 to avoid a branch to deal with the missing ord
@Override @Override
protected LeafBucketCollector newCollector(final SortedSetDocValues ords, public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
LeafBucketCollector sub) { final LeafBucketCollector sub) throws IOException {
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + ords.getValueCount()); if (segmentOrds != null) {
mapSegmentCountsToGlobalCounts();
}
globalOrds = valuesSource.globalOrdinalsValues(ctx);
segmentOrds = valuesSource.ordinalsValues(ctx);
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
assert sub == LeafBucketCollector.NO_OP_COLLECTOR; assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
final SortedDocValues singleValues = DocValues.unwrapSingleton(ords); final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
if (singleValues != null) { if (singleValues != null) {
return new LeafBucketCollectorBase(sub, ords) { return new LeafBucketCollectorBase(sub, segmentOrds) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
assert bucket == 0; assert bucket == 0;
@ -367,14 +352,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
} }
}; };
} else { } else {
return new LeafBucketCollectorBase(sub, ords) { return new LeafBucketCollectorBase(sub, segmentOrds) {
@Override @Override
public void collect(int doc, long bucket) throws IOException { public void collect(int doc, long bucket) throws IOException {
assert bucket == 0; assert bucket == 0;
if (ords.advanceExact(doc)) { if (segmentOrds.advanceExact(doc)) {
for (long segmentOrd = ords.nextOrd(); for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
segmentOrd != SortedSetDocValues.NO_MORE_ORDS;
segmentOrd = ords.nextOrd()) {
segmentDocCounts.increment(segmentOrd + 1, 1); segmentDocCounts.increment(segmentOrd + 1, 1);
} }
} }
@ -383,18 +366,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
} }
} }
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException {
if (segmentOrds != null) {
mapSegmentCountsToGlobalCounts();
}
globalOrds = valuesSource.globalOrdinalsValues(ctx);
segmentOrds = valuesSource.ordinalsValues(ctx);
return newCollector(segmentOrds, sub);
}
@Override @Override
protected void doPostCollection() { protected void doPostCollection() {
if (segmentOrds != null) { if (segmentOrds != null) {
@ -426,7 +397,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
} }
final long ord = i - 1; // remember we do +1 when counting final long ord = i - 1; // remember we do +1 when counting
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord); final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
incrementBucketDocCount(globalOrd, inc); long bucketOrd = getBucketOrd(globalOrd);
incrementBucketDocCount(bucketOrd, inc);
} }
} }
} }

View File

@ -53,10 +53,18 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
private boolean showTermDocCountError; private boolean showTermDocCountError;
public TermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, BucketOrder order, public TermsAggregatorFactory(String name,
IncludeExclude includeExclude, String executionHint, SubAggCollectionMode collectMode, ValuesSourceConfig<ValuesSource> config,
TermsAggregator.BucketCountThresholds bucketCountThresholds, boolean showTermDocCountError, SearchContext context, BucketOrder order,
AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException { IncludeExclude includeExclude,
String executionHint,
SubAggCollectionMode collectMode,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
boolean showTermDocCountError,
SearchContext context,
AggregatorFactory<?> parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData); super(name, config, context, parent, subFactoriesBuilder, metaData);
this.order = order; this.order = order;
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
@ -225,14 +233,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
MAP(new ParseField("map")) { MAP(new ParseField("map")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, Aggregator create(String name,
DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, ValuesSource valuesSource,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) BucketOrder order,
throws IOException { DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter, return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
} }
@Override @Override
@ -244,15 +262,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS(new ParseField("global_ordinals")) { GLOBAL_ORDINALS(new ParseField("global_ordinals")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, Aggregator create(String name,
DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, ValuesSource valuesSource,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) BucketOrder order,
throws IOException { DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context, Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError, format, bucketCountThresholds, filter, context, parent, false, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData); pipelineAggregators, metaData);
} }
@Override @Override
@ -264,15 +291,25 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) { GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, Aggregator create(String name,
DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, ValuesSource valuesSource,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) BucketOrder order,
throws IOException { DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
order, format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError, order, format, bucketCountThresholds, filter, context, parent, true, subAggCollectMode,
pipelineAggregators, metaData); showTermDocCountError, pipelineAggregators, metaData);
} }
@Override @Override
@ -283,11 +320,20 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) { GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, Aggregator create(String name,
DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, ValuesSource valuesSource,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) BucketOrder order,
throws IOException { DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
if (includeExclude != null || factories.countAggregators() > 0 if (includeExclude != null || factories.countAggregators() > 0
// we need the FieldData impl to be able to extract the // we need the FieldData impl to be able to extract the
// segment to global ord mapping // segment to global ord mapping
@ -297,7 +343,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
} }
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories,
(ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent,
subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
} }
@Override @Override
@ -321,11 +368,19 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
this.parseField = parseField; this.parseField = parseField;
} }
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, abstract Aggregator create(String name,
DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregatorFactories factories,
SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, ValuesSource valuesSource,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) BucketOrder order,
throws IOException; DocValueFormat format,
TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude,
SearchContext context,
Aggregator parent,
SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException;
abstract boolean needsGlobalOrdinals(); abstract boolean needsGlobalOrdinals();

View File

@ -233,16 +233,14 @@ public class IncludeExclude implements Writeable, ToXContent {
} }
public abstract static class OrdinalsFilter { public abstract static class OrdinalsFilter {
public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException;
throws IOException;
} }
class PartitionedOrdinalsFilter extends OrdinalsFilter { class PartitionedOrdinalsFilter extends OrdinalsFilter {
@Override @Override
public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
throws IOException {
final long numOrds = globalOrdinals.getValueCount(); final long numOrds = globalOrdinals.getValueCount();
final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds); final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds);
final TermsEnum termEnum = globalOrdinals.termsEnum(); final TermsEnum termEnum = globalOrdinals.termsEnum();
@ -271,8 +269,7 @@ public class IncludeExclude implements Writeable, ToXContent {
* *
*/ */
@Override @Override
public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum; TermsEnum globalTermsEnum;
Terms globalTerms = new DocValuesTerms(globalOrdinals); Terms globalTerms = new DocValuesTerms(globalOrdinals);
@ -297,8 +294,7 @@ public class IncludeExclude implements Writeable, ToXContent {
} }
@Override @Override
public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
if (includeValues != null) { if (includeValues != null) {
for (BytesRef term : includeValues) { for (BytesRef term : includeValues) {

View File

@ -21,10 +21,11 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.ESTestCase;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
public class TermsAggregatorFactoryTests extends ESSingleNodeTestCase { public class TermsAggregatorFactoryTests extends ESTestCase {
public void testSubAggCollectMode() throws Exception { public void testSubAggCollectMode() throws Exception {
assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1), assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1),
equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));

View File

@ -35,6 +35,8 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.BucketOrder;
@ -44,7 +46,53 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import static org.hamcrest.Matchers.instanceOf;
public class TermsAggregatorTests extends AggregatorTestCase { public class TermsAggregatorTests extends AggregatorTestCase {
public void testGlobalOrdinalsExecutionHint() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
// We do not use LuceneTestCase.newSearcher because we need a DirectoryReader
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING)
.field("string")
.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST);
MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setName("string");
fieldType.setHasDocValues(true);
TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
GlobalOrdinalsStringTermsAggregator globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
assertFalse(globalAgg.remapGlobalOrds());
aggregationBuilder
.subAggregation(AggregationBuilders.cardinality("card").field("string"));
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
assertFalse(globalAgg.remapGlobalOrds());
aggregationBuilder
.order(BucketOrder.aggregation("card", true));
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
assertTrue(globalAgg.remapGlobalOrds());
aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING)
.field("string")
.executionHint("global_ordinals_hash");
aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
assertTrue(globalAgg.remapGlobalOrds());
indexReader.close();
directory.close();
}
public void testTermsAggregator() throws Exception { public void testTermsAggregator() throws Exception {
Directory directory = newDirectory(); Directory directory = newDirectory();

View File

@ -154,7 +154,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0); ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
assertThat(termsAggResult, notNullValue()); assertThat(termsAggResult, notNullValue());
assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
assertThat(termsAggResult.getLuceneDescription(), equalTo("terms")); assertThat(termsAggResult.getLuceneDescription(), equalTo("terms"));
assertThat(termsAggResult.getTime(), greaterThan(0L)); assertThat(termsAggResult.getTime(), greaterThan(0L));
Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown(); Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown();
@ -224,7 +224,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0); ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
assertThat(termsAggResult, notNullValue()); assertThat(termsAggResult, notNullValue());
assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
assertThat(termsAggResult.getLuceneDescription(), equalTo("terms")); assertThat(termsAggResult.getLuceneDescription(), equalTo("terms"));
assertThat(termsAggResult.getTime(), greaterThan(0L)); assertThat(termsAggResult.getTime(), greaterThan(0L));
Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown(); Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown();
@ -355,7 +355,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
ProfileResult tagsAggResult = histoAggResult.getProfiledChildren().get(0); ProfileResult tagsAggResult = histoAggResult.getProfiledChildren().get(0);
assertThat(tagsAggResult, notNullValue()); assertThat(tagsAggResult, notNullValue());
assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags")); assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags"));
assertThat(tagsAggResult.getTime(), greaterThan(0L)); assertThat(tagsAggResult.getTime(), greaterThan(0L));
Map<String, Long> tagsBreakdown = tagsAggResult.getTimeBreakdown(); Map<String, Long> tagsBreakdown = tagsAggResult.getTimeBreakdown();
@ -406,7 +406,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
ProfileResult stringsAggResult = histoAggResult.getProfiledChildren().get(1); ProfileResult stringsAggResult = histoAggResult.getProfiledChildren().get(1);
assertThat(stringsAggResult, notNullValue()); assertThat(stringsAggResult, notNullValue());
assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
assertThat(stringsAggResult.getLuceneDescription(), equalTo("strings")); assertThat(stringsAggResult.getLuceneDescription(), equalTo("strings"));
assertThat(stringsAggResult.getTime(), greaterThan(0L)); assertThat(stringsAggResult.getTime(), greaterThan(0L));
Map<String, Long> stringsBreakdown = stringsAggResult.getTimeBreakdown(); Map<String, Long> stringsBreakdown = stringsAggResult.getTimeBreakdown();
@ -457,7 +457,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {
tagsAggResult = stringsAggResult.getProfiledChildren().get(2); tagsAggResult = stringsAggResult.getProfiledChildren().get(2);
assertThat(tagsAggResult, notNullValue()); assertThat(tagsAggResult, notNullValue());
assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags")); assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags"));
assertThat(tagsAggResult.getTime(), greaterThan(0L)); assertThat(tagsAggResult.getTime(), greaterThan(0L));
tagsBreakdown = tagsAggResult.getTimeBreakdown(); tagsBreakdown = tagsAggResult.getTimeBreakdown();