Terms aggregation should remap global ordinal buckets when a sub-aggregator is used to sort the terms (#24941)
`terms` aggregations at the root level use the `global_ordinals` execution hint by default. When all sub-aggregators can be run in `breadth_first` mode, the buckets collected for these sub-aggs are dense (they are remapped after the initial pruning). But if a sub-aggregator is not deferrable and needs to collect all buckets before pruning, global ordinals are not remapped and the sub-aggregator has to deal with sparse buckets. Most (if not all) aggregators expect dense buckets and use this assumption to allocate memory. This change forces the remapping of global ordinals, but only when there is at least one sub-aggregator that cannot be deferred. Relates #24788
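To make the behaviour concrete, here is a minimal, self-contained sketch (hypothetical class and method names, not the Elasticsearch sources) of the decision this change introduces: global ordinals are used directly as bucket ordinals only when every sub-aggregator can be deferred; as soon as one sub-aggregator must collect eagerly, global ordinals are remapped to dense bucket ordinals in the order they are first seen, which mirrors what the real aggregator now does with `forceRemapGlobalOrds` and a `LongHash`.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/** Sketch of dense vs. sparse bucket ordinals; names are illustrative only. */
public class GlobalOrdRemapSketch {

    /** Stand-in for a sub-aggregator; only the deferral decision matters here. */
    interface SubAgg {
        boolean deferrable();
    }

    private final boolean remapGlobalOrds;
    private final Map<Long, Long> bucketOrds = new HashMap<>(); // stands in for LongHash

    GlobalOrdRemapSketch(boolean forceRemap, SubAgg[] subAggs) {
        // Remap if forced (the global_ordinals_hash hint) or if any sub-agg cannot be deferred.
        this.remapGlobalOrds = forceRemap
            || Arrays.stream(subAggs).anyMatch(a -> a.deferrable() == false);
    }

    /** Bucket ordinal used for a given global ordinal. */
    long bucketOrd(long globalOrd) {
        if (remapGlobalOrds == false) {
            return globalOrd; // sparse: the bucket ordinal is the global ordinal itself
        }
        // dense: hand out 0, 1, 2, ... in the order global ordinals are first seen
        return bucketOrds.computeIfAbsent(globalOrd, k -> (long) bucketOrds.size());
    }

    public static void main(String[] args) {
        SubAgg deferrable = () -> true;
        SubAgg eager = () -> false;

        GlobalOrdRemapSketch sparse = new GlobalOrdRemapSketch(false, new SubAgg[] { deferrable });
        GlobalOrdRemapSketch dense = new GlobalOrdRemapSketch(false, new SubAgg[] { eager });

        System.out.println(sparse.bucketOrd(42));   // 42 (sparse: only deferrable sub-aggs)
        System.out.println(dense.bucketOrd(42));    // 0  (dense: an eager sub-agg is present)
        System.out.println(dense.bucketOrd(1000));  // 1
    }
}
```

In the diff below the same idea shows up as `GlobalOrdinalsStringTermsAggregator.getBucketOrd`, which returns either the global ordinal itself or `bucketOrds.find(globalOrd)` depending on whether a `LongHash` was allocated in the constructor.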
This commit is contained in:
parent
2a6e6866bd
commit
ce7195d81a
@@ -53,22 +53,28 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
    protected final SignificantTermsAggregatorFactory termsAggFactory;
    private final SignificanceHeuristic significanceHeuristic;

    public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories,
            ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, DocValueFormat format,
            BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
            SearchContext context, Aggregator parent,
            SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory,
            List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    public GlobalOrdinalsSignificantTermsAggregator(String name,
            AggregatorFactories factories,
            ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
            DocValueFormat format,
            BucketCountThresholds bucketCountThresholds,
            IncludeExclude.OrdinalsFilter includeExclude,
            SearchContext context,
            Aggregator parent,
            boolean forceRemapGlobalOrds,
            SignificanceHeuristic significanceHeuristic,
            SignificantTermsAggregatorFactory termsAggFactory,
            List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) throws IOException {
        super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent,
            SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
            forceRemapGlobalOrds, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData);
        this.significanceHeuristic = significanceHeuristic;
        this.termsAggFactory = termsAggFactory;
        this.numCollectedDocs = 0;
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
            final LeafBucketCollector sub) throws IOException {
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
        return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
            @Override
            public void collect(int doc, long bucket) throws IOException {

@@ -78,18 +84,17 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
        };
    }

    @Override
    public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException {
        assert owningBucketOrdinal == 0;
        if (globalOrds == null) { // no context in this reader
        if (valueCount == 0) { // no context in this reader
            return buildEmptyAggregation();
        }

        final int size;
        if (bucketCountThresholds.getMinDocCount() == 0) {
            // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
            size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
            size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
        } else {
            size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
        }

@@ -98,7 +103,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
        BucketSignificancePriorityQueue<SignificantStringTerms.Bucket> ordered = new BucketSignificancePriorityQueue<>(size);
        SignificantStringTerms.Bucket spare = null;
        for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
            if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
                continue;
            }

@@ -115,7 +120,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
                spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format);
            }
            spare.bucketOrd = bucketOrd;
            copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes);
            copy(lookupGlobalOrd.apply(globalTermOrd), spare.termBytes);
            spare.subsetDf = bucketDocCount;
            spare.subsetSize = subsetSize;
            spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);

@@ -148,63 +153,13 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
        IndexReader topReader = searcher.getIndexReader();
        int supersetSize = topReader.numDocs();
        return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
            pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList());
            pipelineAggregators(), metaData(), format, numCollectedDocs, supersetSize, significanceHeuristic, emptyList());
    }

    @Override
    protected void doClose() {
        super.doClose();
        Releasables.close(termsAggFactory);
    }

    public static class WithHash extends GlobalOrdinalsSignificantTermsAggregator {

        private final LongHash bucketOrds;

        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
                DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
                SearchContext context, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                SignificantTermsAggregatorFactory termsAggFactory, List<PipelineAggregator> pipelineAggregators,
                Map<String, Object> metaData) throws IOException {
            super(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic,
                termsAggFactory, pipelineAggregators, metaData);
            bucketOrds = new LongHash(1, context.bigArrays());
        }

        @Override
        public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                final LeafBucketCollector sub) throws IOException {
            return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) {
                @Override
                public void collect(int doc, long bucket) throws IOException {
                    assert bucket == 0;
                    numCollectedDocs++;
                    if (globalOrds.advanceExact(doc)) {
                        for (long globalOrd = globalOrds.nextOrd();
                             globalOrd != SortedSetDocValues.NO_MORE_ORDS;
                             globalOrd = globalOrds.nextOrd()) {
                            long bucketOrd = bucketOrds.add(globalOrd);
                            if (bucketOrd < 0) {
                                bucketOrd = -1 - bucketOrd;
                                collectExistingBucket(sub, doc, bucketOrd);
                            } else {
                                collectBucket(sub, doc, bucketOrd);
                            }
                        }
                    }
                }
            };
        }

        @Override
        protected long getBucketOrd(long termOrd) {
            return bucketOrds.find(termOrd);
        }

        @Override
        protected void doClose() {
            Releasables.close(termsAggFactory, bucketOrds);
        }
    }

}
@@ -70,10 +70,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    private final SignificanceHeuristic significanceHeuristic;

    public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, IncludeExclude includeExclude,
            String executionHint, QueryBuilder filterBuilder, TermsAggregator.BucketCountThresholds bucketCountThresholds,
            SignificanceHeuristic significanceHeuristic, SearchContext context, AggregatorFactory<?> parent,
            AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException {
    public SignificantTermsAggregatorFactory(String name,
            ValuesSourceConfig<ValuesSource> config,
            IncludeExclude includeExclude,
            String executionHint,
            QueryBuilder filterBuilder,
            TermsAggregator.BucketCountThresholds bucketCountThresholds,
            SignificanceHeuristic significanceHeuristic,
            SearchContext context,
            AggregatorFactory<?> parent,
            AggregatorFactories.Builder subFactoriesBuilder,
            Map<String, Object> metaData) throws IOException {
        super(name, config, context, parent, subFactoriesBuilder, metaData);
        this.includeExclude = includeExclude;
        this.executionHint = executionHint;

@@ -246,44 +253,71 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
        MAP(new ParseField("map")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext aggregationContext,
                    Aggregator parent,
                    SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
                return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
                    aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);

            }

        },
        GLOBAL_ORDINALS(new ParseField("global_ordinals")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext aggregationContext,
                    Aggregator parent,
                    SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
                    (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
                    aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
                    aggregationContext, parent, false, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);

            }

        },
        GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext aggregationContext,
                    Aggregator parent,
                    SignificanceHeuristic significanceHeuristic,
                    SignificantTermsAggregatorFactory termsAggregatorFactory,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
                    (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
                    aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
                return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
                    (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent,
                    true, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);

            }
        };

@@ -302,10 +336,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
            this.parseField = parseField;
        }

        abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format,
                TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
                SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
        abstract Aggregator create(String name,
                AggregatorFactories factories,
                ValuesSource valuesSource,
                DocValueFormat format,
                TermsAggregator.BucketCountThresholds bucketCountThresholds,
                IncludeExclude includeExclude,
                SearchContext aggregationContext,
                Aggregator parent,
                SignificanceHeuristic significanceHeuristic,
                SignificantTermsAggregatorFactory termsAggregatorFactory,
                List<PipelineAggregator> pipelineAggregators,
                Map<String, Object> metaData) throws IOException;

        @Override
@@ -20,6 +20,7 @@
package org.elasticsearch.search.aggregations.bucket.terms;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;

@@ -52,6 +53,8 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

/**
 * An aggregator of string values that relies on global ordinals in order to build buckets.
 */

@@ -66,67 +69,104 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
    // first defined one.
    // So currently for each instance of this aggregator the acceptedglobalValues will be computed, this is unnecessary
    // especially if this agg is on a second layer or deeper.
    protected LongBitSet acceptedGlobalOrdinals;
    protected final LongBitSet acceptedGlobalOrdinals;
    protected final long valueCount;
    protected final GlobalOrdLookupFunction lookupGlobalOrd;

    protected SortedSetDocValues globalOrds;
    private final LongHash bucketOrds;

    public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
            BucketOrder order, DocValueFormat format, BucketCountThresholds bucketCountThresholds,
            IncludeExclude.OrdinalsFilter includeExclude, SearchContext context, Aggregator parent,
            SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
    public interface GlobalOrdLookupFunction {
        BytesRef apply(long ord) throws IOException;
    }

    public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories,
            ValuesSource.Bytes.WithOrdinals valuesSource,
            BucketOrder order,
            DocValueFormat format,
            BucketCountThresholds bucketCountThresholds,
            IncludeExclude.OrdinalsFilter includeExclude,
            SearchContext context,
            Aggregator parent,
            boolean forceRemapGlobalOrds,
            SubAggCollectionMode collectionMode,
            boolean showTermDocCountError,
            List<PipelineAggregator> pipelineAggregators,
            Map<String, Object> metaData) throws IOException {
        super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError,
            pipelineAggregators, metaData);
        this.valuesSource = valuesSource;
        this.includeExclude = includeExclude;
        final IndexReader reader = context.searcher().getIndexReader();
        final SortedSetDocValues values = reader.leaves().size() > 0 ?
            valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet();
        this.valueCount = values.getValueCount();
        this.lookupGlobalOrd = values::lookupOrd;
        this.acceptedGlobalOrdinals = includeExclude != null ? includeExclude.acceptedGlobalOrdinals(values) : null;

        /**
         * Remap global ords to dense bucket ordinals if any sub-aggregator cannot be deferred.
         * Sub-aggregators expect dense buckets and allocate memories based on this assumption.
         * Deferred aggregators are safe because the selected ordinals are remapped when the buckets
         * are replayed.
         */
        boolean remapGlobalOrds = forceRemapGlobalOrds || Arrays.stream(subAggregators).anyMatch((a) -> shouldDefer(a) == false);
        this.bucketOrds = remapGlobalOrds ? new LongHash(1, context.bigArrays()) : null;
    }

    protected long getBucketOrd(long termOrd) {
        return termOrd;

    boolean remapGlobalOrds() {
        return bucketOrds != null;
    }

    protected final long getBucketOrd(long globalOrd) {
        return bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd);
    }

    private void collectGlobalOrd(int doc, long globalOrd, LeafBucketCollector sub) throws IOException {
        if (bucketOrds == null) {
            collectExistingBucket(sub, doc, globalOrd);
        } else {
            long bucketOrd = bucketOrds.add(globalOrd);
            if (bucketOrd < 0) {
                bucketOrd = -1 - bucketOrd;
                collectExistingBucket(sub, doc, bucketOrd);
            } else {
                collectBucket(sub, doc, bucketOrd);
            }
        }
    }

    private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException {
        return acceptedGlobalOrdinals == null ?
            valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals);
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
            final LeafBucketCollector sub) throws IOException {

        globalOrds = valuesSource.globalOrdinalsValues(ctx);

        if (acceptedGlobalOrdinals == null && includeExclude != null) {
            acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrds);
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
        final SortedSetDocValues globalOrds = getGlobalOrds(ctx);
        if (bucketOrds == null) {
            grow(globalOrds.getValueCount());
        }

        if (acceptedGlobalOrdinals != null) {
            globalOrds = new FilteredOrdinals(globalOrds, acceptedGlobalOrdinals);
        }

        return newCollector(globalOrds, sub);
    }

    protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
            final LeafBucketCollector sub) {
        grow(ords.getValueCount());
        final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
        final SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
        if (singleValues != null) {
            return new LeafBucketCollectorBase(sub, ords) {
            return new LeafBucketCollectorBase(sub, globalOrds) {
                @Override
                public void collect(int doc, long bucket) throws IOException {
                    assert bucket == 0;
                    if (singleValues.advanceExact(doc)) {
                        final int ord = singleValues.ordValue();
                        collectExistingBucket(sub, doc, ord);
                        collectGlobalOrd(doc, ord, sub);
                    }
                }
            };
        } else {
            return new LeafBucketCollectorBase(sub, ords) {
            return new LeafBucketCollectorBase(sub, globalOrds) {
                @Override
                public void collect(int doc, long bucket) throws IOException {
                    assert bucket == 0;
                    if (ords.advanceExact(doc)) {
                        for (long globalOrd = ords.nextOrd();
                             globalOrd != SortedSetDocValues.NO_MORE_ORDS;
                             globalOrd = ords.nextOrd()) {
                            collectExistingBucket(sub, doc, globalOrd);
                    if (globalOrds.advanceExact(doc)) {
                        for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
                            collectGlobalOrd(doc, globalOrd, sub);
                        }
                    }
                }

@@ -145,21 +185,21 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr

    @Override
    public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
        if (globalOrds == null) { // no context in this reader
        if (valueCount == 0) { // no context in this reader
            return buildEmptyAggregation();
        }

        final int size;
        if (bucketCountThresholds.getMinDocCount() == 0) {
            // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
            size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize());
            size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize());
        } else {
            size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
        }
        long otherDocCount = 0;
        BucketPriorityQueue<OrdBucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
        OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0);
        for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) {
        for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) {
            if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
                continue;
            }

@@ -184,10 +224,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
        final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()];
        long survivingBucketOrds[] = new long[ordered.size()];
        for (int i = ordered.size() - 1; i >= 0; --i) {
            final OrdBucket bucket = (OrdBucket) ordered.pop();
            final OrdBucket bucket = ordered.pop();
            survivingBucketOrds[i] = bucket.bucketOrd;
            BytesRef scratch = new BytesRef();
            copy(globalOrds.lookupOrd(bucket.globalOrd), scratch);
            copy(lookupGlobalOrd.apply(bucket.globalOrd), scratch);
            list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format);
            list[i].bucketOrd = bucket.bucketOrd;
            otherDocCount -= list[i].docCount;

@@ -254,109 +294,54 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
        }
    }

    /**
     * Variant of {@link GlobalOrdinalsStringTermsAggregator} that rebases hashes in order to make them dense. Might be
     * useful in case few hashes are visited.
     */
    public static class WithHash extends GlobalOrdinalsStringTermsAggregator {

        private final LongHash bucketOrds;

        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, BucketOrder order,
                DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude,
                SearchContext context, Aggregator parent, SubAggCollectionMode collectionMode,
                boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                throws IOException {
            super(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, collectionMode,
                showTermDocCountError, pipelineAggregators, metaData);
            bucketOrds = new LongHash(1, context.bigArrays());
        }

        @Override
        protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
                final LeafBucketCollector sub) {
            final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
            if (singleValues != null) {
                return new LeafBucketCollectorBase(sub, ords) {
                    @Override
                    public void collect(int doc, long bucket) throws IOException {
                        if (singleValues.advanceExact(doc)) {
                            final int globalOrd = singleValues.ordValue();
                            long bucketOrd = bucketOrds.add(globalOrd);
                            if (bucketOrd < 0) {
                                bucketOrd = -1 - bucketOrd;
                                collectExistingBucket(sub, doc, bucketOrd);
                            } else {
                                collectBucket(sub, doc, bucketOrd);
                            }
                        }
                    }
                };
            } else {
                return new LeafBucketCollectorBase(sub, ords) {
                    @Override
                    public void collect(int doc, long bucket) throws IOException {
                        if (ords.advanceExact(doc)) {
                            for (long globalOrd = ords.nextOrd();
                                 globalOrd != SortedSetDocValues.NO_MORE_ORDS;
                                 globalOrd = ords.nextOrd()) {
                                long bucketOrd = bucketOrds.add(globalOrd);
                                if (bucketOrd < 0) {
                                    bucketOrd = -1 - bucketOrd;
                                    collectExistingBucket(sub, doc, bucketOrd);
                                } else {
                                    collectBucket(sub, doc, bucketOrd);
                                }
                            }
                        }
                    }
                };
            }
        }

        @Override
        protected long getBucketOrd(long termOrd) {
            return bucketOrds.find(termOrd);
        }

        @Override
        protected void doClose() {
            Releasables.close(bucketOrds);
        }

    }

    /**
     * Variant of {@link GlobalOrdinalsStringTermsAggregator} that resolves global ordinals post segment collection
     * instead of on the fly for each match.This is beneficial for low cardinality fields, because it can reduce
     * the amount of look-ups significantly.
     */
    public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
    static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {

        private IntArray segmentDocCounts;

        private SortedSetDocValues globalOrds;
        private SortedSetDocValues segmentOrds;

        public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
                BucketOrder order, DocValueFormat format,
                BucketCountThresholds bucketCountThresholds, SearchContext context, Aggregator parent,
                SubAggCollectionMode collectionMode, boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators,
        LowCardinality(String name,
                AggregatorFactories factories,
                ValuesSource.Bytes.WithOrdinals valuesSource,
                BucketOrder order,
                DocValueFormat format,
                BucketCountThresholds bucketCountThresholds,
                SearchContext context,
                Aggregator parent,
                boolean forceDenseMode,
                SubAggCollectionMode collectionMode,
                boolean showTermDocCountError,
                List<PipelineAggregator> pipelineAggregators,
                Map<String, Object> metaData) throws IOException {
            super(name, factories, valuesSource, order, format, bucketCountThresholds, null, context, parent, collectionMode,
                showTermDocCountError, pipelineAggregators, metaData);
            super(name, factories, valuesSource, order, format, bucketCountThresholds, null,
                context, parent, forceDenseMode, collectionMode, showTermDocCountError, pipelineAggregators, metaData);
            assert factories == null || factories.countAggregators() == 0;
            this.segmentDocCounts = context.bigArrays().newIntArray(1, true);
        }

        // bucketOrd is ord + 1 to avoid a branch to deal with the missing ord
        @Override
        protected LeafBucketCollector newCollector(final SortedSetDocValues ords,
                LeafBucketCollector sub) {
            segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + ords.getValueCount());
        public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                final LeafBucketCollector sub) throws IOException {
            if (segmentOrds != null) {
                mapSegmentCountsToGlobalCounts();
            }
            globalOrds = valuesSource.globalOrdinalsValues(ctx);
            segmentOrds = valuesSource.ordinalsValues(ctx);
            segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
            assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
            final SortedDocValues singleValues = DocValues.unwrapSingleton(ords);
            final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
            if (singleValues != null) {
                return new LeafBucketCollectorBase(sub, ords) {
                return new LeafBucketCollectorBase(sub, segmentOrds) {
                    @Override
                    public void collect(int doc, long bucket) throws IOException {
                        assert bucket == 0;

@@ -367,14 +352,12 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                    }
                };
            } else {
                return new LeafBucketCollectorBase(sub, ords) {
                return new LeafBucketCollectorBase(sub, segmentOrds) {
                    @Override
                    public void collect(int doc, long bucket) throws IOException {
                        assert bucket == 0;
                        if (ords.advanceExact(doc)) {
                            for (long segmentOrd = ords.nextOrd();
                                 segmentOrd != SortedSetDocValues.NO_MORE_ORDS;
                                 segmentOrd = ords.nextOrd()) {
                        if (segmentOrds.advanceExact(doc)) {
                            for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
                                segmentDocCounts.increment(segmentOrd + 1, 1);
                            }
                        }

@@ -383,18 +366,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
            }
        }

        @Override
        public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
                final LeafBucketCollector sub) throws IOException {
            if (segmentOrds != null) {
                mapSegmentCountsToGlobalCounts();
            }

            globalOrds = valuesSource.globalOrdinalsValues(ctx);
            segmentOrds = valuesSource.ordinalsValues(ctx);
            return newCollector(segmentOrds, sub);
        }

        @Override
        protected void doPostCollection() {
            if (segmentOrds != null) {

@@ -426,7 +397,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                }
                final long ord = i - 1; // remember we do +1 when counting
                final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
                incrementBucketDocCount(globalOrd, inc);
                long bucketOrd = getBucketOrd(globalOrd);
                incrementBucketDocCount(bucketOrd, inc);
            }
        }
    }
@@ -53,10 +53,18 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
    private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
    private boolean showTermDocCountError;

    public TermsAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config, BucketOrder order,
            IncludeExclude includeExclude, String executionHint, SubAggCollectionMode collectMode,
            TermsAggregator.BucketCountThresholds bucketCountThresholds, boolean showTermDocCountError, SearchContext context,
            AggregatorFactory<?> parent, AggregatorFactories.Builder subFactoriesBuilder, Map<String, Object> metaData) throws IOException {
    public TermsAggregatorFactory(String name,
            ValuesSourceConfig<ValuesSource> config,
            BucketOrder order,
            IncludeExclude includeExclude,
            String executionHint,
            SubAggCollectionMode collectMode,
            TermsAggregator.BucketCountThresholds bucketCountThresholds,
            boolean showTermDocCountError,
            SearchContext context,
            AggregatorFactory<?> parent,
            AggregatorFactories.Builder subFactoriesBuilder,
            Map<String, Object> metaData) throws IOException {
        super(name, config, context, parent, subFactoriesBuilder, metaData);
        this.order = order;
        this.includeExclude = includeExclude;

@@ -225,14 +233,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
        MAP(new ParseField("map")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
                    DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                    throws IOException {
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    BucketOrder order,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext context,
                    Aggregator parent,
                    SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
                return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
                    context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);

            }

            @Override

@@ -244,15 +262,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
        GLOBAL_ORDINALS(new ParseField("global_ordinals")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
                    DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                    throws IOException {
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    BucketOrder order,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext context, Aggregator parent,
                    SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
                    format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError,
                    format, bucketCountThresholds, filter, context, parent, false, subAggCollectMode, showTermDocCountError,
                    pipelineAggregators, metaData);

            }

            @Override

@@ -264,15 +291,25 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
        GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
                    DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                    throws IOException {
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    BucketOrder order,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext context,
                    Aggregator parent,
                    SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
                return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
                    order, format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError,
                    pipelineAggregators, metaData);
                return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
                    order, format, bucketCountThresholds, filter, context, parent, true, subAggCollectMode,
                    showTermDocCountError, pipelineAggregators, metaData);

            }

            @Override

@@ -283,11 +320,20 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
        GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) {

            @Override
            Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
                    DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                    SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                    throws IOException {
            Aggregator create(String name,
                    AggregatorFactories factories,
                    ValuesSource valuesSource,
                    BucketOrder order,
                    DocValueFormat format,
                    TermsAggregator.BucketCountThresholds bucketCountThresholds,
                    IncludeExclude includeExclude,
                    SearchContext context,
                    Aggregator parent,
                    SubAggCollectionMode subAggCollectMode,
                    boolean showTermDocCountError,
                    List<PipelineAggregator> pipelineAggregators,
                    Map<String, Object> metaData) throws IOException {

                if (includeExclude != null || factories.countAggregators() > 0
                    // we need the FieldData impl to be able to extract the
                    // segment to global ord mapping

@@ -297,7 +343,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
                }
                return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories,
                    (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent,
                    subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
                    false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);

            }

            @Override

@@ -321,11 +368,19 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
            this.parseField = parseField;
        }

        abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order,
                DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode,
                boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
                throws IOException;
        abstract Aggregator create(String name,
                AggregatorFactories factories,
                ValuesSource valuesSource,
                BucketOrder order,
                DocValueFormat format,
                TermsAggregator.BucketCountThresholds bucketCountThresholds,
                IncludeExclude includeExclude,
                SearchContext context,
                Aggregator parent,
                SubAggCollectionMode subAggCollectMode,
                boolean showTermDocCountError,
                List<PipelineAggregator> pipelineAggregators,
                Map<String, Object> metaData) throws IOException;

        abstract boolean needsGlobalOrdinals();
@@ -233,16 +233,14 @@ public class IncludeExclude implements Writeable, ToXContent {
    }

    public abstract static class OrdinalsFilter {
        public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
            throws IOException;
        public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException;

    }

    class PartitionedOrdinalsFilter extends OrdinalsFilter {

        @Override
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
            throws IOException {
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
            final long numOrds = globalOrdinals.getValueCount();
            final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds);
            final TermsEnum termEnum = globalOrdinals.termsEnum();

@@ -271,8 +269,7 @@ public class IncludeExclude implements Writeable, ToXContent {
         *
         */
        @Override
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
            throws IOException {
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
            LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
            TermsEnum globalTermsEnum;
            Terms globalTerms = new DocValuesTerms(globalOrdinals);

@@ -297,8 +294,7 @@ public class IncludeExclude implements Writeable, ToXContent {
        }

        @Override
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals)
            throws IOException {
        public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException {
            LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
            if (includeValues != null) {
                for (BytesRef term : includeValues) {
@@ -21,10 +21,11 @@ package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.ESTestCase;

import static org.hamcrest.Matchers.equalTo;

public class TermsAggregatorFactoryTests extends ESSingleNodeTestCase {
public class TermsAggregatorFactoryTests extends ESTestCase {
    public void testSubAggCollectMode() throws Exception {
        assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1),
            equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST));
@@ -35,6 +35,8 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.BucketOrder;

@@ -44,7 +46,53 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static org.hamcrest.Matchers.instanceOf;

public class TermsAggregatorTests extends AggregatorTestCase {
    public void testGlobalOrdinalsExecutionHint() throws Exception {
        Directory directory = newDirectory();
        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
        indexWriter.close();
        IndexReader indexReader = DirectoryReader.open(directory);
        // We do not use LuceneTestCase.newSearcher because we need a DirectoryReader
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING)
            .field("string")
            .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST);
        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
        fieldType.setName("string");
        fieldType.setHasDocValues(true);

        TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
        assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
        GlobalOrdinalsStringTermsAggregator globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
        assertFalse(globalAgg.remapGlobalOrds());

        aggregationBuilder
            .subAggregation(AggregationBuilders.cardinality("card").field("string"));
        aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
        assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
        globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
        assertFalse(globalAgg.remapGlobalOrds());

        aggregationBuilder
            .order(BucketOrder.aggregation("card", true));
        aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
        assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
        globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
        assertTrue(globalAgg.remapGlobalOrds());

        aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING)
            .field("string")
            .executionHint("global_ordinals_hash");
        aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
        assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class));
        globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator;
        assertTrue(globalAgg.remapGlobalOrds());
        indexReader.close();
        directory.close();
    }

    public void testTermsAggregator() throws Exception {
        Directory directory = newDirectory();
@@ -154,7 +154,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {

        ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
        assertThat(termsAggResult, notNullValue());
        assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName()));
        assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
        assertThat(termsAggResult.getLuceneDescription(), equalTo("terms"));
        assertThat(termsAggResult.getTime(), greaterThan(0L));
        Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown();

@@ -224,7 +224,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {

        ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0);
        assertThat(termsAggResult, notNullValue());
        assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName()));
        assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
        assertThat(termsAggResult.getLuceneDescription(), equalTo("terms"));
        assertThat(termsAggResult.getTime(), greaterThan(0L));
        Map<String, Long> termsBreakdown = termsAggResult.getTimeBreakdown();

@@ -355,7 +355,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {

        ProfileResult tagsAggResult = histoAggResult.getProfiledChildren().get(0);
        assertThat(tagsAggResult, notNullValue());
        assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName()));
        assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
        assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags"));
        assertThat(tagsAggResult.getTime(), greaterThan(0L));
        Map<String, Long> tagsBreakdown = tagsAggResult.getTimeBreakdown();

@@ -406,7 +406,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {

        ProfileResult stringsAggResult = histoAggResult.getProfiledChildren().get(1);
        assertThat(stringsAggResult, notNullValue());
        assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName()));
        assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
        assertThat(stringsAggResult.getLuceneDescription(), equalTo("strings"));
        assertThat(stringsAggResult.getTime(), greaterThan(0L));
        Map<String, Long> stringsBreakdown = stringsAggResult.getTimeBreakdown();

@@ -457,7 +457,7 @@ public class AggregationProfilerIT extends ESIntegTestCase {

        tagsAggResult = stringsAggResult.getProfiledChildren().get(2);
        assertThat(tagsAggResult, notNullValue());
        assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName()));
        assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName()));
        assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags"));
        assertThat(tagsAggResult.getTime(), greaterThan(0L));
        tagsBreakdown = tagsAggResult.getTimeBreakdown();