refactor: make requiredSize, shardSize, minDocCount and shardMinDocCount a single parameter

Every class that uses these parameters has its own members where these four
values are stored. This clutters the code. Because they are mostly needed
together, it makes sense to group them into a single parameter object.
Britta Weber 2014-05-09 17:54:28 +02:00
parent 8e3bcb5e2f
commit d4a0eb818e
18 changed files with 290 additions and 261 deletions
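
The grouping object introduced by this commit is TermsAggregator.BucketCountThresholds. The diff below only shows it being used, so as orientation here is a minimal sketch of what such a holder has to provide, inferred from the constructors, getters and setters that appear in the changed files. In the real code it is a nested class of TermsAggregator, also has a no-argument constructor, and carries the toXContent and ensureValidity methods referenced further down; none of that is reproduced here.

```java
// Sketch only: a value object grouping the four bucket-count parameters.
// The constructor argument order (minDocCount, shardMinDocCount, requiredSize, shardSize)
// is inferred from the defaults used in the diff, e.g. new BucketCountThresholds(3, 0, 10, -1).
public class BucketCountThresholds {

    private long minDocCount;
    private long shardMinDocCount;
    private int requiredSize;
    private int shardSize;

    public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) {
        this.minDocCount = minDocCount;
        this.shardMinDocCount = shardMinDocCount;
        this.requiredSize = requiredSize;
        this.shardSize = shardSize;
    }

    // copy constructor, used by getDefaultBucketCountThresholds() in the parsers below
    public BucketCountThresholds(BucketCountThresholds other) {
        this(other.minDocCount, other.shardMinDocCount, other.requiredSize, other.shardSize);
    }

    public long getMinDocCount() { return minDocCount; }
    public void setMinDocCount(long minDocCount) { this.minDocCount = minDocCount; }

    public long getShardMinDocCount() { return shardMinDocCount; }
    public void setShardMinDocCount(long shardMinDocCount) { this.shardMinDocCount = shardMinDocCount; }

    public int getRequiredSize() { return requiredSize; }
    public void setRequiredSize(int requiredSize) { this.requiredSize = requiredSize; }

    public int getShardSize() { return shardSize; }
    public void setShardSize(int shardSize) { this.shardSize = shardSize; }
}
```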

File: GlobalOrdinalsSignificantTermsAggregator.java

@ -42,16 +42,14 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
protected long numCollectedDocs;
protected final SignificantTermsAggregatorFactory termsAggFactory;
- protected long shardMinDocCount;
public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
- long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount,
+ long estimatedBucketCount, long maxOrd, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
SignificantTermsAggregatorFactory termsAggFactory) {
- super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
+ super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, bucketCountThresholds, includeExclude, aggregationContext, parent);
this.termsAggFactory = termsAggFactory;
- this.shardMinDocCount = shardMinDocCount;
}
@Override
@ -68,11 +66,11 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
}
final int size;
- if (minDocCount == 0) {
+ if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
- size = (int) Math.min(globalOrdinals.getMaxOrd(), shardSize);
+ size = (int) Math.min(globalOrdinals.getMaxOrd(), bucketCountThresholds.getShardSize());
} else {
- size = (int) Math.min(maxBucketOrd(), shardSize);
+ size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
}
long supersetSize = termsAggFactory.prepareBackground(context);
long subsetSize = numCollectedDocs;
@ -85,7 +83,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
- if (minDocCount > 0 && bucketDocCount == 0) {
+ if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}
if (spare == null) {
@ -102,7 +100,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
// Back at the central reducer these properties will be updated with
// global stats
spare.updateScore();
- if (spare.subsetDf >= shardMinDocCount) {
+ if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
}
@ -116,7 +114,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
list[i] = bucket;
}
- return new SignificantStringTerms(subsetSize, supersetSize, name, requiredSize, minDocCount, Arrays.asList(list));
+ return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
}
@Override
@ -125,7 +123,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
- return new SignificantStringTerms(0, supersetSize, name, requiredSize, minDocCount, Collections.<InternalSignificantTerms.Bucket>emptyList());
+ return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalSignificantTerms.Bucket>emptyList());
}
@Override
@ -137,8 +135,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
private final LongHash bucketOrds;
- public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
+ public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
- super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggFactory);
+ super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory);
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
}

File: SignificantLongTermsAggregator.java

@ -39,17 +39,15 @@ import java.util.Collections;
public class SignificantLongTermsAggregator extends LongTermsAggregator {
public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format,
- long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount,
+ long estimatedBucketCount, BucketCountThresholds bucketCountThresholds,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
- super(name, factories, valuesSource, format, estimatedBucketCount, null, requiredSize, shardSize, minDocCount, aggregationContext, parent);
+ super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent);
this.termsAggFactory = termsAggFactory;
- this.shardMinDocCount = shardMinDocCount;
}
protected long numCollectedDocs;
private final SignificantTermsAggregatorFactory termsAggFactory;
- protected long shardMinDocCount;
@Override
public void collect(int doc, long owningBucketOrdinal) throws IOException {
@ -61,7 +59,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
public SignificantLongTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
- final int size = (int) Math.min(bucketOrds.size(), shardSize);
+ final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
long supersetSize = termsAggFactory.prepareBackground(context);
long subsetSize = numCollectedDocs;
@ -83,7 +81,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
spare.updateScore();
spare.bucketOrd = i;
- if (spare.subsetDf >= shardMinDocCount) {
+ if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
}
}
@ -94,7 +92,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
- return new SignificantLongTerms(subsetSize, supersetSize, name, formatter, requiredSize, minDocCount, Arrays.asList(list));
+ return new SignificantLongTerms(subsetSize, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
}
@Override
@ -103,7 +101,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
- return new SignificantLongTerms(0, supersetSize, name, formatter, requiredSize, minDocCount, Collections.<InternalSignificantTerms.Bucket>emptyList());
+ return new SignificantLongTerms(0, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalSignificantTerms.Bucket>emptyList());
}
@Override

File: SignificantStringTermsAggregator.java

@ -45,17 +45,14 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
protected long numCollectedDocs;
protected final SignificantTermsAggregatorFactory termsAggFactory;
- protected long shardMinDocCount;
public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
- long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount,
+ long estimatedBucketCount, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
SignificantTermsAggregatorFactory termsAggFactory) {
- super(name, factories, valuesSource, estimatedBucketCount, null, requiredSize, shardSize,
- minDocCount, includeExclude, aggregationContext, parent);
+ super(name, factories, valuesSource, estimatedBucketCount, null, bucketCountThresholds, includeExclude, aggregationContext, parent);
this.termsAggFactory = termsAggFactory;
- this.shardMinDocCount = shardMinDocCount;
}
@Override
@ -68,7 +65,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
public SignificantStringTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
- final int size = (int) Math.min(bucketOrds.size(), shardSize);
+ final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
long supersetSize = termsAggFactory.prepareBackground(context);
long subsetSize = numCollectedDocs;
@ -91,7 +88,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
spare.updateScore();
spare.bucketOrd = i;
- if (spare.subsetDf >= shardMinDocCount) {
+ if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
}
@ -105,7 +102,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
list[i] = bucket;
}
- return new SignificantStringTerms(subsetSize, supersetSize, name, requiredSize, minDocCount, Arrays.asList(list));
+ return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
}
@Override
@ -114,7 +111,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
ContextIndexSearcher searcher = context.searchContext().searcher();
IndexReader topReader = searcher.getIndexReader();
int supersetSize = topReader.numDocs();
- return new SignificantStringTerms(0, supersetSize, name, requiredSize, minDocCount, Collections.<InternalSignificantTerms.Bucket>emptyList());
+ return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalSignificantTerms.Bucket>emptyList());
}
@Override
@ -133,9 +130,9 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
private LongArray ordinalToBucket;
public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource,
- long esitmatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext,
+ long esitmatedBucketCount, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext,
Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) {
- super(name, factories, valuesSource, esitmatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, null, aggregationContext, parent, termsAggFactory);
+ super(name, factories, valuesSource, esitmatedBucketCount, bucketCountThresholds, null, aggregationContext, parent, termsAggFactory);
this.valuesSource = valuesSource;
}

File: SignificantTermsAggregatorFactory.java

@ -31,6 +31,7 @@ import org.elasticsearch.common.lucene.index.FilterableTermsEnum;
import org.elasticsearch.common.lucene.index.FreqTermsEnum;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.aggregations.*;
+ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
@ -52,9 +53,9 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
- int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
- return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
+ return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory);
}
@Override
@ -67,12 +68,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
- int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
if (includeExclude != null) {
- return MAP.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
+ return MAP.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory);
}
- return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
+ return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, termsAggregatorFactory);
}
@Override
@ -85,12 +86,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
- int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
- return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
+ return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory);
}
@Override
@ -103,9 +104,9 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
@Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
- int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
- return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
+ return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory);
}
@Override
@ -130,7 +131,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
}
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
- int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory);
abstract boolean needsGlobalOrdinals();
@ -140,11 +141,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
return parseField.getPreferredName();
}
}
- private final int requiredSize;
- private final int shardSize;
- private final long minDocCount;
- private final long shardMinDocCount;
private final IncludeExclude includeExclude;
private final String executionHint;
private String indexedFieldName;
@ -152,16 +148,13 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
private FilterableTermsEnum termsEnum;
private int numberOfAggregatorsCreated = 0;
private Filter filter;
+ private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
- public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, int requiredSize,
- int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
+ public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
String executionHint, Filter filter) {
super(name, SignificantStringTerms.TYPE.name(), valueSourceConfig);
- this.requiredSize = requiredSize;
- this.shardSize = shardSize;
- this.minDocCount = minDocCount;
- this.shardMinDocCount = shardMinDocCount;
+ this.bucketCountThresholds = bucketCountThresholds;
this.includeExclude = includeExclude;
this.executionHint = executionHint;
if (!valueSourceConfig.unmapped()) {
@ -173,7 +166,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
@Override
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
- final InternalAggregation aggregation = new UnmappedSignificantTerms(name, requiredSize, minDocCount);
+ final InternalAggregation aggregation = new UnmappedSignificantTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount());
return new NonCollectingAggregator(name, aggregationContext, parent) {
@Override
public InternalAggregation buildEmptyAggregation() {
@ -205,7 +198,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
}
assert execution != null;
valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals());
- return execution.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, this);
+ return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this);
}
if (includeExclude != null) {
@ -218,7 +211,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
throw new UnsupportedOperationException("No support for examining floating point numerics");
}
- return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, this);
+ return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this);
}
throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() +

File: SignificantTermsBuilder.java

@ -23,6 +23,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser;
+ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import java.io.IOException;
@ -34,11 +35,9 @@ import java.io.IOException;
*/
public class SignificantTermsBuilder extends AggregationBuilder<SignificantTermsBuilder> {
+ private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(-1, -1, -1, -1);
private String field;
- private int requiredSize = AbstractTermsParametersParser.DEFAULT_REQUIRED_SIZE;
- private int shardSize = AbstractTermsParametersParser.DEFAULT_SHARD_SIZE;
- private int minDocCount = AbstractTermsParametersParser.DEFAULT_MIN_DOC_COUNT;
- private int shardMinDocCount = AbstractTermsParametersParser.DEFAULT_SHARD_MIN_DOC_COUNT;
private String executionHint;
private String includePattern;
private int includeFlags;
@ -57,17 +56,17 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
}
public SignificantTermsBuilder size(int requiredSize) {
- this.requiredSize = requiredSize;
+ bucketCountThresholds.setRequiredSize(requiredSize);
return this;
}
public SignificantTermsBuilder shardSize(int shardSize) {
- this.shardSize = shardSize;
+ bucketCountThresholds.setShardSize(shardSize);
return this;
}
public SignificantTermsBuilder minDocCount(int minDocCount) {
- this.minDocCount = minDocCount;
+ bucketCountThresholds.setMinDocCount(minDocCount);
return this;
}
@ -78,7 +77,7 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
public SignificantTermsBuilder shardMinDocCount(int shardMinDocCount) {
- this.shardMinDocCount = shardMinDocCount;
+ bucketCountThresholds.setShardMinDocCount(shardMinDocCount);
return this;
}
@ -137,20 +136,9 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
if (field != null) {
builder.field("field", field);
}
- if (minDocCount != AbstractTermsParametersParser.DEFAULT_MIN_DOC_COUNT) {
- builder.field("minDocCount", minDocCount);
- }
- if (shardMinDocCount != AbstractTermsParametersParser.DEFAULT_SHARD_MIN_DOC_COUNT) {
- builder.field("shardMinDocCount", shardMinDocCount);
- }
- if (requiredSize != AbstractTermsParametersParser.DEFAULT_REQUIRED_SIZE) {
- builder.field("size", requiredSize);
- }
- if (shardSize != AbstractTermsParametersParser.DEFAULT_SHARD_SIZE) {
- builder.field("shard_size", shardSize);
- }
+ bucketCountThresholds.toXContent(builder);
if (executionHint != null) {
- builder.field("execution_hint", executionHint);
+ builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint);
}
if (includePattern != null) {
if (includeFlags == 0) {
@ -180,5 +168,4 @@ public class SignificantTermsBuilder extends AggregationBuilder<SignificantTerms
return builder.endObject();
}
}
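
To see what this looks like from the caller side, here is a hypothetical usage of the reworked builder. The aggregation name, the field name, the no-argument-style constructor taking a name, and the field() setter are all assumptions for illustration; only size(), shardSize(), minDocCount() and shardMinDocCount() appear in the diff above, and after this commit they all write into the single bucketCountThresholds instance.

```java
// Hypothetical caller-side usage; the constructor signature and field() are assumed,
// the four threshold setters are the ones shown in the diff above.
SignificantTermsBuilder sigTerms = new SignificantTermsBuilder("suspicious_terms")
        .field("description")      // illustrative field name
        .size(10)                   // requiredSize
        .shardSize(50)              // shardSize
        .minDocCount(3)             // minDocCount
        .shardMinDocCount(1);       // shardMinDocCount
```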

File: SignificantTermsParametersParser.java

@ -25,6 +25,7 @@ import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser;
+ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
@ -32,6 +33,7 @@ import java.io.IOException;
public class SignificantTermsParametersParser extends AbstractTermsParametersParser {
+ private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(3, 0, 10, -1);
public Filter getFilter() {
return filter;
@ -39,9 +41,8 @@ public class SignificantTermsParametersParser extends AbstractTermsParametersPar
private Filter filter = null;
- @Override
- public void setDefaults() {
- setMinDocCount(3);
+ public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() {
+ return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS);
}
static final ParseField BACKGROUND_FILTER = new ParseField("background_filter");

File: SignificantTermsParser.java

@ -22,6 +22,7 @@ import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.BucketUtils;
+ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
import org.elasticsearch.search.internal.SearchContext;
@ -33,8 +34,6 @@ import java.io.IOException;
*/
public class SignificantTermsParser implements Aggregator.Parser {
@Override
public String type() {
return SignificantStringTerms.TYPE.name();
@ -51,8 +50,9 @@ public class SignificantTermsParser implements Aggregator.Parser {
.build();
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, SignificantStringTerms.TYPE, context);
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
- int shardSize = aggParser.getShardSize();
- if ( shardSize == aggParser.DEFAULT_SHARD_SIZE) {
+ TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
+ if (bucketCountThresholds.getShardSize() == new SignificantTermsParametersParser().getDefaultBucketCountThresholds().getShardSize()) {
//The user has not made a shardSize selection .
//Use default heuristic to avoid any wrong-ranking caused by distributed counting
//but request double the usual amount.
@ -60,20 +60,10 @@ public class SignificantTermsParser implements Aggregator.Parser {
//as the significance algorithm is in less of a position to down-select at shard-level -
//some of the things we want to find have only one occurrence on each shard and as
// such are impossible to differentiate from non-significant terms at that early stage.
- shardSize = 2 * BucketUtils.suggestShardSideQueueSize(aggParser.getRequiredSize(), context.numberOfShards());
+ bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
}
- // shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shards in order to return <size>
- if (shardSize < aggParser.getRequiredSize()) {
- shardSize = aggParser.getRequiredSize();
- }
- long shardMinDocCount = aggParser.getShardMinDocCount();
- // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria
- if (shardMinDocCount > aggParser.getMinDocCount()) {
- shardMinDocCount = aggParser.getMinDocCount();
- }
- return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), aggParser.getRequiredSize(), shardSize, aggParser.getMinDocCount(), shardMinDocCount, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter());
+ bucketCountThresholds.ensureValidity();
+ return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), bucketCountThresholds, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter());
}
}
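
The two sanity checks that used to live inline in this parser (shard_size must not be smaller than size, shard_min_doc_count must not be larger than min_doc_count) are replaced above by a single bucketCountThresholds.ensureValidity() call. Its implementation is not part of this diff; the sketch below only restates the removed checks in terms of the new holder, with illustrative field names.

```java
// Sketch only: the removed parser-side clamps, expressed as a method on the holder.
public void ensureValidity() {
    // shard_size cannot be smaller than size: every shard has to return at least
    // <size> entries for the reduce phase to pick the final top buckets from
    if (shardSize < requiredSize) {
        setShardSize(requiredSize);
    }
    // shard_min_doc_count should not be larger than min_doc_count, otherwise buckets
    // that would satisfy min_doc_count could already be dropped at shard level
    if (shardMinDocCount > minDocCount) {
        setShardMinDocCount(minDocCount);
    }
}
```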

File: AbstractStringTermsAggregator.java

@ -22,26 +22,19 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
- import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import java.util.Collections;
- abstract class AbstractStringTermsAggregator extends BucketsAggregator {
+ abstract class AbstractStringTermsAggregator extends TermsAggregator {
protected final InternalOrder order;
- protected final int requiredSize;
- protected final int shardSize;
- protected final long minDocCount;
public AbstractStringTermsAggregator(String name, AggregatorFactories factories,
long estimatedBucketsCount, AggregationContext context, Aggregator parent,
- InternalOrder order, int requiredSize, int shardSize, long minDocCount) {
+ InternalOrder order, BucketCountThresholds bucketCountThresholds) {
- super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketsCount, context, parent);
+ super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketsCount, context, parent, bucketCountThresholds);
this.order = InternalOrder.validate(order, this);
- this.requiredSize = requiredSize;
- this.shardSize = shardSize;
- this.minDocCount = minDocCount;
}
@Override
@ -51,7 +44,7 @@ abstract class AbstractStringTermsAggregator extends BucketsAggregator {
@Override
public InternalAggregation buildEmptyAggregation() {
- return new StringTerms(name, order, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
+ return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList());
}
}

File: AbstractTermsParametersParser.java

@ -29,37 +29,20 @@ import java.io.IOException;
public abstract class AbstractTermsParametersParser {
- public static final int DEFAULT_REQUIRED_SIZE = 10;
- public static final int DEFAULT_SHARD_SIZE = -1;
- //Typically need more than one occurrence of something for it to be statistically significant
- public static final int DEFAULT_MIN_DOC_COUNT = 1;
- public static final int DEFAULT_SHARD_MIN_DOC_COUNT = 1;
- static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint");
- static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size");
- static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count");
- static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count");
- public int getRequiredSize() {
- return requiredSize;
- }
- public int getShardSize() {
- return shardSize;
- }
- public void setMinDocCount(long minDocCount) {
- this.minDocCount = minDocCount;
- }
- public long getMinDocCount() {
- return minDocCount;
- }
- public long getShardMinDocCount() {
- return shardMinDocCount;
- }
+ public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint");
+ public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size");
+ public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count");
+ public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count");
+ public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size");
+ //These are the results of the parsing.
+ private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds();
+ private String executionHint = null;
+ IncludeExclude includeExclude;
+ public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {return bucketCountThresholds;}
//These are the results of the parsing.
@ -71,17 +54,10 @@ public abstract class AbstractTermsParametersParser {
return includeExclude;
}
- private int requiredSize = DEFAULT_REQUIRED_SIZE;
- private int shardSize = DEFAULT_SHARD_SIZE;
- private long minDocCount = DEFAULT_MIN_DOC_COUNT;
- private long shardMinDocCount = DEFAULT_SHARD_MIN_DOC_COUNT;
- private String executionHint = null;
- IncludeExclude includeExclude;
public void parse(String aggregationName, XContentParser parser, SearchContext context, ValuesSourceParser vsParser, IncludeExclude.Parser incExcParser) throws IOException {
+ bucketCountThresholds = getDefaultBucketCountThresholds();
XContentParser.Token token;
String currentFieldName = null;
- setDefaults();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@ -97,14 +73,14 @@ public abstract class AbstractTermsParametersParser {
parseSpecial(aggregationName, parser, context, token, currentFieldName);
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
- if ("size".equals(currentFieldName)) {
+ if (REQUIRED_SIZE_FIELD_NAME.match(currentFieldName)) {
- requiredSize = parser.intValue();
+ bucketCountThresholds.setRequiredSize(parser.intValue());
} else if (SHARD_SIZE_FIELD_NAME.match(currentFieldName)) {
- shardSize = parser.intValue();
+ bucketCountThresholds.setShardSize(parser.intValue());
} else if (MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) {
- minDocCount = parser.intValue();
+ bucketCountThresholds.setMinDocCount(parser.intValue());
} else if (SHARD_MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) {
- shardMinDocCount = parser.longValue();
+ bucketCountThresholds.setShardMinDocCount(parser.longValue());
} else {
parseSpecial(aggregationName, parser, context, token, currentFieldName);
}
@ -117,5 +93,5 @@ public abstract class AbstractTermsParametersParser {
public abstract void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException;
- public abstract void setDefaults();
+ protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds();
}

File: DoubleTermsAggregator.java

@ -25,7 +25,6 @@ import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
- import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
@ -39,26 +38,20 @@ import java.util.Collections;
/**
*
*/
- public class DoubleTermsAggregator extends BucketsAggregator {
+ public class DoubleTermsAggregator extends TermsAggregator {
private final InternalOrder order;
- private final int requiredSize;
- private final int shardSize;
- private final long minDocCount;
private final ValuesSource.Numeric valuesSource;
private final ValueFormatter formatter;
private final LongHash bucketOrds;
private DoubleValues values;
public DoubleTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
- InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
+ InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) {
- super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
+ super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds);
this.valuesSource = valuesSource;
this.formatter = format != null ? format.formatter() : null;
this.order = InternalOrder.validate(order, this);
- this.requiredSize = requiredSize;
- this.shardSize = shardSize;
- this.minDocCount = minDocCount;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
}
@ -94,7 +87,7 @@ public class DoubleTermsAggregator extends BucketsAggregator {
public DoubleTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
- if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) {
+ if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
// we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
@ -108,7 +101,7 @@ public class DoubleTermsAggregator extends BucketsAggregator {
}
}
- final int size = (int) Math.min(bucketOrds.size(), shardSize);
+ final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
DoubleTerms.Bucket spare = null;
@ -128,12 +121,12 @@ public class DoubleTermsAggregator extends BucketsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket;
}
- return new DoubleTerms(name, order, formatter, requiredSize, minDocCount, Arrays.asList(list));
+ return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
}
@Override
public DoubleTerms buildEmptyAggregation() {
- return new DoubleTerms(name, order, formatter, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList());
+ return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList());
}
@Override

File: GlobalOrdinalsStringTermsAggregator.java

@ -62,9 +62,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
protected LongBitSet acceptedGlobalOrdinals;
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
- long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount,
+ long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
- super(name, factories, maxOrd, aggregationContext, parent, order, requiredSize, shardSize, minDocCount);
+ super(name, factories, maxOrd, aggregationContext, parent, order, bucketCountThresholds);
this.valuesSource = valuesSource;
this.includeExclude = includeExclude;
}
@ -115,11 +115,11 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
final int size;
- if (minDocCount == 0) {
+ if (bucketCountThresholds.getMinDocCount() == 0) {
// if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns
- size = (int) Math.min(globalOrdinals.getMaxOrd(), shardSize);
+ size = (int) Math.min(globalOrdinals.getMaxOrd(), bucketCountThresholds.getShardSize());
} else {
- size = (int) Math.min(maxBucketOrd(), shardSize);
+ size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize());
}
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
StringTerms.Bucket spare = null;
@ -129,7 +129,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
- if (minDocCount > 0 && bucketDocCount == 0) {
+ if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}
if (spare == null) {
@ -148,7 +148,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
list[i] = bucket;
}
- return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list));
+ return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
}
/**
@ -160,10 +160,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
- long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext,
+ long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext,
Aggregator parent) {
// Set maxOrd to estimatedBucketCount! To be conservative with memory.
- super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
+ super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
}
@ -207,8 +207,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private LongArray current;
public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
- long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) {
+ long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) {
- super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent);
+ super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, null, aggregationContext, parent);
this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
}

File: LongTermsAggregator.java

@ -26,7 +26,6 @@ import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
- import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
@ -40,26 +39,20 @@ import java.util.Collections;
/** /**
* *
*/ */
public class LongTermsAggregator extends BucketsAggregator { public class LongTermsAggregator extends TermsAggregator {
private final InternalOrder order; private final InternalOrder order;
protected final int requiredSize;
protected final int shardSize;
protected final long minDocCount;
protected final ValuesSource.Numeric valuesSource; protected final ValuesSource.Numeric valuesSource;
protected final @Nullable ValueFormatter formatter; protected final @Nullable ValueFormatter formatter;
protected final LongHash bucketOrds; protected final LongHash bucketOrds;
private LongValues values; private LongValues values;
public LongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount, public LongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent); super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
this.formatter = format != null ? format.formatter() : null; this.formatter = format != null ? format.formatter() : null;
this.order = InternalOrder.validate(order, this); this.order = InternalOrder.validate(order, this);
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
} }
@ -94,7 +87,7 @@ public class LongTermsAggregator extends BucketsAggregator {
public InternalAggregation buildAggregation(long owningBucketOrdinal) { public InternalAggregation buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0; assert owningBucketOrdinal == 0;
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) { if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
// we need to fill-in the blanks // we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) { for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx); context.setNextReader(ctx);
@ -108,7 +101,7 @@ public class LongTermsAggregator extends BucketsAggregator {
} }
} }
final int size = (int) Math.min(bucketOrds.size(), shardSize); final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
LongTerms.Bucket spare = null; LongTerms.Bucket spare = null;
@ -128,12 +121,12 @@ public class LongTermsAggregator extends BucketsAggregator {
bucket.aggregations = bucketAggregations(bucket.bucketOrd); bucket.aggregations = bucketAggregations(bucket.bucketOrd);
list[i] = bucket; list[i] = bucket;
} }
return new LongTerms(name, order, formatter, requiredSize, minDocCount, Arrays.asList(list)); return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
} }
@Override @Override
public InternalAggregation buildEmptyAggregation() { public InternalAggregation buildEmptyAggregation() {
return new LongTerms(name, order, formatter, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList()); return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList());
} }
@Override @Override
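A small worked example (hypothetical numbers) of the queue sizing used in buildAggregation() above: the priority queue is bounded by shard_size but never grows past the number of buckets that actually exist on the shard.

class QueueSizingSketch {
    public static void main(String[] args) {
        long collectedBuckets = 4;   // bucketOrds.size(): distinct terms seen on this shard (hypothetical)
        int shardSize = 25;          // bucketCountThresholds.getShardSize() after ensureValidity()
        int size = (int) Math.min(collectedBuckets, shardSize);
        System.out.println(size);    // 4 -- the queue never exceeds the buckets present
        // With min_doc_count == 0 the fill-in branch may first add zero-count buckets, which is why
        // the bound is shard_size rather than required_size: a term empty here can still win globally.
    }
}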

View File

@ -53,10 +53,10 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
private BytesValues values; private BytesValues values;
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, long minDocCount, InternalOrder order, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
super(name, factories, estimatedBucketCount, aggregationContext, parent, order, requiredSize, shardSize, minDocCount); super(name, factories, estimatedBucketCount, aggregationContext, parent, order, bucketCountThresholds);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.bigArrays()); bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.bigArrays());
@ -138,7 +138,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
public InternalAggregation buildAggregation(long owningBucketOrdinal) { public InternalAggregation buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0; assert owningBucketOrdinal == 0;
if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) { if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
// we need to fill-in the blanks // we need to fill-in the blanks
List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList(); List<BytesValues.WithOrdinals> valuesWithOrdinals = Lists.newArrayList();
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) { for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
@ -186,19 +186,19 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
// let's try to find `shardSize` terms that matched no hit // let's try to find `shardSize` terms that matched no hit
// this one needs shardSize and not requiredSize because even though terms have a count of 0 here, // this one needs shardSize and not requiredSize because even though terms have a count of 0 here,
// they might have higher counts on other shards // they might have higher counts on other shards
for (int added = 0; added < shardSize && terms.hasNext(); ) { for (int added = 0; added < bucketCountThresholds.getShardSize() && terms.hasNext(); ) {
if (bucketOrds.add(terms.next()) >= 0) { if (bucketOrds.add(terms.next()) >= 0) {
++added; ++added;
} }
} }
} else if (order == InternalOrder.COUNT_DESC) { } else if (order == InternalOrder.COUNT_DESC) {
// add terms until there are enough buckets // add terms until there are enough buckets
while (bucketOrds.size() < requiredSize && terms.hasNext()) { while (bucketOrds.size() < bucketCountThresholds.getRequiredSize() && terms.hasNext()) {
bucketOrds.add(terms.next()); bucketOrds.add(terms.next());
} }
} else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) { } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) {
// add the `requiredSize` least terms // add the `requiredSize` least terms
for (int i = 0; i < requiredSize && terms.hasNext(); ++i) { for (int i = 0; i < bucketCountThresholds.getRequiredSize() && terms.hasNext(); ++i) {
bucketOrds.add(terms.next()); bucketOrds.add(terms.next());
} }
} else { } else {
@ -210,7 +210,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
} }
} }
final int size = (int) Math.min(bucketOrds.size(), shardSize); final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
StringTerms.Bucket spare = null; StringTerms.Bucket spare = null;
@ -233,12 +233,12 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
list[i] = bucket; list[i] = bucket;
} }
return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list)); return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list));
} }
@Override @Override
public InternalAggregation buildEmptyAggregation() { public InternalAggregation buildEmptyAggregation() {
return new StringTerms(name, order, requiredSize, minDocCount, Collections.<InternalTerms.Bucket>emptyList()); return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList());
} }
@Override @Override
@ -257,8 +257,8 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
private LongArray ordinalToBucket; private LongArray ordinalToBucket;
public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, long esitmatedBucketCount, public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, long esitmatedBucketCount,
InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) {
super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent); super(name, factories, valuesSource, esitmatedBucketCount, order, bucketCountThresholds, null, aggregationContext, parent);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
} }
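The fill-in logic in buildAggregation() above is the subtle part of this class. The sketch below simulates it with plain Java collections and hypothetical data (no BytesRefHash or TermsEnum) to show how the three order-dependent strategies differ when min_doc_count == 0.

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

class ZeroCountFillInSketch {
    public static void main(String[] args) {
        List<String> dictionary = Arrays.asList("a", "b", "c", "d", "e", "f"); // all segment terms, sorted
        Set<String> collected = new LinkedHashSet<>(Arrays.asList("c", "e"));  // terms that matched hits
        int requiredSize = 3;
        int shardSize = 5;

        // COUNT_DESC: add dictionary terms until there are requiredSize buckets in total
        Set<String> countDesc = new LinkedHashSet<>(collected);
        Iterator<String> it = dictionary.iterator();
        while (countDesc.size() < requiredSize && it.hasNext()) {
            countDesc.add(it.next());
        }

        // TERM_ASC / TERM_DESC: add the first requiredSize terms of the dictionary
        Set<String> termOrder = new LinkedHashSet<>(collected);
        for (int i = 0; i < requiredSize && i < dictionary.size(); i++) {
            termOrder.add(dictionary.get(i));
        }

        // any other order: add up to shardSize terms that were not collected yet, because a term
        // with zero hits on this shard may still have hits on other shards
        Set<String> other = new LinkedHashSet<>(collected);
        int added = 0;
        for (Iterator<String> rest = dictionary.iterator(); added < shardSize && rest.hasNext(); ) {
            if (other.add(rest.next())) {
                added++;
            }
        }

        System.out.println(countDesc);  // [c, e, a]
        System.out.println(termOrder);  // [c, e, a, b]  (a, b, c added; c was already present)
        System.out.println(other);      // [c, e, a, b, d, f]  (the dictionary runs out before shard_size = 5 new terms are found)
    }
}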

View File

@ -0,0 +1,133 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import java.io.IOException;
public abstract class TermsAggregator extends BucketsAggregator {
public static class BucketCountThresholds {
private Explicit<Long> minDocCount;
private Explicit<Long> shardMinDocCount;
private Explicit<Integer> requiredSize;
private Explicit<Integer> shardSize;
public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) {
this.minDocCount = new Explicit<>(minDocCount, false);
this.shardMinDocCount = new Explicit<>(shardMinDocCount, false);
this.requiredSize = new Explicit<>(requiredSize, false);
this.shardSize = new Explicit<>(shardSize, false);
}
public BucketCountThresholds() {
this(-1, -1, -1, -1);
}
public BucketCountThresholds(BucketCountThresholds bucketCountThresholds) {
this(bucketCountThresholds.minDocCount.value(), bucketCountThresholds.shardMinDocCount.value(), bucketCountThresholds.requiredSize.value(), bucketCountThresholds.shardSize.value());
}
public void ensureValidity() {
if (shardSize.value() == 0) {
setShardSize(Integer.MAX_VALUE);
}
if (requiredSize.value() == 0) {
setRequiredSize(Integer.MAX_VALUE);
}
// shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shard in order to return <size>
if (shardSize.value() < requiredSize.value()) {
setShardSize(requiredSize.value());
}
// shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria
if (shardMinDocCount.value() > minDocCount.value()) {
setShardMinDocCount(minDocCount.value());
}
if (requiredSize.value() < 0 || minDocCount.value() < 0) {
throw new ElasticsearchException("parameters [requiredSize] and [minDocCount] must be >=0 in terms aggregation.");
}
}
public long getShardMinDocCount() {
return shardMinDocCount.value();
}
public void setShardMinDocCount(long shardMinDocCount) {
this.shardMinDocCount = new Explicit<>(shardMinDocCount, true);
}
public long getMinDocCount() {
return minDocCount.value();
}
public void setMinDocCount(long minDocCount) {
this.minDocCount = new Explicit<>(minDocCount, true);
}
public int getRequiredSize() {
return requiredSize.value();
}
public void setRequiredSize(int requiredSize) {
this.requiredSize = new Explicit<>(requiredSize, true);
}
public int getShardSize() {
return shardSize.value();
}
public void setShardSize(int shardSize) {
this.shardSize = new Explicit<>(shardSize, true);
}
public void toXContent(XContentBuilder builder) throws IOException {
if (requiredSize.explicit()) {
builder.field(AbstractTermsParametersParser.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize.value());
}
if (shardSize.explicit()) {
builder.field(AbstractTermsParametersParser.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize.value());
}
if (minDocCount.explicit()) {
builder.field(AbstractTermsParametersParser.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount.value());
}
if (shardMinDocCount.explicit()) {
builder.field(AbstractTermsParametersParser.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount.value());
}
}
}
protected final BucketCountThresholds bucketCountThresholds;
public TermsAggregator(String name, BucketAggregationMode bucketAggregationMode, AggregatorFactories factories, long estimatedBucketsCount, AggregationContext context, Aggregator parent, BucketCountThresholds bucketCountThresholds) {
super(name, bucketAggregationMode, factories, estimatedBucketsCount, context, parent);
this.bucketCountThresholds = bucketCountThresholds;
}
}
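Assuming the class above is on the classpath, here is a short sketch of the intended lifecycle of a BucketCountThresholds instance: start from defaults, apply explicitly requested settings through the setters, then normalize with ensureValidity() before the values are read back through the getters.

import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;

class BucketCountThresholdsSketch {
    public static void main(String[] args) {
        // defaults as used by the terms parser: min_doc_count=1, shard_min_doc_count=0, size=10, shard_size unset
        TermsAggregator.BucketCountThresholds thresholds =
                new TermsAggregator.BucketCountThresholds(1, 0, 10, -1);

        thresholds.setRequiredSize(25);    // user explicitly asked for the top 25 terms
        thresholds.setShardMinDocCount(5); // larger than min_doc_count, will be clamped

        thresholds.ensureValidity();

        System.out.println(thresholds.getShardSize());        // 25: raised to at least required_size
        System.out.println(thresholds.getShardMinDocCount()); // 1: clamped down to min_doc_count
        System.out.println(thresholds.getRequiredSize());     // 25
        System.out.println(thresholds.getMinDocCount());      // 1
    }
}

Only the fields touched through a setter are flagged as explicit, so toXContent() later serializes just the thresholds the user actually specified.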

View File

@ -39,9 +39,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) { AggregationContext aggregationContext, Aggregator parent) {
return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
@Override @Override
@ -54,12 +54,12 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) { AggregationContext aggregationContext, Aggregator parent) {
if (includeExclude != null) { if (includeExclude != null) {
return MAP.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return MAP.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
return new StringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); return new StringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent);
} }
@Override @Override
@ -72,9 +72,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) { AggregationContext aggregationContext, Aggregator parent) {
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
@Override @Override
@ -87,9 +87,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) { AggregationContext aggregationContext, Aggregator parent) {
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
@Override @Override
@ -101,12 +101,12 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
@Override @Override
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) { AggregationContext aggregationContext, Aggregator parent) {
if (includeExclude != null || factories != null) { if (includeExclude != null || factories != null) {
return GLOBAL_ORDINALS.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return GLOBAL_ORDINALS.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, aggregationContext, parent);
} }
@Override @Override
@ -131,7 +131,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
} }
abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent); IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent);
abstract boolean needsGlobalOrdinals(); abstract boolean needsGlobalOrdinals();
@ -143,27 +143,22 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
} }
private final InternalOrder order; private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final long minDocCount;
private final IncludeExclude includeExclude; private final IncludeExclude includeExclude;
private final String executionHint; private final String executionHint;
private final TermsAggregator.BucketCountThresholds bucketCountThresholds;
public TermsAggregatorFactory(String name, ValuesSourceConfig config, InternalOrder order, int requiredSize, public TermsAggregatorFactory(String name, ValuesSourceConfig config, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, String executionHint) {
int shardSize, long minDocCount, IncludeExclude includeExclude, String executionHint) {
super(name, StringTerms.TYPE.name(), config); super(name, StringTerms.TYPE.name(), config);
this.order = order; this.order = order;
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.minDocCount = minDocCount;
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
this.executionHint = executionHint; this.executionHint = executionHint;
this.bucketCountThresholds = bucketCountThresholds;
} }
@Override @Override
protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) { protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
final InternalAggregation aggregation = new UnmappedTerms(name, order, requiredSize, minDocCount); final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount());
return new NonCollectingAggregator(name, aggregationContext, parent) { return new NonCollectingAggregator(name, aggregationContext, parent) {
@Override @Override
public InternalAggregation buildEmptyAggregation() { public InternalAggregation buildEmptyAggregation() {
@ -247,7 +242,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
assert execution != null; assert execution != null;
valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals()); valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals());
return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent);
} }
if (includeExclude != null) { if (includeExclude != null) {
@ -257,9 +252,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
if (valuesSource instanceof ValuesSource.Numeric) { if (valuesSource instanceof ValuesSource.Numeric) {
if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent);
} }
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent);
} }
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() +

View File

@ -30,10 +30,8 @@ import java.util.Locale;
*/ */
public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> { public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(-1, -1, -1, -1);
private int size = -1;
private int shardSize = -1;
private long minDocCount = -1;
private Terms.ValueType valueType; private Terms.ValueType valueType;
private Terms.Order order; private Terms.Order order;
private String includePattern; private String includePattern;
@ -50,7 +48,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
* Sets the size - indicating how many term buckets should be returned (defaults to 10) * Sets the size - indicating how many term buckets should be returned (defaults to 10)
*/ */
public TermsBuilder size(int size) { public TermsBuilder size(int size) {
this.size = size; bucketCountThresholds.setRequiredSize(size);
return this; return this;
} }
@ -59,7 +57,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
* node that coordinates the search execution). The higher the shard size is, the more accurate the results are. * node that coordinates the search execution). The higher the shard size is, the more accurate the results are.
*/ */
public TermsBuilder shardSize(int shardSize) { public TermsBuilder shardSize(int shardSize) {
this.shardSize = shardSize; bucketCountThresholds.setShardSize(shardSize);
return this; return this;
} }
@ -67,7 +65,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
* Set the minimum document count terms should have in order to appear in the response. * Set the minimum document count terms should have in order to appear in the response.
*/ */
public TermsBuilder minDocCount(long minDocCount) { public TermsBuilder minDocCount(long minDocCount) {
this.minDocCount = minDocCount; bucketCountThresholds.setMinDocCount(minDocCount);
return this; return this;
} }
@ -138,14 +136,11 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
@Override @Override
protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException { protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException {
if (size >=0) {
builder.field("size", size); bucketCountThresholds.toXContent(builder);
}
if (shardSize >= 0) { if (executionHint != null) {
builder.field("shard_size", shardSize); builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint);
}
if (minDocCount >= 0) {
builder.field("min_doc_count", minDocCount);
} }
if (valueType != null) { if (valueType != null) {
builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT)); builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT));
@ -174,9 +169,6 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
.endObject(); .endObject();
} }
} }
if (executionHint != null) {
builder.field("execution_hint", executionHint);
}
return builder; return builder;
} }
} }
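A brief usage sketch of the builder after this change (field name and values are hypothetical, and field() is assumed to come from the ValuesSourceAggregationBuilder base class): the familiar setters now populate the shared BucketCountThresholds, and only explicitly set thresholds end up in the generated request body.

import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;

class TermsBuilderSketch {
    static TermsBuilder topGenres() {
        return new TermsBuilder("genres")   // aggregation name (hypothetical)
                .field("genre")             // assumes field() from the ValuesSourceAggregationBuilder base class
                .size(5)                    // delegates to bucketCountThresholds.setRequiredSize(5)
                .minDocCount(2);            // delegates to bucketCountThresholds.setMinDocCount(2)
        // shard_size and shard_min_doc_count were never set explicitly, so
        // BucketCountThresholds.toXContent() omits them from the serialized request.
    }
}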

View File

@ -29,6 +29,8 @@ import java.io.IOException;
public class TermsParametersParser extends AbstractTermsParametersParser { public class TermsParametersParser extends AbstractTermsParametersParser {
private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10, -1);
public String getOrderKey() { public String getOrderKey() {
return orderKey; return orderKey;
} }
@ -40,10 +42,6 @@ public class TermsParametersParser extends AbstractTermsParametersParser {
String orderKey = "_count"; String orderKey = "_count";
boolean orderAsc = false; boolean orderAsc = false;
@Override
public void setDefaults() {
}
@Override @Override
public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException {
if (token == XContentParser.Token.START_OBJECT) { if (token == XContentParser.Token.START_OBJECT) {
@ -72,4 +70,8 @@ public class TermsParametersParser extends AbstractTermsParametersParser {
} }
} }
@Override
public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() {
return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS);
}
} }
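The copy constructor matters here: getDefaultBucketCountThresholds() hands out a fresh copy per request, so request-specific setters cannot mutate the shared static defaults. A small sketch of that behavior, assuming the BucketCountThresholds class introduced in this commit:

import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;

class DefaultsCopySketch {
    public static void main(String[] args) {
        TermsAggregator.BucketCountThresholds defaults =
                new TermsAggregator.BucketCountThresholds(1, 0, 10, -1);
        // getDefaultBucketCountThresholds() returns a copy, never the shared static instance:
        TermsAggregator.BucketCountThresholds perRequest =
                new TermsAggregator.BucketCountThresholds(defaults);
        perRequest.setMinDocCount(0);                    // request-specific override
        System.out.println(defaults.getMinDocCount());   // still 1 -- the shared defaults are untouched
    }
}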

View File

@ -49,22 +49,10 @@ public class TermsParser implements Aggregator.Parser {
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context); IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context);
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
int shardSize = aggParser.getShardSize(); TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
if (shardSize == 0) { bucketCountThresholds.ensureValidity();
shardSize = Integer.MAX_VALUE;
}
int requiredSize = aggParser.getRequiredSize();
if (requiredSize == 0) {
requiredSize = Integer.MAX_VALUE;
}
// shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shards in order to return <size>
if (shardSize < requiredSize) {
shardSize = requiredSize;
}
InternalOrder order = resolveOrder(aggParser.getOrderKey(), aggParser.isOrderAsc()); InternalOrder order = resolveOrder(aggParser.getOrderKey(), aggParser.isOrderAsc());
return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, requiredSize, shardSize, aggParser.getMinDocCount(), aggParser.getIncludeExclude(), aggParser.getExecutionHint()); return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, bucketCountThresholds, aggParser.getIncludeExclude(), aggParser.getExecutionHint());
} }
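The inline clamping removed above now lives in ensureValidity(). As a quick check that the behavior is preserved (assuming the BucketCountThresholds class from this commit), size = 0 and shard_size = 0 still mean "unbounded":

import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;

class EnsureValiditySketch {
    public static void main(String[] args) {
        // a request asking for size = 0 and shard_size = 0 (min_doc_count = 1, shard_min_doc_count = 0)
        TermsAggregator.BucketCountThresholds t =
                new TermsAggregator.BucketCountThresholds(1, 0, 0, 0);
        t.ensureValidity();
        System.out.println(t.getRequiredSize()); // Integer.MAX_VALUE, as the old inline code produced
        System.out.println(t.getShardSize());    // Integer.MAX_VALUE
    }
}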
static InternalOrder resolveOrder(String key, boolean asc) { static InternalOrder resolveOrder(String key, boolean asc) {