diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index 867ac004ba9..d3f89567ab9 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -42,16 +42,14 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri protected long numCollectedDocs; protected final SignificantTermsAggregatorFactory termsAggFactory; - protected long shardMinDocCount; public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, - long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, + long estimatedBucketCount, long maxOrd, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, bucketCountThresholds, includeExclude, aggregationContext, parent); this.termsAggFactory = termsAggFactory; - this.shardMinDocCount = shardMinDocCount; } @Override @@ -68,11 +66,11 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri } final int size; - if (minDocCount == 0) { + if (bucketCountThresholds.getMinDocCount() == 0) { // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns - size = (int) Math.min(globalOrdinals.getMaxOrd(), shardSize); + size = (int) Math.min(globalOrdinals.getMaxOrd(), bucketCountThresholds.getShardSize()); } else { - size = (int) Math.min(maxBucketOrd(), shardSize); + size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); } long supersetSize = termsAggFactory.prepareBackground(context); long subsetSize = numCollectedDocs; @@ -85,7 +83,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri } final long bucketOrd = getBucketOrd(globalTermOrd); final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); - if (minDocCount > 0 && bucketDocCount == 0) { + if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) { continue; } if (spare == null) { @@ -102,7 +100,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri // Back at the central reducer these properties will be updated with // global stats spare.updateScore(); - if (spare.subsetDf >= shardMinDocCount) { + if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) { spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); } } @@ -116,7 +114,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri list[i] = bucket; } - return new SignificantStringTerms(subsetSize, supersetSize, name, requiredSize, minDocCount, Arrays.asList(list)); + return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override @@ -125,7 +123,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri ContextIndexSearcher searcher = context.searchContext().searcher(); IndexReader topReader = searcher.getIndexReader(); int supersetSize = topReader.numDocs(); - return new SignificantStringTerms(0, supersetSize, name, requiredSize, minDocCount, Collections.emptyList()); + return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override @@ -137,8 +135,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri private final LongHash bucketOrds; - public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggFactory); + public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { + super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java index 82e20ecba9f..4f085f9d892 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java @@ -39,17 +39,15 @@ import java.util.Collections; public class SignificantLongTermsAggregator extends LongTermsAggregator { public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, - long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, + long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, format, estimatedBucketCount, null, requiredSize, shardSize, minDocCount, aggregationContext, parent); + super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent); this.termsAggFactory = termsAggFactory; - this.shardMinDocCount = shardMinDocCount; } protected long numCollectedDocs; private final SignificantTermsAggregatorFactory termsAggFactory; - protected long shardMinDocCount; @Override public void collect(int doc, long owningBucketOrdinal) throws IOException { @@ -61,7 +59,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { public SignificantLongTerms buildAggregation(long owningBucketOrdinal) { assert owningBucketOrdinal == 0; - final int size = (int) Math.min(bucketOrds.size(), shardSize); + final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); long supersetSize = termsAggFactory.prepareBackground(context); long subsetSize = numCollectedDocs; @@ -83,7 +81,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { spare.updateScore(); spare.bucketOrd = i; - if (spare.subsetDf >= shardMinDocCount) { + if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) { spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare); } } @@ -94,7 +92,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { bucket.aggregations = bucketAggregations(bucket.bucketOrd); list[i] = bucket; } - return new SignificantLongTerms(subsetSize, supersetSize, name, formatter, requiredSize, minDocCount, Arrays.asList(list)); + return new SignificantLongTerms(subsetSize, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override @@ -103,7 +101,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { ContextIndexSearcher searcher = context.searchContext().searcher(); IndexReader topReader = searcher.getIndexReader(); int supersetSize = topReader.numDocs(); - return new SignificantLongTerms(0, supersetSize, name, formatter, requiredSize, minDocCount, Collections.emptyList()); + return new SignificantLongTerms(0, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java index 67d05200586..3aff7e0f7c6 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java @@ -45,17 +45,14 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { protected long numCollectedDocs; protected final SignificantTermsAggregatorFactory termsAggFactory; - protected long shardMinDocCount; public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, - long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, + long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, null, requiredSize, shardSize, - minDocCount, includeExclude, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, null, bucketCountThresholds, includeExclude, aggregationContext, parent); this.termsAggFactory = termsAggFactory; - this.shardMinDocCount = shardMinDocCount; } @Override @@ -68,7 +65,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { public SignificantStringTerms buildAggregation(long owningBucketOrdinal) { assert owningBucketOrdinal == 0; - final int size = (int) Math.min(bucketOrds.size(), shardSize); + final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); long supersetSize = termsAggFactory.prepareBackground(context); long subsetSize = numCollectedDocs; @@ -91,7 +88,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { spare.updateScore(); spare.bucketOrd = i; - if (spare.subsetDf >= shardMinDocCount) { + if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) { spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); } } @@ -105,7 +102,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { list[i] = bucket; } - return new SignificantStringTerms(subsetSize, supersetSize, name, requiredSize, minDocCount, Arrays.asList(list)); + return new SignificantStringTerms(subsetSize, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override @@ -114,7 +111,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { ContextIndexSearcher searcher = context.searchContext().searcher(); IndexReader topReader = searcher.getIndexReader(); int supersetSize = topReader.numDocs(); - return new SignificantStringTerms(0, supersetSize, name, requiredSize, minDocCount, Collections.emptyList()); + return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override @@ -133,9 +130,9 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { private LongArray ordinalToBucket; public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, - long esitmatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, + long esitmatedBucketCount, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, esitmatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, null, aggregationContext, parent, termsAggFactory); + super(name, factories, valuesSource, esitmatedBucketCount, bucketCountThresholds, null, aggregationContext, parent, termsAggFactory); this.valuesSource = valuesSource; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index a3d7e937d46..51b49e43a40 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.lucene.index.FilterableTermsEnum; import org.elasticsearch.common.lucene.index.FreqTermsEnum; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.aggregations.*; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; @@ -52,9 +53,9 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { - return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); + return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -67,12 +68,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { if (includeExclude != null) { - return MAP.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); + return MAP.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory); } - return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); + return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -85,12 +86,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; IndexSearcher indexSearcher = aggregationContext.searchContext().searcher(); long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher); - return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -103,9 +104,9 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { - return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -130,7 +131,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory); abstract boolean needsGlobalOrdinals(); @@ -140,11 +141,6 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac return parseField.getPreferredName(); } } - - private final int requiredSize; - private final int shardSize; - private final long minDocCount; - private final long shardMinDocCount; private final IncludeExclude includeExclude; private final String executionHint; private String indexedFieldName; @@ -152,16 +148,13 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac private FilterableTermsEnum termsEnum; private int numberOfAggregatorsCreated = 0; private Filter filter; + private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, int requiredSize, - int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, + public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, String executionHint, Filter filter) { super(name, SignificantStringTerms.TYPE.name(), valueSourceConfig); - this.requiredSize = requiredSize; - this.shardSize = shardSize; - this.minDocCount = minDocCount; - this.shardMinDocCount = shardMinDocCount; + this.bucketCountThresholds = bucketCountThresholds; this.includeExclude = includeExclude; this.executionHint = executionHint; if (!valueSourceConfig.unmapped()) { @@ -173,7 +166,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) { - final InternalAggregation aggregation = new UnmappedSignificantTerms(name, requiredSize, minDocCount); + final InternalAggregation aggregation = new UnmappedSignificantTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount()); return new NonCollectingAggregator(name, aggregationContext, parent) { @Override public InternalAggregation buildEmptyAggregation() { @@ -205,7 +198,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } assert execution != null; valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals()); - return execution.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, this); + return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this); } if (includeExclude != null) { @@ -218,7 +211,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { throw new UnsupportedOperationException("No support for examining floating point numerics"); } - return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, this); + return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this); } throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index 2e636078191..7340ea1e8fd 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.FilterBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator; import java.io.IOException; @@ -34,11 +35,9 @@ import java.io.IOException; */ public class SignificantTermsBuilder extends AggregationBuilder { + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(-1, -1, -1, -1); + private String field; - private int requiredSize = AbstractTermsParametersParser.DEFAULT_REQUIRED_SIZE; - private int shardSize = AbstractTermsParametersParser.DEFAULT_SHARD_SIZE; - private int minDocCount = AbstractTermsParametersParser.DEFAULT_MIN_DOC_COUNT; - private int shardMinDocCount = AbstractTermsParametersParser.DEFAULT_SHARD_MIN_DOC_COUNT; private String executionHint; private String includePattern; private int includeFlags; @@ -57,17 +56,17 @@ public class SignificantTermsBuilder extends AggregationBuilder entries from every shards in order to return - if (shardSize < aggParser.getRequiredSize()) { - shardSize = aggParser.getRequiredSize(); - } - - long shardMinDocCount = aggParser.getShardMinDocCount(); - // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria - if (shardMinDocCount > aggParser.getMinDocCount()) { - shardMinDocCount = aggParser.getMinDocCount(); - } - return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), aggParser.getRequiredSize(), shardSize, aggParser.getMinDocCount(), shardMinDocCount, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter()); + bucketCountThresholds.ensureValidity(); + return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), bucketCountThresholds, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter()); } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java index 8c0b67d7875..25bb14219f3 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java @@ -22,26 +22,19 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.InternalAggregation; -import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import java.util.Collections; -abstract class AbstractStringTermsAggregator extends BucketsAggregator { +abstract class AbstractStringTermsAggregator extends TermsAggregator { protected final InternalOrder order; - protected final int requiredSize; - protected final int shardSize; - protected final long minDocCount; public AbstractStringTermsAggregator(String name, AggregatorFactories factories, long estimatedBucketsCount, AggregationContext context, Aggregator parent, - InternalOrder order, int requiredSize, int shardSize, long minDocCount) { - super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketsCount, context, parent); + InternalOrder order, BucketCountThresholds bucketCountThresholds) { + super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketsCount, context, parent, bucketCountThresholds); this.order = InternalOrder.validate(order, this); - this.requiredSize = requiredSize; - this.shardSize = shardSize; - this.minDocCount = minDocCount; } @Override @@ -51,7 +44,7 @@ abstract class AbstractStringTermsAggregator extends BucketsAggregator { @Override public InternalAggregation buildEmptyAggregation() { - return new StringTerms(name, order, requiredSize, minDocCount, Collections.emptyList()); + return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java index 71adcab3c40..e8493cb0933 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java @@ -29,37 +29,20 @@ import java.io.IOException; public abstract class AbstractTermsParametersParser { - public static final int DEFAULT_REQUIRED_SIZE = 10; - public static final int DEFAULT_SHARD_SIZE = -1; + public static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); + public static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); + public static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); + public static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + public static final ParseField REQUIRED_SIZE_FIELD_NAME = new ParseField("size"); - //Typically need more than one occurrence of something for it to be statistically significant - public static final int DEFAULT_MIN_DOC_COUNT = 1; - public static final int DEFAULT_SHARD_MIN_DOC_COUNT = 1; + //These are the results of the parsing. + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(); - static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); - static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); - static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); - static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + private String executionHint = null; - public int getRequiredSize() { - return requiredSize; - } + IncludeExclude includeExclude; - public int getShardSize() { - return shardSize; - } - - public void setMinDocCount(long minDocCount) { - this.minDocCount = minDocCount; - } - - public long getMinDocCount() { - return minDocCount; - } - - public long getShardMinDocCount() { - return shardMinDocCount; - } + public TermsAggregator.BucketCountThresholds getBucketCountThresholds() {return bucketCountThresholds;} //These are the results of the parsing. @@ -71,17 +54,10 @@ public abstract class AbstractTermsParametersParser { return includeExclude; } - private int requiredSize = DEFAULT_REQUIRED_SIZE; - private int shardSize = DEFAULT_SHARD_SIZE; - private long minDocCount = DEFAULT_MIN_DOC_COUNT; - private long shardMinDocCount = DEFAULT_SHARD_MIN_DOC_COUNT; - private String executionHint = null; - IncludeExclude includeExclude; - public void parse(String aggregationName, XContentParser parser, SearchContext context, ValuesSourceParser vsParser, IncludeExclude.Parser incExcParser) throws IOException { + bucketCountThresholds = getDefaultBucketCountThresholds(); XContentParser.Token token; String currentFieldName = null; - setDefaults(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -97,14 +73,14 @@ public abstract class AbstractTermsParametersParser { parseSpecial(aggregationName, parser, context, token, currentFieldName); } } else if (token == XContentParser.Token.VALUE_NUMBER) { - if ("size".equals(currentFieldName)) { - requiredSize = parser.intValue(); + if (REQUIRED_SIZE_FIELD_NAME.match(currentFieldName)) { + bucketCountThresholds.setRequiredSize(parser.intValue()); } else if (SHARD_SIZE_FIELD_NAME.match(currentFieldName)) { - shardSize = parser.intValue(); + bucketCountThresholds.setShardSize(parser.intValue()); } else if (MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) { - minDocCount = parser.intValue(); + bucketCountThresholds.setMinDocCount(parser.intValue()); } else if (SHARD_MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) { - shardMinDocCount = parser.longValue(); + bucketCountThresholds.setShardMinDocCount(parser.longValue()); } else { parseSpecial(aggregationName, parser, context, token, currentFieldName); } @@ -117,5 +93,5 @@ public abstract class AbstractTermsParametersParser { public abstract void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException; - public abstract void setDefaults(); + protected abstract TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds(); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java index 4731d11753a..c9e0894e76a 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java @@ -25,7 +25,6 @@ import org.elasticsearch.common.util.LongHash; import org.elasticsearch.index.fielddata.DoubleValues; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -39,26 +38,20 @@ import java.util.Collections; /** * */ -public class DoubleTermsAggregator extends BucketsAggregator { +public class DoubleTermsAggregator extends TermsAggregator { private final InternalOrder order; - private final int requiredSize; - private final int shardSize; - private final long minDocCount; private final ValuesSource.Numeric valuesSource; private final ValueFormatter formatter; private final LongHash bucketOrds; private DoubleValues values; public DoubleTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount, - InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { - super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent); + InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) { + super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds); this.valuesSource = valuesSource; this.formatter = format != null ? format.formatter() : null; this.order = InternalOrder.validate(order, this); - this.requiredSize = requiredSize; - this.shardSize = shardSize; - this.minDocCount = minDocCount; bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } @@ -94,7 +87,7 @@ public class DoubleTermsAggregator extends BucketsAggregator { public DoubleTerms buildAggregation(long owningBucketOrdinal) { assert owningBucketOrdinal == 0; - if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) { + if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) { // we need to fill-in the blanks for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) { context.setNextReader(ctx); @@ -108,7 +101,7 @@ public class DoubleTermsAggregator extends BucketsAggregator { } } - final int size = (int) Math.min(bucketOrds.size(), shardSize); + final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); DoubleTerms.Bucket spare = null; @@ -128,12 +121,12 @@ public class DoubleTermsAggregator extends BucketsAggregator { bucket.aggregations = bucketAggregations(bucket.bucketOrd); list[i] = bucket; } - return new DoubleTerms(name, order, formatter, requiredSize, minDocCount, Arrays.asList(list)); + return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override public DoubleTerms buildEmptyAggregation() { - return new DoubleTerms(name, order, formatter, requiredSize, minDocCount, Collections.emptyList()); + return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index c4f86db6e13..adeec5741fa 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -62,9 +62,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr protected LongBitSet acceptedGlobalOrdinals; public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, + long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - super(name, factories, maxOrd, aggregationContext, parent, order, requiredSize, shardSize, minDocCount); + super(name, factories, maxOrd, aggregationContext, parent, order, bucketCountThresholds); this.valuesSource = valuesSource; this.includeExclude = includeExclude; } @@ -115,11 +115,11 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr } final int size; - if (minDocCount == 0) { + if (bucketCountThresholds.getMinDocCount() == 0) { // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns - size = (int) Math.min(globalOrdinals.getMaxOrd(), shardSize); + size = (int) Math.min(globalOrdinals.getMaxOrd(), bucketCountThresholds.getShardSize()); } else { - size = (int) Math.min(maxBucketOrd(), shardSize); + size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); } BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); StringTerms.Bucket spare = null; @@ -129,7 +129,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr } final long bucketOrd = getBucketOrd(globalTermOrd); final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd); - if (minDocCount > 0 && bucketDocCount == 0) { + if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) { continue; } if (spare == null) { @@ -148,7 +148,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr list[i] = bucket; } - return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list)); + return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } /** @@ -160,10 +160,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final LongHash bucketOrds; public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, + long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { // Set maxOrd to estimatedBucketCount! To be conservative with memory. - super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, order, bucketCountThresholds, includeExclude, aggregationContext, parent); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } @@ -207,8 +207,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private LongArray current; public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { - super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent); + long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) { + super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, null, aggregationContext, parent); this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java index fab6b530bcf..bc2a0360dad 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java @@ -26,7 +26,6 @@ import org.elasticsearch.index.fielddata.LongValues; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.InternalAggregation; -import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -40,26 +39,20 @@ import java.util.Collections; /** * */ -public class LongTermsAggregator extends BucketsAggregator { +public class LongTermsAggregator extends TermsAggregator { private final InternalOrder order; - protected final int requiredSize; - protected final int shardSize; - protected final long minDocCount; protected final ValuesSource.Numeric valuesSource; protected final @Nullable ValueFormatter formatter; protected final LongHash bucketOrds; private LongValues values; public LongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount, - InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { - super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent); + InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) { + super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds); this.valuesSource = valuesSource; this.formatter = format != null ? format.formatter() : null; this.order = InternalOrder.validate(order, this); - this.requiredSize = requiredSize; - this.shardSize = shardSize; - this.minDocCount = minDocCount; bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } @@ -94,7 +87,7 @@ public class LongTermsAggregator extends BucketsAggregator { public InternalAggregation buildAggregation(long owningBucketOrdinal) { assert owningBucketOrdinal == 0; - if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) { + if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) { // we need to fill-in the blanks for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) { context.setNextReader(ctx); @@ -108,7 +101,7 @@ public class LongTermsAggregator extends BucketsAggregator { } } - final int size = (int) Math.min(bucketOrds.size(), shardSize); + final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); LongTerms.Bucket spare = null; @@ -128,12 +121,12 @@ public class LongTermsAggregator extends BucketsAggregator { bucket.aggregations = bucketAggregations(bucket.bucketOrd); list[i] = bucket; } - return new LongTerms(name, order, formatter, requiredSize, minDocCount, Arrays.asList(list)); + return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override public InternalAggregation buildEmptyAggregation() { - return new LongTerms(name, order, formatter, requiredSize, minDocCount, Collections.emptyList()); + return new LongTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java index 2472866c9c2..43fbfe369ca 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java @@ -53,10 +53,10 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { private BytesValues values; public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - InternalOrder order, int requiredSize, int shardSize, long minDocCount, + InternalOrder order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - super(name, factories, estimatedBucketCount, aggregationContext, parent, order, requiredSize, shardSize, minDocCount); + super(name, factories, estimatedBucketCount, aggregationContext, parent, order, bucketCountThresholds); this.valuesSource = valuesSource; this.includeExclude = includeExclude; bucketOrds = new BytesRefHash(estimatedBucketCount, aggregationContext.bigArrays()); @@ -138,7 +138,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { public InternalAggregation buildAggregation(long owningBucketOrdinal) { assert owningBucketOrdinal == 0; - if (minDocCount == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < requiredSize)) { + if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) { // we need to fill-in the blanks List valuesWithOrdinals = Lists.newArrayList(); for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) { @@ -186,19 +186,19 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { // let's try to find `shardSize` terms that matched no hit // this one needs shardSize and not requiredSize because even though terms have a count of 0 here, // they might have higher counts on other shards - for (int added = 0; added < shardSize && terms.hasNext(); ) { + for (int added = 0; added < bucketCountThresholds.getShardSize() && terms.hasNext(); ) { if (bucketOrds.add(terms.next()) >= 0) { ++added; } } } else if (order == InternalOrder.COUNT_DESC) { // add terms until there are enough buckets - while (bucketOrds.size() < requiredSize && terms.hasNext()) { + while (bucketOrds.size() < bucketCountThresholds.getRequiredSize() && terms.hasNext()) { bucketOrds.add(terms.next()); } } else if (order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) { // add the `requiredSize` least terms - for (int i = 0; i < requiredSize && terms.hasNext(); ++i) { + for (int i = 0; i < bucketCountThresholds.getRequiredSize() && terms.hasNext(); ++i) { bucketOrds.add(terms.next()); } } else { @@ -210,7 +210,7 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { } } - final int size = (int) Math.min(bucketOrds.size(), shardSize); + final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this)); StringTerms.Bucket spare = null; @@ -233,12 +233,12 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { list[i] = bucket; } - return new StringTerms(name, order, requiredSize, minDocCount, Arrays.asList(list)); + return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list)); } @Override public InternalAggregation buildEmptyAggregation() { - return new StringTerms(name, order, requiredSize, minDocCount, Collections.emptyList()); + return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), Collections.emptyList()); } @Override @@ -257,8 +257,8 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator { private LongArray ordinalToBucket; public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, long esitmatedBucketCount, - InternalOrder order, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent) { - super(name, factories, valuesSource, esitmatedBucketCount, order, requiredSize, shardSize, minDocCount, null, aggregationContext, parent); + InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent) { + super(name, factories, valuesSource, esitmatedBucketCount, order, bucketCountThresholds, null, aggregationContext, parent); this.valuesSource = valuesSource; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java new file mode 100644 index 00000000000..60f468d14a6 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregator.java @@ -0,0 +1,133 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.support.AggregationContext; + +import java.io.IOException; + +public abstract class TermsAggregator extends BucketsAggregator { + + public static class BucketCountThresholds { + private Explicit minDocCount; + private Explicit shardMinDocCount; + private Explicit requiredSize; + private Explicit shardSize; + + public BucketCountThresholds(long minDocCount, long shardMinDocCount, int requiredSize, int shardSize) { + this.minDocCount = new Explicit<>(minDocCount, false); + this.shardMinDocCount = new Explicit<>(shardMinDocCount, false); + this.requiredSize = new Explicit<>(requiredSize, false); + this.shardSize = new Explicit<>(shardSize, false); + } + public BucketCountThresholds() { + this(-1, -1, -1, -1); + } + + public BucketCountThresholds(BucketCountThresholds bucketCountThresholds) { + this(bucketCountThresholds.minDocCount.value(), bucketCountThresholds.shardMinDocCount.value(), bucketCountThresholds.requiredSize.value(), bucketCountThresholds.shardSize.value()); + } + + public void ensureValidity() { + + if (shardSize.value() == 0) { + setShardSize(Integer.MAX_VALUE); + } + + if (requiredSize.value() == 0) { + setRequiredSize(Integer.MAX_VALUE); + } + // shard_size cannot be smaller than size as we need to at least fetch entries from every shards in order to return + if (shardSize.value() < requiredSize.value()) { + setShardSize(requiredSize.value()); + } + + // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria + if (shardMinDocCount.value() > minDocCount.value()) { + setShardMinDocCount(minDocCount.value()); + } + + if (requiredSize.value() < 0 || minDocCount.value() < 0) { + throw new ElasticsearchException("parameters [requiredSize] and [minDocCount] must be >=0 in terms aggregation."); + } + } + + public long getShardMinDocCount() { + return shardMinDocCount.value(); + } + + public void setShardMinDocCount(long shardMinDocCount) { + this.shardMinDocCount = new Explicit<>(shardMinDocCount, true); + } + + public long getMinDocCount() { + return minDocCount.value(); + } + + public void setMinDocCount(long minDocCount) { + this.minDocCount = new Explicit<>(minDocCount, true); + } + + public int getRequiredSize() { + return requiredSize.value(); + } + + public void setRequiredSize(int requiredSize) { + this.requiredSize = new Explicit<>(requiredSize, true); + } + + public int getShardSize() { + return shardSize.value(); + } + + public void setShardSize(int shardSize) { + this.shardSize = new Explicit<>(shardSize, true); + } + + public void toXContent(XContentBuilder builder) throws IOException { + if (requiredSize.explicit()) { + builder.field(AbstractTermsParametersParser.REQUIRED_SIZE_FIELD_NAME.getPreferredName(), requiredSize.value()); + } + if (shardSize.explicit()) { + builder.field(AbstractTermsParametersParser.SHARD_SIZE_FIELD_NAME.getPreferredName(), shardSize.value()); + } + if (minDocCount.explicit()) { + builder.field(AbstractTermsParametersParser.MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), minDocCount.value()); + } + if (shardMinDocCount.explicit()) { + builder.field(AbstractTermsParametersParser.SHARD_MIN_DOC_COUNT_FIELD_NAME.getPreferredName(), shardMinDocCount.value()); + } + } + } + + protected final BucketCountThresholds bucketCountThresholds; + + public TermsAggregator(String name, BucketAggregationMode bucketAggregationMode, AggregatorFactories factories, long estimatedBucketsCount, AggregationContext context, Aggregator parent, BucketCountThresholds bucketCountThresholds) { + super(name, bucketAggregationMode, factories, estimatedBucketsCount, context, parent); + this.bucketCountThresholds = bucketCountThresholds; + } +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 5fe3dd6cba4..6cc4614e37a 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -39,9 +39,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } @Override @@ -54,12 +54,12 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { if (includeExclude != null) { - return MAP.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return MAP.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } - return new StringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new StringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent); } @Override @@ -72,9 +72,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } @Override @@ -87,9 +87,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { - return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } @Override @@ -101,12 +101,12 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) { if (includeExclude != null || factories != null) { - return GLOBAL_ORDINALS.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return GLOBAL_ORDINALS.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } - return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, aggregationContext, parent); } @Override @@ -131,7 +131,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { } abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, + long maxOrd, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent); abstract boolean needsGlobalOrdinals(); @@ -143,27 +143,22 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { } private final InternalOrder order; - private final int requiredSize; - private final int shardSize; - private final long minDocCount; private final IncludeExclude includeExclude; private final String executionHint; + private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - public TermsAggregatorFactory(String name, ValuesSourceConfig config, InternalOrder order, int requiredSize, - int shardSize, long minDocCount, IncludeExclude includeExclude, String executionHint) { + public TermsAggregatorFactory(String name, ValuesSourceConfig config, InternalOrder order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, String executionHint) { super(name, StringTerms.TYPE.name(), config); this.order = order; - this.requiredSize = requiredSize; - this.shardSize = shardSize; - this.minDocCount = minDocCount; this.includeExclude = includeExclude; this.executionHint = executionHint; + this.bucketCountThresholds = bucketCountThresholds; } @Override protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) { - final InternalAggregation aggregation = new UnmappedTerms(name, order, requiredSize, minDocCount); + final InternalAggregation aggregation = new UnmappedTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount()); return new NonCollectingAggregator(name, aggregationContext, parent) { @Override public InternalAggregation buildEmptyAggregation() { @@ -247,7 +242,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { assert execution != null; valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals()); - return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); + return execution.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, includeExclude, aggregationContext, parent); } if (includeExclude != null) { @@ -257,9 +252,9 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { if (valuesSource instanceof ValuesSource.Numeric) { if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { - return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent); } - return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent); + return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent); } throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java index 0a0dac9b997..93e895b6af5 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsBuilder.java @@ -30,10 +30,8 @@ import java.util.Locale; */ public class TermsBuilder extends ValuesSourceAggregationBuilder { + private TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(-1, -1, -1, -1); - private int size = -1; - private int shardSize = -1; - private long minDocCount = -1; private Terms.ValueType valueType; private Terms.Order order; private String includePattern; @@ -50,7 +48,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { * Sets the size - indicating how many term buckets should be returned (defaults to 10) */ public TermsBuilder size(int size) { - this.size = size; + bucketCountThresholds.setRequiredSize(size); return this; } @@ -59,7 +57,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { * node that coordinates the search execution). The higher the shard size is, the more accurate the results are. */ public TermsBuilder shardSize(int shardSize) { - this.shardSize = shardSize; + bucketCountThresholds.setShardSize(shardSize); return this; } @@ -67,7 +65,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { * Set the minimum document count terms should have in order to appear in the response. */ public TermsBuilder minDocCount(long minDocCount) { - this.minDocCount = minDocCount; + bucketCountThresholds.setMinDocCount(minDocCount); return this; } @@ -138,14 +136,11 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { @Override protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException { - if (size >=0) { - builder.field("size", size); - } - if (shardSize >= 0) { - builder.field("shard_size", shardSize); - } - if (minDocCount >= 0) { - builder.field("min_doc_count", minDocCount); + + bucketCountThresholds.toXContent(builder); + + if (executionHint != null) { + builder.field(AbstractTermsParametersParser.EXECUTION_HINT_FIELD_NAME.getPreferredName(), executionHint); } if (valueType != null) { builder.field("value_type", valueType.name().toLowerCase(Locale.ROOT)); @@ -174,9 +169,6 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { .endObject(); } } - if (executionHint != null) { - builder.field("execution_hint", executionHint); - } return builder; } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java index fd2fc105e0b..1dae3b704ca 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java @@ -29,6 +29,8 @@ import java.io.IOException; public class TermsParametersParser extends AbstractTermsParametersParser { + private static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new TermsAggregator.BucketCountThresholds(1, 0, 10, -1); + public String getOrderKey() { return orderKey; } @@ -40,10 +42,6 @@ public class TermsParametersParser extends AbstractTermsParametersParser { String orderKey = "_count"; boolean orderAsc = false; - @Override - public void setDefaults() { - } - @Override public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { if (token == XContentParser.Token.START_OBJECT) { @@ -72,4 +70,8 @@ public class TermsParametersParser extends AbstractTermsParametersParser { } } + @Override + public TermsAggregator.BucketCountThresholds getDefaultBucketCountThresholds() { + return new TermsAggregator.BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS); + } } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index e1a4d4af854..398f8721acd 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -49,22 +49,10 @@ public class TermsParser implements Aggregator.Parser { IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context); aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); - int shardSize = aggParser.getShardSize(); - if (shardSize == 0) { - shardSize = Integer.MAX_VALUE; - } - - int requiredSize = aggParser.getRequiredSize(); - if (requiredSize == 0) { - requiredSize = Integer.MAX_VALUE; - } - - // shard_size cannot be smaller than size as we need to at least fetch entries from every shards in order to return - if (shardSize < requiredSize) { - shardSize = requiredSize; - } + TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds(); + bucketCountThresholds.ensureValidity(); InternalOrder order = resolveOrder(aggParser.getOrderKey(), aggParser.isOrderAsc()); - return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, requiredSize, shardSize, aggParser.getMinDocCount(), aggParser.getIncludeExclude(), aggParser.getExecutionHint()); + return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, bucketCountThresholds, aggParser.getIncludeExclude(), aggParser.getExecutionHint()); } static InternalOrder resolveOrder(String key, boolean asc) {