diff --git a/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc index 1906937d036..4f6f1a95b42 100644 --- a/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc +++ b/docs/reference/search/aggregations/bucket/significantterms-aggregation.asciidoc @@ -319,10 +319,23 @@ It is possible to only return terms that match more than a configured number of The above aggregation would only return tags which have been found in 10 hits or more. Default value is `3`. + + + +Terms that score highly will be collected on a shard level and merged with the terms collected from other shards in a second step. However, the shard does not have the information about the global term frequencies available. The decision if a term is added to a candidate list depends only on the score computed on the shard using local shard frequencies, not the global frequencies of the word. The `min_doc_count` criterion is only applied after merging local terms statistics of all shards. In a way the decision to add the term as a candidate is made without being very _certain_ about if the term will actually reach the required `min_doc_count`. This might cause many (globally) high frequent terms to be missing in the final result if low frequent but high scoring terms populated the candidate lists. To avoid this, the `shard_size` parameter can be increased to allow more candidate terms on the shards. However, this increases memory consumption and network traffic. + +The parameter `shard_min_doc_count` regulates the _certainty_ a shard has if the term should actually be added to the candidate list or not with respect to the `min_doc_count`. Terms will only be considered if their local shard frequency within the set is higher than the `shard_min_doc_count`. If your dictionary contains many low frequent words and you are not interested in these (for example misspellings), then you can set the `shard_min_doc_count` parameter to filter out candidate terms on a shard level that will with a resonable certainty not reach the required `min_doc_count` even after merging the local frequencies. `shard_min_doc_count` is set to `1` per default and has no effect unless you explicitly set it. + + + + WARNING: Setting `min_doc_count` to `1` is generally not advised as it tends to return terms that are typos or other bizarre curiosities. Finding more than one instance of a term helps reinforce that, while still rare, the term was not the result of a one-off accident. The default value of 3 is used to provide a minimum weight-of-evidence. + Setting `shard_min_doc_count` too high will cause significant candidate terms to be filtered out on a shard level. This value should be set much lower than `min_doc_count/#shards`. + + ===== Filtering Values diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index ec34dfeb18c..a23b99149cb 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -42,15 +42,17 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri protected long numCollectedDocs; protected final SignificantTermsAggregatorFactory termsAggFactory; + protected long shardMinDocCount; public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, - long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, + long estimatedBucketCount, long maxOrd, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { super(name, factories, valuesSource, estimatedBucketCount, maxOrd, null, requiredSize, shardSize, minDocCount, aggregationContext, parent); this.termsAggFactory = termsAggFactory; + this.shardMinDocCount = shardMinDocCount; } @Override @@ -99,8 +101,9 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri // Back at the central reducer these properties will be updated with // global stats spare.updateScore(); - - spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); + if (spare.subsetDf >= shardMinDocCount) { + spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); + } } final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()]; @@ -133,8 +136,8 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri private final LongHash bucketOrds; - public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, aggregationContext, parent, termsAggFactory); + public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { + super(name, factories, valuesSource, estimatedBucketCount, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggFactory); bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays()); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java index 52dffedf924..82e20ecba9f 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java @@ -39,15 +39,17 @@ import java.util.Collections; public class SignificantLongTermsAggregator extends LongTermsAggregator { public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, - long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, + long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { super(name, factories, valuesSource, format, estimatedBucketCount, null, requiredSize, shardSize, minDocCount, aggregationContext, parent); this.termsAggFactory = termsAggFactory; + this.shardMinDocCount = shardMinDocCount; } protected long numCollectedDocs; private final SignificantTermsAggregatorFactory termsAggFactory; + protected long shardMinDocCount; @Override public void collect(int doc, long owningBucketOrdinal) throws IOException { @@ -81,7 +83,9 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { spare.updateScore(); spare.bucketOrd = i; - spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare); + if (spare.subsetDf >= shardMinDocCount) { + spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare); + } } final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()]; diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java index 9d80acf2ede..4782a71384e 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsAggregator.java @@ -45,15 +45,17 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { protected long numCollectedDocs; protected final SignificantTermsAggregatorFactory termsAggFactory; + protected long shardMinDocCount; public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, - long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, + long estimatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { super(name, factories, valuesSource, estimatedBucketCount, null, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent); this.termsAggFactory = termsAggFactory; + this.shardMinDocCount = shardMinDocCount; } @Override @@ -90,7 +92,9 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { spare.updateScore(); spare.bucketOrd = i; - spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); + if (spare.subsetDf >= shardMinDocCount) { + spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare); + } } final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()]; @@ -130,9 +134,9 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator { private LongArray ordinalToBucket; public WithOrdinals(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, - long esitmatedBucketCount, int requiredSize, int shardSize, long minDocCount, AggregationContext aggregationContext, + long esitmatedBucketCount, int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { - super(name, factories, valuesSource, esitmatedBucketCount, requiredSize, shardSize, minDocCount, null, aggregationContext, parent, termsAggFactory); + super(name, factories, valuesSource, esitmatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, null, aggregationContext, parent, termsAggFactory); this.valuesSource = valuesSource; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 7f4442dcc04..132bbd357e6 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -52,9 +52,9 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { - return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); + return new SignificantStringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -67,12 +67,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { if (includeExclude != null) { throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); } - return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, aggregationContext, parent, termsAggregatorFactory); + return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -85,7 +85,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { if (includeExclude != null) { throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); @@ -93,7 +93,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; IndexSearcher indexSearcher = aggregationContext.searchContext().searcher(); long maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher); - return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -106,12 +106,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac @Override Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) { if (includeExclude != null) { throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms."); } - return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, aggregationContext, parent, termsAggregatorFactory); + return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory); } @Override @@ -136,7 +136,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount, - int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude, + int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory); abstract boolean needsGlobalOrdinals(); @@ -150,6 +150,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac private final int requiredSize; private final int shardSize; private final long minDocCount; + private final long shardMinDocCount; private final IncludeExclude includeExclude; private final String executionHint; private String indexedFieldName; @@ -159,13 +160,14 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac private Filter filter; public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, int requiredSize, - int shardSize, long minDocCount, IncludeExclude includeExclude, + int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude, String executionHint, Filter filter) { super(name, SignificantStringTerms.TYPE.name(), valueSourceConfig); this.requiredSize = requiredSize; this.shardSize = shardSize; this.minDocCount = minDocCount; + this.shardMinDocCount = shardMinDocCount; this.includeExclude = includeExclude; this.executionHint = executionHint; if (!valueSourceConfig.unmapped()) { @@ -211,7 +213,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } assert execution != null; valuesSource.setNeedsGlobalOrdinals(execution.needsGlobalOrdinals()); - return execution.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent, this); + return execution.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, this); } if (includeExclude != null) { @@ -224,7 +226,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { throw new UnsupportedOperationException("No support for examining floating point numerics"); } - return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, requiredSize, shardSize, minDocCount, aggregationContext, parent, this); + return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, this); } throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index be040e8d91c..4b6402facd5 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -36,6 +36,7 @@ public class SignificantTermsBuilder extends AggregationBuilder minDocCount) { + shardMinDocCount = minDocCount; + } + IncludeExclude includeExclude = incExcParser.includeExclude(); - return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), requiredSize, shardSize, minDocCount, includeExclude, executionHint, filter); + return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, executionHint, filter); } } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsMinDocCountTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsMinDocCountTests.java new file mode 100644 index 00000000000..0365da900e5 --- /dev/null +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsMinDocCountTests.java @@ -0,0 +1,111 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket; + +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.query.FilterBuilders; +import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter; +import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; +import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory; +import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; +import static org.hamcrest.Matchers.equalTo; + +/** + * + */ +public class SignificantTermsMinDocCountTests extends ElasticsearchIntegrationTest { + private static final String index = "someindex"; + private static final String type = "testtype"; + public String randomExecutionHint() { + return randomBoolean() ? null : randomFrom(SignificantTermsAggregatorFactory.ExecutionMode.values()).toString(); + } + + // see https://github.com/elasticsearch/elasticsearch/issues/5998 + @Test + public void shardMinDocCountTest() throws Exception { + + String termtype = "string"; + if (randomBoolean()) { + termtype = "long"; + } + assertAcked(prepareCreate(index).setSettings(SETTING_NUMBER_OF_SHARDS, 1, SETTING_NUMBER_OF_REPLICAS, 0).addMapping(type, "{\"properties\":{\"text\": {\"type\": \"" + termtype + "\"}}}")); + ensureYellow(index); + List indexBuilders = new ArrayList<>(); + + addTermsDocs("1", 1, 0, indexBuilders);//high score but low doc freq + addTermsDocs("2", 1, 0, indexBuilders); + addTermsDocs("3", 1, 0, indexBuilders); + addTermsDocs("4", 1, 0, indexBuilders); + addTermsDocs("5", 3, 1, indexBuilders);//low score but high doc freq + addTermsDocs("6", 3, 1, indexBuilders); + addTermsDocs("7", 0, 3, indexBuilders);// make sure the terms all get score > 0 except for this one + indexRandom(true, indexBuilders); + + // first, check that indeed when not setting the shardMinDocCount parameter 0 terms are returned + SearchResponse response = client().prepareSearch(index) + .addAggregation( + (new FilterAggregationBuilder("inclass").filter(FilterBuilders.termFilter("class", true))) + .subAggregation(new SignificantTermsBuilder("mySignificantTerms").field("text").minDocCount(2).size(2).executionHint(randomExecutionHint())) + ) + .execute() + .actionGet(); + assertSearchResponse(response); + InternalFilter filteredBucket = response.getAggregations().get("inclass"); + SignificantTerms sigterms = filteredBucket.getAggregations().get("mySignificantTerms"); + assertThat(sigterms.getBuckets().size(), equalTo(0)); + + + response = client().prepareSearch(index) + .addAggregation( + (new FilterAggregationBuilder("inclass").filter(FilterBuilders.termFilter("class", true))) + .subAggregation(new SignificantTermsBuilder("mySignificantTerms").field("text").minDocCount(2).shardMinDocCount(2).size(2).executionHint(randomExecutionHint())) + ) + .execute() + .actionGet(); + assertSearchResponse(response); + filteredBucket = response.getAggregations().get("inclass"); + sigterms = filteredBucket.getAggregations().get("mySignificantTerms"); + assertThat(sigterms.getBuckets().size(), equalTo(2)); + + } + + private void addTermsDocs(String term, int numInClass, int numNotInClass, List builders) { + String sourceClass = "{\"text\": \"" + term + "\", \"class\":" + "true" + "}"; + String sourceNotClass = "{\"text\": \"" + term + "\", \"class\":" + "false" + "}"; + for (int i = 0; i < numInClass; i++) { + builders.add(client().prepareIndex(index, type).setSource(sourceClass)); + } + for (int i = 0; i < numNotInClass; i++) { + builders.add(client().prepareIndex(index, type).setSource(sourceNotClass)); + } + } + +}