[TESTS] Fix MinDocCountTests.

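The new include/exclude support for global ordinals did not filter out
non-accepted terms in `buildAggregation`. That filtering is required when
`minDocCount` is 0: `buildAggregation` then walks every global ordinal and no
longer skips terms whose doc count is 0, so it has to consult
`acceptedGlobalOrdinals` itself.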
This commit is contained in:
Adrien Grand 2014-05-12 15:44:22 +02:00
parent 9361305177
commit 6d9da390ed
6 changed files with 39 additions and 57 deletions

View File

@ -80,6 +80,9 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
SignificantStringTerms.Bucket spare = null;
for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (minDocCount > 0 && bucketDocCount == 0) {

View File

@ -70,7 +70,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
int requiredSize, int shardSize, long minDocCount, long shardMinDocCount, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory) {
if (includeExclude != null) {
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
return MAP.create(name, factories, valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, aggregationContext, parent, termsAggregatorFactory);
}
return new SignificantStringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, requiredSize, shardSize, minDocCount, shardMinDocCount, aggregationContext, parent, termsAggregatorFactory);
}

View File

@ -48,7 +48,7 @@ import java.util.Arrays;
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
private final IncludeExclude includeExclude;
protected final IncludeExclude includeExclude;
protected BytesValues.WithOrdinals globalValues;
protected Ordinals.Docs globalOrdinals;
@ -59,7 +59,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
// first defined one.
// So currently for each instance of this aggregator the acceptedGlobalOrdinals will be computed, this is unnecessary
// especially if this agg is on a second layer or deeper.
private LongBitSet acceptedGlobalOrdinals;
protected LongBitSet acceptedGlobalOrdinals;
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount,
@ -124,6 +124,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
StringTerms.Bucket spare = null;
for (long globalTermOrd = Ordinals.MIN_ORDINAL; globalTermOrd < globalOrdinals.getMaxOrd(); ++globalTermOrd) {
if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) {
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (minDocCount > 0 && bucketDocCount == 0) {

View File

@ -57,7 +57,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) {
if (includeExclude != null) {
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
return MAP.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
}
return new StringTermsAggregator.WithOrdinals(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
}
@ -103,11 +103,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory {
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, int requiredSize, int shardSize, long minDocCount, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent) {
if (includeExclude != null) {
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode cannot filter terms.");
}
if (factories != AggregatorFactories.EMPTY) {
throw new ElasticsearchIllegalArgumentException("The `" + this + "` execution mode can only be used as a leaf aggregation");
if (includeExclude != null || factories != null) {
return GLOBAL_ORDINALS.create(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, includeExclude, aggregationContext, parent);
}
return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, estimatedBucketCount, maxOrd, order, requiredSize, shardSize, minDocCount, aggregationContext, parent);
}

View File

@ -26,23 +26,22 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.*;
/**
*
@ -54,12 +53,6 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
}
public String randomExecutionHintNoOrdinals() {
EnumSet<SignificantTermsAggregatorFactory.ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
modes.remove(ExecutionMode.ORDINALS);
return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
}
@Override
public Settings indexSettings() {
return ImmutableSettings.builder()
@ -130,7 +123,7 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
public void includeExclude() throws Exception {
SearchResponse response = client().prepareSearch("test")
.setQuery(new TermQueryBuilder("_all", "weller"))
.addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
.addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint())
.exclude("weller"))
.get();
assertSearchResponse(response);
@ -149,7 +142,7 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest {
response = client().prepareSearch("test")
.setQuery(new TermQueryBuilder("_all", "weller"))
.addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHintNoOrdinals())
.addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint())
.include("weller"))
.get();
assertSearchResponse(response);

View File

@ -37,7 +37,6 @@ import org.hamcrest.Matchers;
import org.junit.Test;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
@ -61,19 +60,6 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
private static final String MULTI_VALUED_FIELD_NAME = "s_values";
public static String randomExecutionHint() {
EnumSet<ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY);
return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
}
public static String randomExecutionHintNoOrdinals() {
EnumSet<ExecutionMode> modes = EnumSet.allOf(ExecutionMode.class);
modes.remove(ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY);
modes.remove(ExecutionMode.ORDINALS);
return randomBoolean() ? null : randomFrom(modes.toArray()).toString();
}
public static String randomAllExecutionHint() {
return randomBoolean() ? null : randomFrom(ExecutionMode.values()).toString();
}
@ -121,7 +107,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
final int minDocCount = randomInt(1);
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.minDocCount(minDocCount)
.size(0))
@ -196,7 +182,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).include("val00.+"))
.execute().actionGet();
@ -219,7 +205,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("(val000|val001)"))
.execute().actionGet();
@ -242,7 +228,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+"))
.execute().actionGet();
@ -270,7 +256,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).include("VAL00.+", Pattern.CASE_INSENSITIVE))
.execute().actionGet();
@ -294,7 +280,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
.execute().actionGet();
@ -318,7 +304,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHintNoOrdinals())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME).exclude("val0[1-9]+.+", 0))
.execute().actionGet();
@ -342,7 +328,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void singleValueField_WithMaxSize() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.size(20)
.order(Terms.Order.term(true))) // we need to sort by terms cause we're checking the first 20 values
@ -367,7 +353,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void singleValueField_OrderedByTermAsc() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.order(Terms.Order.term(true)))
.execute().actionGet();
@ -392,7 +378,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void singleValueField_OrderedByTermDesc() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.order(Terms.Order.term(false)))
.execute().actionGet();
@ -471,7 +457,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void singleValuedField_WithValueScript() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.script("'foo_' + _value"))
.execute().actionGet();
@ -495,7 +481,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void multiValuedField_WithValueScript_NotUnique() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(MULTI_VALUED_FIELD_NAME)
.script("_value.substring(0,3)"))
.execute().actionGet();
@ -517,7 +503,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void multiValuedField() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(MULTI_VALUED_FIELD_NAME))
.execute().actionGet();
@ -544,7 +530,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void multiValuedField_WithValueScript() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(MULTI_VALUED_FIELD_NAME)
.script("'foo_' + _value"))
.execute().actionGet();
@ -625,7 +611,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void script_SingleValue() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
.execute().actionGet();
@ -648,7 +634,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void script_SingleValue_ExplicitSingleValue() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.script("doc['" + SINGLE_VALUED_FIELD_NAME + "'].value"))
.execute().actionGet();
@ -698,7 +684,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void script_MultiValued() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.script("doc['" + MULTI_VALUED_FIELD_NAME + "'].values"))
.execute().actionGet();
@ -759,7 +745,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void unmapped() throws Exception {
SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.size(randomInt(5))
.field(SINGLE_VALUED_FIELD_NAME))
.execute().actionGet();
@ -776,7 +762,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
public void partiallyUnmapped() throws Exception {
SearchResponse response = client().prepareSearch("idx", "idx_unmapped").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME))
.execute().actionGet();
@ -970,7 +956,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
client().prepareSearch("idx").setTypes("type")
.addAggregation(terms("terms")
.executionHint(randomAllExecutionHint())
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.order(Terms.Order.aggregation("avg_i", true))
).execute().actionGet();