diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java index 68cf81bff15..be4367caf4a 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsAggregator.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.format.ValueFormat; @@ -40,9 +41,9 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator { public SignificantLongTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount, BucketCountThresholds bucketCountThresholds, - AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory) { + AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, IncludeExclude.LongFilter includeExclude) { - super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false); + super(name, factories, valuesSource, format, estimatedBucketCount, null, bucketCountThresholds, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, includeExclude); 
this.termsAggFactory = termsAggFactory; } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 5f74bd020ae..afc6fa829b9 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -194,9 +194,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac return execution.create(name, factories, valuesSource, estimatedBucketCount, bucketCountThresholds, includeExclude, aggregationContext, parent, this); } - if (includeExclude != null) { - throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " + - "settings as it can only be applied to string values"); + + if ((includeExclude != null) && (includeExclude.isRegexBased())) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. 
Use an array of numeric values for include/exclude clauses used to filter numeric fields"); } if (valuesSource instanceof ValuesSource.Numeric) { @@ -204,7 +205,11 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { throw new UnsupportedOperationException("No support for examining floating point numerics"); } - return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this); + IncludeExclude.LongFilter longFilter = null; + if (includeExclude != null) { + longFilter = includeExclude.convertToLongFilter(); + } + return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), estimatedBucketCount, bucketCountThresholds, aggregationContext, parent, this, longFilter); } throw new AggregationExecutionException("sigfnificant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index 7a7bcb9cf8f..48d70c0f9c2 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.significant; +import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.FilterBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -45,6 +46,8 @@ public class SignificantTermsBuilder extends AggregationBuilder { */ public TermsBuilder include(String [] 
terms) { if (includePattern != null) { - throw new ElasticsearchIllegalArgumentException("include clause must be an array of strings or a regex, not both"); + throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both"); } this.includeTerms = terms; return this; } + + /** + * Define a set of terms that should be aggregated. + */ + public TermsBuilder include(long [] terms) { + if (includePattern != null) { + throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both"); + } + this.includeTerms = longsArrToStringArr(terms); + return this; + } + + private String[] longsArrToStringArr(long[] terms) { + String[] termsAsString = new String[terms.length]; + for (int i = 0; i < terms.length; i++) { + termsAsString[i] = Long.toString(terms[i]); + } + return termsAsString; + } + + /** + * Define a set of terms that should be aggregated. + */ + public TermsBuilder include(double [] terms) { + if (includePattern != null) { + throw new ElasticsearchIllegalArgumentException("include clause must be an array of exact values or a regex, not both"); + } + this.includeTerms = doubleArrToStringArr(terms); + return this; + } + + private String[] doubleArrToStringArr(double[] terms) { + String[] termsAsString = new String[terms.length]; + for (int i = 0; i < terms.length; i++) { + termsAsString[i] = Double.toString(terms[i]); + } + return termsAsString; + } + /** * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular * expression is based on the {@link java.util.regex.Pattern} class. 
@@ -141,7 +180,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { */ public TermsBuilder exclude(String regex, int flags) { if (excludeTerms != null) { - throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both"); + throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both"); } this.excludePattern = regex; this.excludeFlags = flags; @@ -153,12 +192,36 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder { */ public TermsBuilder exclude(String [] terms) { if (excludePattern != null) { - throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both"); + throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both"); } this.excludeTerms = terms; return this; } + + /** + * Define a set of terms that should not be aggregated. + */ + public TermsBuilder exclude(long [] terms) { + if (excludePattern != null) { + throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both"); + } + this.excludeTerms = longsArrToStringArr(terms); + return this; + } + + /** + * Define a set of terms that should not be aggregated. + */ + public TermsBuilder exclude(double [] terms) { + if (excludePattern != null) { + throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both"); + } + this.excludeTerms = doubleArrToStringArr(terms); + return this; + } + + /** * When using scripts, the value type indicates the types of the values the script is generating. 
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 57c0bbbd556..86c74e89302 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -18,12 +18,11 @@ */ package org.elasticsearch.search.aggregations.bucket.terms.support; +import com.carrotsearch.hppc.LongOpenHashSet; +import com.carrotsearch.hppc.LongSet; import org.apache.lucene.index.RandomAccessOrds; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.*; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.regex.Regex; @@ -44,6 +43,35 @@ import java.util.regex.Pattern; */ public class IncludeExclude { + // The includeValues and excludeValues BytesRefs which are the result of the parsing + // process are converted into a LongFilter when used on numeric fields + // in the index. 
+ public static class LongFilter { + private LongSet valids; + private LongSet invalids; + + private LongFilter(int numValids, int numInvalids) { + if (numValids > 0) { + valids = new LongOpenHashSet(numValids); + } + if (numInvalids > 0) { + invalids = new LongOpenHashSet(numInvalids); + } + } + + public boolean accept(long value) { + return ((valids == null) || (valids.contains(value))) && ((invalids == null) || (!invalids.contains(value))); + } + + private void addAccept(long val) { + valids.add(val); + } + + private void addReject(long val) { + invalids.add(val); + } + } + private final Matcher include; private final Matcher exclude; private final CharsRefBuilder scratch = new CharsRefBuilder(); @@ -281,4 +309,43 @@ public class IncludeExclude { } } + public boolean isRegexBased() { + return hasRegexTest; + } + + public LongFilter convertToLongFilter() { + int numValids = includeValues == null ? 0 : includeValues.size(); + int numInvalids = excludeValues == null ? 0 : excludeValues.size(); + LongFilter result = new LongFilter(numValids, numInvalids); + if (includeValues != null) { + for (BytesRef val : includeValues) { + result.addAccept(Long.parseLong(val.utf8ToString())); + } + } + if (excludeValues != null) { + for (BytesRef val : excludeValues) { + result.addReject(Long.parseLong(val.utf8ToString())); + } + } + return result; + } + public LongFilter convertToDoubleFilter() { + int numValids = includeValues == null ? 0 : includeValues.size(); + int numInvalids = excludeValues == null ? 
0 : excludeValues.size(); + LongFilter result = new LongFilter(numValids, numInvalids); + if (includeValues != null) { + for (BytesRef val : includeValues) { + double dval=Double.parseDouble(val.utf8ToString()); + result.addAccept( NumericUtils.doubleToSortableLong(dval)); + } + } + if (excludeValues != null) { + for (BytesRef val : excludeValues) { + double dval=Double.parseDouble(val.utf8ToString()); + result.addReject( NumericUtils.doubleToSortableLong(dval)); + } + } + return result; + } + } diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java index 423d741647d..ec239987335 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/DoubleTermsTests.java @@ -279,6 +279,38 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest { assertThat(bucket.getDocCount(), equalTo(1l)); } } + + @Test + public void singleValueFieldWithFiltering() throws Exception { + double includes[] = { 1, 2, 3, 98.2 }; + double excludes[] = { 2, 4, 99 }; + double empty[] = {}; + testIncludeExcludeResults(includes, empty, new double[] { 1, 2, 3 }); + testIncludeExcludeResults(includes, excludes, new double[] { 1, 3 }); + testIncludeExcludeResults(empty, excludes, new double[] { 0, 1, 3 }); + } + + private void testIncludeExcludeResults(double[] includes, double[] excludes, double[] expecteds) { + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(terms("terms") + .field(SINGLE_VALUED_FIELD_NAME) + .include(includes) + .exclude(excludes) + .collectMode(randomFrom(SubAggCollectionMode.values()))) + .execute().actionGet(); + assertSearchResponse(response); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + 
assertThat(terms.getBuckets().size(), equalTo(expecteds.length)); + + for (int i = 0; i < expecteds.length; i++) { + Terms.Bucket bucket = terms.getBucketByKey("" + expecteds[i]); + assertThat(bucket, notNullValue()); + assertThat(bucket.getDocCount(), equalTo(1l)); + } + } + @Test public void singleValueField_OrderedByTermAsc() throws Exception { diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java index bbb9ffb55dc..59a7319b995 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/LongTermsTests.java @@ -255,7 +255,38 @@ public class LongTermsTests extends ElasticsearchIntegrationTest { assertThat(bucket.getDocCount(), equalTo(1l)); } } + + @Test + public void singleValueFieldWithFiltering() throws Exception { + long includes[] = { 1, 2, 3, 98 }; + long excludes[] = { -1, 2, 4 }; + long empty[] = {}; + testIncludeExcludeResults(includes, empty, new long[] { 1, 2, 3 }); + testIncludeExcludeResults(includes, excludes, new long[] { 1, 3 }); + testIncludeExcludeResults(empty, excludes, new long[] { 0, 1, 3 }); + } + private void testIncludeExcludeResults(long[] includes, long[] excludes, long[] expecteds) { + SearchResponse response = client().prepareSearch("idx").setTypes("type") + .addAggregation(terms("terms") + .field(SINGLE_VALUED_FIELD_NAME) + .include(includes) + .exclude(excludes) + .collectMode(randomFrom(SubAggCollectionMode.values()))) + .execute().actionGet(); + assertSearchResponse(response); + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(expecteds.length)); + + for (int i = 0; i < expecteds.length; i++) { + Terms.Bucket bucket = terms.getBucketByKey("" + expecteds[i]); + assertThat(bucket, 
notNullValue()); + assertThat(bucket.getDocCount(), equalTo(1l)); + } + } + @Test public void singleValueField_WithMaxSize() throws Exception { SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type") diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java index d28ca22195f..f1718e09cac 100644 --- a/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java +++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsTests.java @@ -123,6 +123,23 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest { Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber(); assertTrue(topCategory.equals(new Long(SNOWBOARDING_CATEGORY))); } + + @Test + public void structuredAnalysisWithIncludeExclude() throws Exception { + long[] excludeTerms = { MUSIC_CATEGORY }; + SearchResponse response = client().prepareSearch("test") + .setSearchType(SearchType.QUERY_AND_FETCH) + .setQuery(new TermQueryBuilder("_all", "paul")) + .setFrom(0).setSize(60).setExplain(true) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint()) + .minDocCount(1).exclude(excludeTerms)) + .execute() + .actionGet(); + assertSearchResponse(response); + SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms"); + Number topCategory = topTerms.getBuckets().iterator().next().getKeyAsNumber(); + assertTrue(topCategory.equals(new Long(OTHER_CATEGORY))); + } @Test public void includeExclude() throws Exception { @@ -160,6 +177,43 @@ public class SignificantTermsTests extends ElasticsearchIntegrationTest { assertThat(terms.contains("weller"), is(true)); } + @Test + public void includeExcludeExactValues() throws Exception { + String []incExcTerms={"weller","nosuchterm"}; + SearchResponse 
response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint()) + .exclude(incExcTerms)) + .get(); + assertSearchResponse(response); + SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms"); + Set terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(6)); + assertThat(terms.contains("jam"), is(true)); + assertThat(terms.contains("council"), is(true)); + assertThat(terms.contains("style"), is(true)); + assertThat(terms.contains("paul"), is(true)); + assertThat(terms.contains("of"), is(true)); + assertThat(terms.contains("the"), is(true)); + + response = client().prepareSearch("test") + .setQuery(new TermQueryBuilder("_all", "weller")) + .addAggregation(new SignificantTermsBuilder("mySignificantTerms").field("description").executionHint(randomExecutionHint()) + .include(incExcTerms)) + .get(); + assertSearchResponse(response); + topTerms = response.getAggregations().get("mySignificantTerms"); + terms = new HashSet<>(); + for (Bucket topTerm : topTerms) { + terms.add(topTerm.getKey()); + } + assertThat(terms, hasSize(1)); + assertThat(terms.contains("weller"), is(true)); + } + @Test public void unmapped() throws Exception { SearchResponse response = client().prepareSearch("idx_unmapped")