- Added support for term filtering based on include/exclude regex on the terms agg

- Added javadoc to the TermsBuilder

Closes #4267
This commit is contained in:
uboness 2013-11-29 10:05:12 +01:00
parent afb0d119e4
commit 0d6a35b9a7
14 changed files with 450 additions and 17 deletions

View File

@ -175,3 +175,65 @@ Generating the terms using a script:
}
--------------------------------------------------
==== Filtering Values
It is possible to filter the values for which buckets will be created. This can be done using the `include` and
`exclude` parameters which are based on regular expressions.
[source,js]
--------------------------------------------------
{
"aggs" : {
"tags" : {
"terms" : {
"field" : "tags",
"include" : ".*sport.*",
"exclude" : "water_.*"
}
}
}
}
--------------------------------------------------
In the above example, buckets will be created for all the tags that have the word `sport` in them, except those starting
with `water_` (so the tag `water_sports` will not be aggregated). The `include` regular expression determines which
values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
both are defined, the `exclude` has precedence — the `include` is evaluated first and only then the `exclude`.
The regular expressions are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern],
and as such, it is also possible to pass in flags that will determine how the compiled regular expression will work:
[source,js]
--------------------------------------------------
{
"aggs" : {
"tags" : {
"terms" : {
"field" : "tags",
"include" : {
"pattern" : ".*sport.*",
"flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
},
"exclude" : {
"pattern" : "water_.*",
"flags" : "CANON_EQ|CASE_INSENSITIVE"
}
}
}
}
}
--------------------------------------------------
<1> the flags are concatenated using the `|` character as a separator
The possible flags that can be used are:
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatter;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatterStreams;

View File

@ -21,12 +21,13 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.LongHash;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import java.io.IOException;
import java.util.Arrays;

View File

@ -27,6 +27,7 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import java.util.*;

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatter;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatterStreams;

View File

@ -21,12 +21,13 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.LongHash;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import java.io.IOException;
import java.util.Arrays;

View File

@ -23,10 +23,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import java.io.IOException;
import java.util.Arrays;
@ -45,15 +47,18 @@ public class StringTermsAggregator extends BucketsAggregator {
private final int requiredSize;
private final int shardSize;
private final BytesRefHash bucketOrds;
private final IncludeExclude includeExclude;
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
InternalOrder order, int requiredSize, int shardSize,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
super(name, BucketAggregationMode.PER_BUCKET, factories, INITIAL_CAPACITY, aggregationContext, parent);
this.valuesSource = valuesSource;
this.order = order;
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.includeExclude = includeExclude;
bucketOrds = new BytesRefHash();
}
@ -70,6 +75,9 @@ public class StringTermsAggregator extends BucketsAggregator {
for (int i = 0; i < valuesCount; ++i) {
final BytesRef bytes = values.nextValue();
if (includeExclude != null && !includeExclude.accept(bytes)) {
continue;
}
final int hash = values.currentValueHash();
int bucketOrdinal = bucketOrds.add(bytes, hash);
if (bucketOrdinal < 0) { // already seen
@ -122,3 +130,4 @@ public class StringTermsAggregator extends BucketsAggregator {
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;
@ -36,12 +37,14 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
private final InternalOrder order;
private final int requiredSize;
private final int shardSize;
private final IncludeExclude includeExclude;
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize) {
/**
 * @param name              The name of the aggregation
 * @param valueSourceConfig Configuration of the value source the terms are resolved from
 * @param order             Order in which the term buckets will be returned
 * @param requiredSize      Number of term buckets to return
 * @param shardSize         Number of term buckets each shard returns to the coordinating node
 * @param includeExclude    Optional regex-based term filter; {@code null} when no filtering is
 *                          requested (filtering is only supported for string values — see create)
 */
public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude) {
super(name, StringTerms.TYPE.name(), valueSourceConfig);
this.order = order;
this.requiredSize = requiredSize;
this.shardSize = shardSize;
this.includeExclude = includeExclude;
}
@Override
@ -52,7 +55,12 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
@Override
protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
if (valuesSource instanceof BytesValuesSource) {
return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
}
if (includeExclude != null) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
"settings as it can only be applied to string values");
}
if (valuesSource instanceof NumericValuesSource) {

View File

@ -7,34 +7,96 @@ import java.io.IOException;
import java.util.Locale;
/**
*
* Builds a {@code terms} aggregation
*/
public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
private int size = -1;
private int shardSize = -1;
private Terms.ValueType valueType;
private Terms.Order order;
private String includePattern;
private int includeFlags;
private String excludePattern;
private int excludeFlags;
/**
 * Constructs a new {@code terms} aggregation builder.
 *
 * @param name The name of the aggregation
 */
public TermsBuilder(String name) {
super(name, "terms");
}
/**
 * Sets how many term buckets should be returned out of the overall terms list
 * (defaults to 10).
 */
public TermsBuilder size(int requiredSize) {
    this.size = requiredSize;
    return this;
}
/**
 * Sets the {@code shard_size} — how many term buckets each shard will return to the
 * coordinating node (the node that coordinates the search execution). The higher the
 * shard size, the more accurate the results.
 */
public TermsBuilder shardSize(int size) {
    this.shardSize = size;
    return this;
}
/**
 * Only aggregate terms matching the given regular expression, compiled with no flags.
 * The expression follows {@link java.util.regex.Pattern} syntax.
 *
 * @see #include(String, int)
 */
public TermsBuilder include(String regex) {
    this.includePattern = regex;
    this.includeFlags = 0;
    return this;
}
/**
 * Only aggregate terms matching the given regular expression, compiled with the given
 * {@link java.util.regex.Pattern} flags.
 *
 * @see java.util.regex.Pattern#compile(String, int)
 */
public TermsBuilder include(String regex, int flags) {
    includePattern = regex;
    includeFlags = flags;
    return this;
}
/**
 * Filters out terms matching the given regular expression from the aggregation,
 * compiled with no flags. The expression follows {@link java.util.regex.Pattern} syntax.
 *
 * @see #exclude(String, int)
 */
public TermsBuilder exclude(String regex) {
    this.excludePattern = regex;
    this.excludeFlags = 0;
    return this;
}
/**
 * Filters out terms matching the given regular expression from the aggregation, compiled
 * with the given {@link java.util.regex.Pattern} flags.
 *
 * @see java.util.regex.Pattern#compile(String, int)
 */
public TermsBuilder exclude(String regex, int flags) {
    excludePattern = regex;
    excludeFlags = flags;
    return this;
}
/**
 * When the terms are generated by a script, indicates the type of the values the
 * script produces.
 */
public TermsBuilder valueType(Terms.ValueType type) {
    this.valueType = type;
    return this;
}
/**
* Defines the order in which the buckets will be returned.
*/
public TermsBuilder order(Terms.Order order) {
this.order = order;
return this;
@ -55,6 +117,26 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
builder.field("order");
order.toXContent(builder, params);
}
if (includePattern != null) {
if (includeFlags == 0) {
builder.field("include", includePattern);
} else {
builder.startObject("include")
.field("pattern", includePattern)
.field("flags", includeFlags)
.endObject();
}
}
if (excludePattern != null) {
if (excludeFlags == 0) {
builder.field("exclude", excludePattern);
} else {
builder.startObject("exclude")
.field("pattern", excludePattern)
.field("flags", excludeFlags)
.endObject();
}
}
return builder;
}
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@ -26,8 +27,10 @@ import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.FieldContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
@ -39,6 +42,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;
/**
*
@ -50,9 +54,6 @@ public class TermsParser implements Aggregator.Parser {
return StringTerms.TYPE.name();
}
// TODO add support for shard_size (vs. size) a la terms facets
// TODO add support for term filtering (regexp/include/exclude) a la terms facets
@Override
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
@ -67,6 +68,10 @@ public class TermsParser implements Aggregator.Parser {
boolean orderAsc = false;
String format = null;
boolean assumeUnique = false;
String include = null;
int includeFlags = 0; // 0 means no flags
String exclude = null;
int excludeFlags = 0; // 0 means no flags
XContentParser.Token token;
@ -85,6 +90,10 @@ public class TermsParser implements Aggregator.Parser {
valueType = Terms.ValueType.resolveType(parser.text());
} else if ("format".equals(currentFieldName)) {
format = parser.text();
} else if ("include".equals(currentFieldName)) {
include = parser.text();
} else if ("exclude".equals(currentFieldName)) {
exclude = parser.text();
}
} else if (token == XContentParser.Token.VALUE_BOOLEAN) {
if ("script_values_unique".equals(currentFieldName)) {
@ -105,8 +114,45 @@ public class TermsParser implements Aggregator.Parser {
orderKey = parser.currentName();
} else if (token == XContentParser.Token.VALUE_STRING) {
String dir = parser.text();
orderAsc = "asc".equalsIgnoreCase(dir);
//TODO: do we want to throw a parse error if the alternative is not "desc"???
if ("asc".equalsIgnoreCase(dir)) {
orderAsc = true;
} else if ("desc".equalsIgnoreCase(dir)) {
orderAsc = false;
} else {
throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]");
}
}
}
} else if ("include".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.VALUE_STRING) {
if ("pattern".equals(currentFieldName)) {
include = parser.text();
} else if ("flags".equals(currentFieldName)) {
includeFlags = Regex.flagsFromString(parser.text());
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("flags".equals(currentFieldName)) {
includeFlags = parser.intValue();
}
}
}
} else if ("exclude".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.VALUE_STRING) {
if ("pattern".equals(currentFieldName)) {
exclude = parser.text();
} else if ("flags".equals(currentFieldName)) {
excludeFlags = Regex.flagsFromString(parser.text());
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("flags".equals(currentFieldName)) {
excludeFlags = parser.intValue();
}
}
}
}
@ -118,6 +164,13 @@ public class TermsParser implements Aggregator.Parser {
shardSize = requiredSize;
}
IncludeExclude includeExclude = null;
if (include != null || exclude != null) {
Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
includeExclude = new IncludeExclude(includePattern, excludePattern);
}
InternalOrder order = resolveOrder(orderKey, orderAsc);
SearchScript searchScript = null;
if (script != null) {
@ -139,14 +192,14 @@ public class TermsParser implements Aggregator.Parser {
if (!assumeUnique) {
config.ensureUnique(true);
}
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
}
FieldMapper<?> mapper = context.smartNameFieldMapper(field);
if (mapper == null) {
ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
config.unmapped(true);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
}
IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
@ -188,7 +241,7 @@ public class TermsParser implements Aggregator.Parser {
config.ensureUnique(true);
}
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
}
static InternalOrder resolveOrder(String key, boolean asc) {

View File

@ -20,9 +20,9 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import java.io.IOException;

View File

@ -17,9 +17,10 @@
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms;
package org.elasticsearch.search.aggregations.bucket.terms.support;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import java.util.Comparator;

View File

@ -0,0 +1,68 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms.support;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based include/exclude filtering for string terms aggregations. The {@code include}
 * pattern is evaluated first; a term it accepts may still be rejected by the
 * {@code exclude} pattern, so exclusion effectively has precedence.
 *
 * <p>NOTE(review): instances reuse internal {@link Matcher}s and a scratch buffer, so a
 * single instance does not look safe for concurrent use — confirm callers are
 * single-threaded.
 */
public class IncludeExclude {

    // Pre-built matchers, reset per term to avoid per-call allocation. Either may be
    // null, but never both (a filter-nothing setup should be a null IncludeExclude).
    private final Matcher include;
    private final Matcher exclude;

    // Reusable UTF-16 buffer for decoding the UTF-8 term bytes.
    private final CharsRef scratch = new CharsRef();

    /**
     * @param include The regular expression pattern for the terms to be included
     *                (may only be {@code null} if {@code exclude} is not {@code null})
     * @param exclude The regular expression pattern for the terms to be excluded
     *                (may only be {@code null} if {@code include} is not {@code null})
     */
    public IncludeExclude(Pattern include, Pattern exclude) {
        assert include != null || exclude != null : "include & exclude cannot both be null"; // otherwise IncludeExclude object should be null
        this.include = include == null ? null : include.matcher("");
        this.exclude = exclude == null ? null : exclude.matcher("");
    }

    /**
     * Returns whether the given value is accepted based on the {@code include} &
     * {@code exclude} patterns.
     */
    public boolean accept(BytesRef value) {
        UnicodeUtil.UTF8toUTF16(value, scratch);
        if (include != null && !include.reset(scratch).matches()) {
            return false; // not matched by the include pattern -> rejected
        }
        // accepted unless explicitly excluded
        return exclude == null || !exclude.reset(scratch).matches();
    }
}

View File

@ -34,6 +34,7 @@ import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
@ -104,6 +105,150 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
}
}
/**
 * Integration test for {@code include}/{@code exclude} regex filtering on a string terms
 * aggregation, with patterns compiled with no flags. Exercises include-only,
 * include+exclude combined, and exclude-only requests.
 *
 * <p>NOTE(review): assumes the {@code high_card_type} fixture indexes one doc per term of
 * the form {@code valNNN} — confirm against the index setup elsewhere in this class.
 */
@Test
public void singleValueField_WithRegexFiltering() throws Exception {
// include without exclude
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").include("val00.+"))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// include and exclude
// we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").include("val00.+").exclude("(val000|val001)"))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(8));
for (int i = 2; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// exclude without include
// the exclude pattern must match the WHOLE term, so "val0[1-9]+.+" drops val010..val999
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").exclude("val0[1-9]+.+"))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
}
/**
 * Integration test for {@code include}/{@code exclude} regex filtering with explicit
 * {@link java.util.regex.Pattern} compile flags: a single flag, a multi-flag mask
 * (CASE_INSENSITIVE | COMMENTS), and the no-op flag value {@code 0}.
 */
@Test
public void singleValueField_WithRegexFiltering_WithFlags() throws Exception {
// include without exclude
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
// with case insensitive flag on the include regex
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").include("VAL00.+", Pattern.CASE_INSENSITIVE))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// include and exclude
// we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
// with multi-flag masking on the exclude regex
// (COMMENTS makes the regex ignore the embedded whitespace and the trailing #comment)
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(8));
for (int i = 2; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// exclude without include
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
// with a "no flag" flag
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.field("value").exclude("val0[1-9]+.+", 0))
.execute().actionGet();
assertThat(response.getFailedShards(), equalTo(0));
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.buckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getByTerm("val00" + i);
assertThat(bucket, notNullValue());
assertThat(bucket.getKey().string(), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
}
@Test
public void singleValueField_WithMaxSize() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")