Merge pull request #10418 from jpountz/enhancement/speed_up_aggs_include_exclude

Aggregations: Speed up include/exclude in terms aggregations with regexps.

Close #10418
This commit is contained in:
Adrien Grand 2015-04-09 12:16:37 +02:00
commit e25db222ee
14 changed files with 337 additions and 329 deletions

View File

@ -139,6 +139,9 @@ equivalent to the former `pre_zone` option. Setting `time_zone` to a value like
being applied in the specified time zone. In addition to this, also the `pre_zone_adjust_large_interval` option is removed because we
now always return dates and bucket keys in UTC.
`include`/`exclude` filtering on the `terms` aggregation now uses the same syntax as regexp queries instead of the Java syntax. While simple
regexps should still work, more complex ones might need some rewriting. Also, the `flags` parameter is not supported anymore.
=== Terms filter lookup caching
The terms filter lookup mechanism does not support the `cache` option anymore

View File

@ -482,42 +482,7 @@ with `water_` (so the tag `water_sports` will no be aggregated). The `include` r
values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`.
The regular expression are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern], The syntax is the same as <<regexp-syntax,regexp queries>>.
and as such, it is also possible to pass in flags that will determine how the compiled regular expression will work:
[source,js]
--------------------------------------------------
{
"aggs" : {
"tags" : {
"terms" : {
"field" : "tags",
"include" : {
"pattern" : ".*sport.*",
"flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
},
"exclude" : {
"pattern" : "water_.*",
"flags" : "CANON_EQ|CASE_INSENSITIVE"
}
}
}
}
}
--------------------------------------------------
<1> the flags are concatenated using the `|` character as a separator
The possible flags that can be used are:
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
For matching based on exact values the `include` and `exclude` parameters can simply take an array of
strings that represent the terms as they are found in the index:

View File

@ -48,7 +48,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
BucketCountThresholds bucketCountThresholds, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException { SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData); super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
@ -145,7 +145,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
private final LongHash bucketOrds; private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException { public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory, metaData); super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory, metaData);
bucketOrds = new LongHash(1, aggregationContext.bigArrays()); bucketOrds = new LongHash(1, aggregationContext.bigArrays());
} }

View File

@ -47,7 +47,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
BucketCountThresholds bucketCountThresholds, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException { SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData); super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);

View File

@ -65,7 +65,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData); final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
} }
}, },
@ -77,7 +78,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource; ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher(); IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
} }
}, },
@ -87,7 +89,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
} }
}; };

View File

@ -57,7 +57,7 @@ public class SignificantTermsParser implements Aggregator.Parser {
.scriptable(false) .scriptable(false)
.formattable(true) .formattable(true)
.build(); .build();
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, SignificantStringTerms.TYPE, context); IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds(); TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();

View File

@ -57,7 +57,7 @@ import java.util.Map;
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource; protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
protected final IncludeExclude includeExclude; protected final IncludeExclude.OrdinalsFilter includeExclude;
// TODO: cache the acceptedglobalValues per aggregation definition. // TODO: cache the acceptedglobalValues per aggregation definition.
// We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution. // We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
@ -71,7 +71,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds, Terms.Order order, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData); super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;
this.includeExclude = includeExclude; this.includeExclude = includeExclude;
@ -260,7 +260,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private final LongHash bucketOrds; private final LongHash bucketOrds;
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext,
Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, showTermDocCountError, metaData); super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, showTermDocCountError, metaData);
bucketOrds = new LongHash(1, aggregationContext.bigArrays()); bucketOrds = new LongHash(1, aggregationContext.bigArrays());

View File

@ -45,11 +45,11 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
private final ValuesSource valuesSource; private final ValuesSource valuesSource;
protected final BytesRefHash bucketOrds; protected final BytesRefHash bucketOrds;
private final IncludeExclude includeExclude; private final IncludeExclude.StringFilter includeExclude;
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
Terms.Order order, BucketCountThresholds bucketCountThresholds, Terms.Order order, BucketCountThresholds bucketCountThresholds,
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData); super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
this.valuesSource = valuesSource; this.valuesSource = valuesSource;

View File

@ -50,7 +50,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData); final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
} }
@Override @Override
@ -65,7 +66,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
} }
@Override @Override
@ -80,7 +82,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException { AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
} }
@Override @Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.aggregations.bucket.terms; package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
@ -37,9 +38,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
private Terms.ValueType valueType; private Terms.ValueType valueType;
private Terms.Order order; private Terms.Order order;
private String includePattern; private String includePattern;
private int includeFlags;
private String excludePattern; private String excludePattern;
private int excludeFlags;
private String executionHint; private String executionHint;
private SubAggCollectionMode collectionMode; private SubAggCollectionMode collectionMode;
private Boolean showTermDocCountError; private Boolean showTermDocCountError;
@ -88,26 +87,15 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
/** /**
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
* on the {@link java.util.regex.Pattern} class. * on the {@link RegExp} class.
* *
* @see #include(String, int) * @see {@link RegExp#RegExp(String)}
*/ */
public TermsBuilder include(String regex) { public TermsBuilder include(String regex) {
return include(regex, 0);
}
/**
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based
* on the {@link java.util.regex.Pattern} class.
*
* @see java.util.regex.Pattern#compile(String, int)
*/
public TermsBuilder include(String regex, int flags) {
if (includeTerms != null) { if (includeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both"); throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
} }
this.includePattern = regex; this.includePattern = regex;
this.includeFlags = flags;
return this; return this;
} }
@ -160,29 +148,18 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
} }
return termsAsString; return termsAsString;
} }
/**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
* expression is based on the {@link java.util.regex.Pattern} class.
*
* @see #exclude(String, int)
*/
public TermsBuilder exclude(String regex) {
return exclude(regex, 0);
}
/** /**
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
* expression is based on the {@link java.util.regex.Pattern} class. * expression is based on the {@link RegExp} class.
* *
* @see java.util.regex.Pattern#compile(String, int) * @see {@link RegExp#RegExp(String)}
*/ */
public TermsBuilder exclude(String regex, int flags) { public TermsBuilder exclude(String regex) {
if (excludeTerms != null) { if (excludeTerms != null) {
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both"); throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
} }
this.excludePattern = regex; this.excludePattern = regex;
this.excludeFlags = flags;
return this; return this;
} }
@ -287,27 +264,13 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
builder.array("include", includeTerms); builder.array("include", includeTerms);
} }
if (includePattern != null) { if (includePattern != null) {
if (includeFlags == 0) { builder.field("include", includePattern);
builder.field("include", includePattern);
} else {
builder.startObject("include")
.field("pattern", includePattern)
.field("flags", includeFlags)
.endObject();
}
} }
if (excludeTerms != null) { if (excludeTerms != null) {
builder.array("exclude", excludeTerms); builder.array("exclude", excludeTerms);
} }
if (excludePattern != null) { if (excludePattern != null) {
if (excludeFlags == 0) { builder.field("exclude", excludePattern);
builder.field("exclude", excludePattern);
} else {
builder.startObject("exclude")
.field("pattern", excludePattern)
.field("flags", excludeFlags)
.endObject();
}
} }
return builder; return builder;
} }

View File

@ -46,7 +46,7 @@ public class TermsParser implements Aggregator.Parser {
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
TermsParametersParser aggParser = new TermsParametersParser(); TermsParametersParser aggParser = new TermsParametersParser();
ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build(); ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build();
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context); IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
List<OrderElement> orderElements = aggParser.getOrderElements(); List<OrderElement> orderElements = aggParser.getOrderElements();

View File

@ -20,22 +20,30 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
import com.carrotsearch.hppc.LongOpenHashSet; import com.carrotsearch.hppc.LongOpenHashSet;
import com.carrotsearch.hppc.LongSet; import com.carrotsearch.hppc.LongSet;
import org.apache.lucene.index.RandomAccessOrds; import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.*; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException; import java.io.IOException;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.regex.Matcher; import java.util.SortedSet;
import java.util.regex.Pattern; import java.util.TreeSet;
/** /**
* Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic, * Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
@ -43,8 +51,8 @@ import java.util.regex.Pattern;
*/ */
public class IncludeExclude { public class IncludeExclude {
// The includeValue and excludeValue ByteRefs which are the result of the parsing // The includeValue and excludeValue ByteRefs which are the result of the parsing
// process are converted into a LongFilter when used on numeric fields // process are converted into a LongFilter when used on numeric fields
// in the index. // in the index.
public static class LongFilter { public static class LongFilter {
private LongSet valids; private LongSet valids;
@ -72,152 +80,145 @@ public class IncludeExclude {
} }
} }
private final Matcher include; // Only used for the 'map' execution mode (ie. scripts)
private final Matcher exclude; public static class StringFilter {
private final CharsRefBuilder scratch = new CharsRefBuilder();
private Set<BytesRef> includeValues; private final ByteRunAutomaton runAutomaton;
private Set<BytesRef> excludeValues;
private final boolean hasRegexTest; private StringFilter(Automaton automaton) {
this.runAutomaton = new ByteRunAutomaton(automaton);
}
/**
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
*/
public boolean accept(BytesRef value) {
return runAutomaton.run(value.bytes, value.offset, value.length);
}
}
public static class OrdinalsFilter {
private final CompiledAutomaton compiled;
private OrdinalsFilter(Automaton automaton) {
this.compiled = new CompiledAutomaton(automaton);
}
/**
* Computes which global ordinals are accepted by this IncludeExclude instance.
*/
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum;
Terms globalTerms = new DocValuesTerms(globalOrdinals);
// TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
globalTermsEnum = compiled.getTermsEnum(globalTerms);
for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
acceptedGlobalOrdinals.set(globalTermsEnum.ord());
}
return acceptedGlobalOrdinals;
}
}
private final RegExp include, exclude;
private final SortedSet<BytesRef> includeValues, excludeValues;
/** /**
* @param include The regular expression pattern for the terms to be included * @param include The regular expression pattern for the terms to be included
 * (may only be {@code null} if one of the other arguments is non-null).
* @param includeValues The terms to be included
 * (may only be {@code null} if one of the other arguments is non-null).
* @param exclude The regular expression pattern for the terms to be excluded * @param exclude The regular expression pattern for the terms to be excluded
 * (may only be {@code null} if one of the other arguments is non-null).
* @param excludeValues The terms to be excluded
 * (may only be {@code null} if one of the other arguments is non-null).
*/ */
public IncludeExclude(Pattern include, Pattern exclude, Set<BytesRef> includeValues, Set<BytesRef> excludeValues) { public IncludeExclude(RegExp include, RegExp exclude) {
assert includeValues != null || include != null || if (include == null && exclude == null) {
exclude != null || excludeValues != null : "includes & excludes cannot both be null"; // otherwise IncludeExclude object should be null throw new IllegalArgumentException();
this.include = include != null ? include.matcher("") : null; }
this.exclude = exclude != null ? exclude.matcher("") : null; this.include = include;
hasRegexTest = include != null || exclude != null; this.exclude = exclude;
this.includeValues = null;
this.excludeValues = null;
}
/**
* @param includeValues The terms to be included
* @param excludeValues The terms to be excluded
*/
public IncludeExclude(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
if (includeValues == null && excludeValues == null) {
throw new IllegalArgumentException();
}
this.include = null;
this.exclude = null;
this.includeValues = includeValues; this.includeValues = includeValues;
this.excludeValues = excludeValues; this.excludeValues = excludeValues;
} }
/** /**
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns. * Terms adapter around doc values.
*/ */
public boolean accept(BytesRef value) { private static class DocValuesTerms extends Terms {
if (hasRegexTest) { private final SortedSetDocValues values;
// We need to perform UTF8 to UTF16 conversion for use in the regex matching
scratch.copyUTF8Bytes(value);
}
return isIncluded(value, scratch.get()) && !isExcluded(value, scratch.get());
}
private boolean isIncluded(BytesRef value, CharsRef utf16Chars) {
if ((includeValues == null) && (include == null)) { DocValuesTerms(SortedSetDocValues values) {
// No include criteria to be tested. this.values = values;
return true;
} }
if (include != null) { @Override
if (include.reset(scratch.get()).matches()) { public TermsEnum iterator(TermsEnum reuse) throws IOException {
return true; return values.termsEnum();
}
} }
if (includeValues != null) {
if (includeValues.contains(value)) { @Override
return true; public long size() throws IOException {
} return -1;
} }
// Some include criteria was tested but no match found
return false; @Override
} public long getSumTotalTermFreq() throws IOException {
return -1;
private boolean isExcluded(BytesRef value, CharsRef utf16Chars) {
if (exclude != null) {
if (exclude.reset(scratch.get()).matches()) {
return true;
}
} }
if (excludeValues != null) {
if (excludeValues.contains(value)) { @Override
return true; public long getSumDocFreq() throws IOException {
} return -1;
} }
// No exclude criteria was tested or no match found
return false; @Override
public int getDocCount() throws IOException {
return -1;
}
@Override
public boolean hasFreqs() {
return false;
}
@Override
public boolean hasOffsets() {
return false;
}
@Override
public boolean hasPositions() {
return false;
}
@Override
public boolean hasPayloads() {
return false;
}
} }
/**
* Computes which global ordinals are accepted by this IncludeExclude instance.
*/
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
// There are 3 ways of populating this bitset:
// 1) Looking up the global ordinals for known "include" terms
// 2) Looking up the global ordinals for known "exclude" terms
// 3) Traversing the term enum for all terms and running past regexes
// Option 3 is known to be very slow in the case of high-cardinality fields and
// should be avoided if possible.
if (includeValues != null) {
// optimize for the case where the set of accepted values is a set
// of known terms, not a regex that would have to be tested against all terms in the index
for (BytesRef includeValue : includeValues) {
// We need to perform UTF8 to UTF16 conversion for use in the regex matching
scratch.copyUTF8Bytes(includeValue);
if (!isExcluded(includeValue, scratch.get())) {
long ord = globalOrdinals.lookupTerm(includeValue);
if (ord >= 0) {
acceptedGlobalOrdinals.set(ord);
}
}
}
} else {
if(hasRegexTest) {
// We have includeVals that are a regex or only regex excludes - we need to do the potentially
// slow option of hitting termsEnum for every term in the index.
TermsEnum globalTermsEnum = globalOrdinals.termsEnum();
try {
for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
if (accept(term)) {
acceptedGlobalOrdinals.set(globalTermsEnum.ord());
}
}
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
}
} else {
// we only have a set of known values to exclude - create a bitset with all good values and negate the known bads
acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
for (BytesRef excludeValue : excludeValues) {
long ord = globalOrdinals.lookupTerm(excludeValue);
if (ord >= 0) {
acceptedGlobalOrdinals.clear(ord);
}
}
}
}
return acceptedGlobalOrdinals;
}
public static class Parser { public static class Parser {
private final String aggName;
private final InternalAggregation.Type aggType;
private final SearchContext context;
String include = null; String include = null;
int includeFlags = 0; // 0 means no flags
String exclude = null; String exclude = null;
int excludeFlags = 0; // 0 means no flags SortedSet<BytesRef> includeValues;
Set<BytesRef> includeValues; SortedSet<BytesRef> excludeValues;
Set<BytesRef> excludeValues;
public Parser(String aggName, InternalAggregation.Type aggType, SearchContext context) {
this.aggName = aggName;
this.aggType = aggType;
this.context = context;
}
public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException { public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException {
@ -231,14 +232,14 @@ public class IncludeExclude {
} }
return true; return true;
} }
if (token == XContentParser.Token.START_ARRAY) { if (token == XContentParser.Token.START_ARRAY) {
if ("include".equals(currentFieldName)) { if ("include".equals(currentFieldName)) {
includeValues = parseArrayToSet(parser); includeValues = new TreeSet<>(parseArrayToSet(parser));
return true; return true;
} }
if ("exclude".equals(currentFieldName)) { if ("exclude".equals(currentFieldName)) {
excludeValues = parseArrayToSet(parser); excludeValues = new TreeSet<>(parseArrayToSet(parser));
return true; return true;
} }
return false; return false;
@ -252,12 +253,6 @@ public class IncludeExclude {
} else if (token == XContentParser.Token.VALUE_STRING) { } else if (token == XContentParser.Token.VALUE_STRING) {
if ("pattern".equals(currentFieldName)) { if ("pattern".equals(currentFieldName)) {
include = parser.text(); include = parser.text();
} else if ("flags".equals(currentFieldName)) {
includeFlags = Regex.flagsFromString(parser.text());
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("flags".equals(currentFieldName)) {
includeFlags = parser.intValue();
} }
} }
} }
@ -268,12 +263,6 @@ public class IncludeExclude {
} else if (token == XContentParser.Token.VALUE_STRING) { } else if (token == XContentParser.Token.VALUE_STRING) {
if ("pattern".equals(currentFieldName)) { if ("pattern".equals(currentFieldName)) {
exclude = parser.text(); exclude = parser.text();
} else if ("flags".equals(currentFieldName)) {
excludeFlags = Regex.flagsFromString(parser.text());
}
} else if (token == XContentParser.Token.VALUE_NUMBER) {
if ("flags".equals(currentFieldName)) {
excludeFlags = parser.intValue();
} }
} }
} }
@ -298,19 +287,50 @@ public class IncludeExclude {
} }
return set; return set;
} }
public IncludeExclude includeExclude() { public IncludeExclude includeExclude() {
if (include == null && exclude == null && includeValues == null && excludeValues == null) { RegExp includePattern = include != null ? new RegExp(include) : null;
RegExp excludePattern = exclude != null ? new RegExp(exclude) : null;
if (includePattern != null || excludePattern != null) {
if (includeValues != null || excludeValues != null) {
throw new ElasticsearchIllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
}
return new IncludeExclude(includePattern, excludePattern);
} else if (includeValues != null || excludeValues != null) {
return new IncludeExclude(includeValues, excludeValues);
} else {
return null; return null;
} }
Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
return new IncludeExclude(includePattern, excludePattern, includeValues, excludeValues);
} }
} }
public boolean isRegexBased() { public boolean isRegexBased() {
return hasRegexTest; return include != null || exclude != null;
}
private Automaton toAutomaton() {
Automaton a = null;
if (include != null) {
a = include.toAutomaton();
} else if (includeValues != null) {
a = Automata.makeStringUnion(includeValues);
} else {
a = Automata.makeAnyString();
}
if (exclude != null) {
a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
} else if (excludeValues != null) {
a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
return a;
}
public StringFilter convertToStringFilter() {
return new StringFilter(toAutomaton());
}
public OrdinalsFilter convertToOrdinalsFilter() {
return new OrdinalsFilter(toAutomaton());
} }
public LongFilter convertToLongFilter() { public LongFilter convertToLongFilter() {
@ -329,6 +349,7 @@ public class IncludeExclude {
} }
return result; return result;
} }
public LongFilter convertToDoubleFilter() { public LongFilter convertToDoubleFilter() {
int numValids = includeValues == null ? 0 : includeValues.size(); int numValids = includeValues == null ? 0 : includeValues.size();
int numInvalids = excludeValues == null ? 0 : excludeValues.size(); int numInvalids = excludeValues == null ? 0 : excludeValues.size();

View File

@ -0,0 +1,130 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.search.aggregations;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.node.Node;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
/**
 * Standalone benchmark for the {@code include}/{@code exclude} regexp filtering
 * of the {@code terms} aggregation: indexes random short strings into a
 * single-shard index and then repeatedly runs a prefix query combined with a
 * regexp-filtered terms aggregation, printing the average latency per run.
 */
public class IncludeExcludeAggregationSearchBenchmark {

    private static final Random R = new Random();
    private static final String CLUSTER_NAME = IncludeExcludeAggregationSearchBenchmark.class.getSimpleName();
    private static final int NUM_DOCS = 10000000;
    private static final int BATCH = 100;
    private static final int WARM = 3;   // warm-up rounds, not reported
    private static final int RUNS = 10;  // measured rounds
    private static final int ITERS = 3;  // searches averaged per round

    public static void main(String[] args) {
        // Single shard, no replicas, refresh disabled: keeps the measurement
        // focused on search rather than indexing/refresh overhead.
        Settings indexSettings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put(SETTING_NUMBER_OF_SHARDS, 1)
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        // Data node(s) hosting the index.
        Node[] dataNodes = new Node[1];
        for (int n = 0; n < dataNodes.length; n++) {
            dataNodes[n] = nodeBuilder().clusterName(CLUSTER_NAME)
                    .settings(settingsBuilder().put(indexSettings).put("name", "node" + n))
                    .node();
        }

        // Separate client-only node so search round-trips go through transport.
        Node clientNode = nodeBuilder()
                .clusterName(CLUSTER_NAME)
                .settings(settingsBuilder().put(indexSettings).put("name", "client")).client(true).node();

        Client client = clientNode.client();

        try {
            client.admin().indices().create(createIndexRequest("index").settings(indexSettings).mapping("type",
                    jsonBuilder().startObject().startObject("type").startObject("properties")
                        .startObject("str")
                            .field("type", "string")
                            .field("index", "not_analyzed")
                        .endObject()
                    .endObject().endObject().endObject())).actionGet();

            System.out.println("Indexing " + NUM_DOCS + " documents");

            StopWatch stopWatch = new StopWatch().start();
            int indexed = 0;
            while (indexed < NUM_DOCS) {
                BulkRequestBuilder bulk = client.prepareBulk();
                // Fill one bulk request, never exceeding NUM_DOCS overall.
                int batchEnd = Math.min(indexed + BATCH, NUM_DOCS);
                while (indexed < batchEnd) {
                    bulk.add(client.prepareIndex("index", "type", Integer.toString(indexed)).setSource("str", TestUtil.randomSimpleString(R)));
                    ++indexed;
                }
                BulkResponse response = bulk.execute().actionGet();
                if (response.hasFailures()) {
                    System.err.println("--> failures...");
                    System.err.println(response.buildFailureMessage());
                }
                if ((indexed % 100000) == 0) {
                    System.out.println("--> Indexed " + indexed + " took " + stopWatch.stop().lastTaskTime());
                    stopWatch.start();
                }
            }

            client.admin().indices().prepareRefresh("index").execute().actionGet();
        } catch (Exception e) {
            // Best effort: a failure here is assumed to mean the index survived
            // a previous run, so reuse it rather than abort the benchmark.
            System.out.println("Index already exists, skipping index creation");
        }

        ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
        if (clusterHealthResponse.isTimedOut()) {
            System.err.println("--> Timed out waiting for cluster health");
        }

        // WARM unreported warm-up rounds followed by RUNS measured rounds,
        // each averaging ITERS identical searches.
        for (int round = 0; round < WARM + RUNS; ++round) {
            if (round >= WARM) {
                System.out.println("RUN " + (round - WARM));
            }
            long start = System.nanoTime();
            SearchResponse resp = null; // kept live so the JIT cannot discard the search
            for (int iter = 0; iter < ITERS; ++iter) {
                resp = client.prepareSearch("index").setQuery(QueryBuilders.prefixQuery("str", "sf")).setSize(0).addAggregation(terms("t").field("str").include("s.*")).execute().actionGet();
            }
            long end = System.nanoTime();
            if (round >= WARM) {
                System.out.println(new TimeValue((end - start) / ITERS, TimeUnit.NANOSECONDS));
            }
        }
    }
}

View File

@ -387,86 +387,6 @@ public class StringTermsTests extends AbstractTermsTests {
} }
} }
@Test
public void singleValueField_WithRegexFiltering_WithFlags() throws Exception {
// include without exclude
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
// with case insensitive flag on the include regex
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.collectMode(randomFrom(SubAggCollectionMode.values())).include("VAL00.+", Pattern.CASE_INSENSITIVE))
.execute().actionGet();
assertSearchResponse(response);
Terms terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
assertThat(bucket, notNullValue());
assertThat(key(bucket), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// include and exclude
// we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
// with multi-flag masking on the exclude regex
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.collectMode(randomFrom(SubAggCollectionMode.values())).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
.execute().actionGet();
assertSearchResponse(response);
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(8));
for (int i = 2; i < 10; i++) {
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
assertThat(bucket, notNullValue());
assertThat(key(bucket), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
// exclude without include
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
// with a "no flag" flag
response = client().prepareSearch("idx").setTypes("high_card_type")
.addAggregation(terms("terms")
.executionHint(randomExecutionHint())
.field(SINGLE_VALUED_FIELD_NAME)
.collectMode(randomFrom(SubAggCollectionMode.values())).exclude("val0[1-9]+.+", 0))
.execute().actionGet();
assertSearchResponse(response);
terms = response.getAggregations().get("terms");
assertThat(terms, notNullValue());
assertThat(terms.getName(), equalTo("terms"));
assertThat(terms.getBuckets().size(), equalTo(10));
for (int i = 0; i < 10; i++) {
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
assertThat(bucket, notNullValue());
assertThat(key(bucket), equalTo("val00" + i));
assertThat(bucket.getDocCount(), equalTo(1l));
}
}
@Test @Test
public void singleValueField_WithExactTermFiltering() throws Exception { public void singleValueField_WithExactTermFiltering() throws Exception {
// include without exclude // include without exclude