- Added support for term filtering based on include/exclude regex on the terms agg
- Added javadoc to the TermsBuilder

Closes #4267

This commit is contained in:
parent afb0d119e4
commit 0d6a35b9a7

@@ -175,3 +175,65 @@ Generating the terms using a script:
}
--------------------------------------------------

==== Filtering Values

It is possible to filter the values for which buckets will be created. This can be done using the `include` and
`exclude` parameters which are based on regular expressions.

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tags",
                "include" : ".*sport.*",
                "exclude" : "water_.*"
            }
        }
    }
}
--------------------------------------------------

In the above example, buckets will be created for all the tags that have the word `sport` in them, except those starting
with `water_` (so the tag `water_sports` will not be aggregated). The `include` regular expression determines which
values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
both are defined, the `exclude` takes precedence, meaning the `include` is evaluated first and only then the `exclude`.
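
To make the evaluation order concrete, here is a minimal sketch of the accept logic described above (a simplified,
standalone form of this commit's `IncludeExclude.accept`; the method shape shown here is illustrative, not the actual
internals):

[source,java]
--------------------------------------------------
import java.util.regex.Pattern;

// include is consulted first; exclude wins whenever both patterns match
static boolean accepted(String term, Pattern include, Pattern exclude) {
    if (include != null && !include.matcher(term).matches()) {
        return false; // not matched by include -> term is filtered out
    }
    return exclude == null || !exclude.matcher(term).matches();
}
--------------------------------------------------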

The regular expressions are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern],
and as such, it is also possible to pass in flags that will determine how the compiled regular expression will work:

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "tags" : {
            "terms" : {
                "field" : "tags",
                "include" : {
                    "pattern" : ".*sport.*",
                    "flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
                },
                "exclude" : {
                    "pattern" : "water_.*",
                    "flags" : "CANON_EQ|CASE_INSENSITIVE"
                }
            }
        }
    }
}
--------------------------------------------------

<1> The flags are concatenated using the `|` character as a separator.

The possible flags that can be used are:
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
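
Based on the parser changes in this commit, the `flags` field may also be given as the numeric value of the combined
Java Pattern constants rather than the `|`-separated names. A minimal sketch of the equivalence (constant values taken
from the JDK):

[source,java]
--------------------------------------------------
import java.util.regex.Pattern;

// "CANON_EQ|CASE_INSENSITIVE" resolves to the same bit mask as:
int flags = Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE; // 128 | 2 == 130
--------------------------------------------------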

@@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatter;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatterStreams;

@@ -21,12 +21,13 @@ package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.index.fielddata.DoubleValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.LongHash;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;

import java.io.IOException;
import java.util.Arrays;

@@ -27,6 +27,7 @@ import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;

import java.util.*;

@@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatter;
import org.elasticsearch.search.aggregations.support.numeric.ValueFormatterStreams;

@@ -21,12 +21,13 @@ package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.index.fielddata.LongValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.LongHash;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;

import java.io.IOException;
import java.util.Arrays;

@@ -23,10 +23,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.AggregatorFactories;

import java.io.IOException;
import java.util.Arrays;

@@ -45,15 +47,18 @@ public class StringTermsAggregator extends BucketsAggregator {
    private final int requiredSize;
    private final int shardSize;
    private final BytesRefHash bucketOrds;
    private final IncludeExclude includeExclude;

    public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
                                 InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
                                 InternalOrder order, int requiredSize, int shardSize,
                                 IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {

        super(name, BucketAggregationMode.PER_BUCKET, factories, INITIAL_CAPACITY, aggregationContext, parent);
        this.valuesSource = valuesSource;
        this.order = order;
        this.requiredSize = requiredSize;
        this.shardSize = shardSize;
        this.includeExclude = includeExclude;
        bucketOrds = new BytesRefHash();
    }

@@ -70,6 +75,9 @@ public class StringTermsAggregator extends BucketsAggregator {

        for (int i = 0; i < valuesCount; ++i) {
            final BytesRef bytes = values.nextValue();
            if (includeExclude != null && !includeExclude.accept(bytes)) {
                continue;
            }
            final int hash = values.currentValueHash();
            int bucketOrdinal = bucketOrds.add(bytes, hash);
            if (bucketOrdinal < 0) { // already seen

@@ -122,3 +130,4 @@ public class StringTermsAggregator extends BucketsAggregator {
    }

}

@@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValueSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;

@@ -36,12 +37,14 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
    private final InternalOrder order;
    private final int requiredSize;
    private final int shardSize;
    private final IncludeExclude includeExclude;

    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize) {
    public TermsAggregatorFactory(String name, ValuesSourceConfig valueSourceConfig, InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude) {
        super(name, StringTerms.TYPE.name(), valueSourceConfig);
        this.order = order;
        this.requiredSize = requiredSize;
        this.shardSize = shardSize;
        this.includeExclude = includeExclude;
    }

    @Override

@@ -52,7 +55,12 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
    @Override
    protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
        if (valuesSource instanceof BytesValuesSource) {
            return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
            return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
        }

        if (includeExclude != null) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support the include/exclude " +
                    "settings as it can only be applied to string values");
        }

        if (valuesSource instanceof NumericValuesSource) {
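
Note: the guard above means include/exclude is rejected for any non-string values source. A hypothetical misuse that
would trip it (the builder methods are from this commit; the field name is illustrative):

[source,java]
--------------------------------------------------
// Patterns only apply to string values; over a numeric field the factory
// throws AggregationExecutionException when the aggregator is created.
TermsBuilder invalid = new TermsBuilder("prices")
        .field("price")       // numeric field -> NumericValuesSource
        .include("1\\d+");    // rejected at aggregator creation time
--------------------------------------------------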

@@ -7,34 +7,96 @@ import java.io.IOException;
import java.util.Locale;

/**
 *
 * Builds a {@code terms} aggregation
 */
public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {

    private int size = -1;
    private int shardSize = -1;
    private Terms.ValueType valueType;
    private Terms.Order order;
    private String includePattern;
    private int includeFlags;
    private String excludePattern;
    private int excludeFlags;

    public TermsBuilder(String name) {
        super(name, "terms");
    }

    /**
     * Sets the size - indicating how many term buckets should be returned (defaults to 10)
     */
    public TermsBuilder size(int size) {
        this.size = size;
        return this;
    }

    /**
     * Sets the shard_size - indicating the number of term buckets each shard will return to the coordinating node (the
     * node that coordinates the search execution). The higher the shard size is, the more accurate the results are.
     */
    public TermsBuilder shardSize(int shardSize) {
        this.shardSize = shardSize;
        return this;
    }

    /**
     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
     * on the {@link java.util.regex.Pattern} class.
     *
     * @see #include(String, int)
     */
    public TermsBuilder include(String regex) {
        return include(regex, 0);
    }

    /**
     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
     * on the {@link java.util.regex.Pattern} class.
     *
     * @see java.util.regex.Pattern#compile(String, int)
     */
    public TermsBuilder include(String regex, int flags) {
        this.includePattern = regex;
        this.includeFlags = flags;
        return this;
    }

    /**
     * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
     * expression is based on the {@link java.util.regex.Pattern} class.
     *
     * @see #exclude(String, int)
     */
    public TermsBuilder exclude(String regex) {
        return exclude(regex, 0);
    }

    /**
     * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
     * expression is based on the {@link java.util.regex.Pattern} class.
     *
     * @see java.util.regex.Pattern#compile(String, int)
     */
    public TermsBuilder exclude(String regex, int flags) {
        this.excludePattern = regex;
        this.excludeFlags = flags;
        return this;
    }

    /**
     * When using scripts, the value type indicates the types of the values the script is generating.
     */
    public TermsBuilder valueType(Terms.ValueType valueType) {
        this.valueType = valueType;
        return this;
    }

    /**
     * Defines the order in which the buckets will be returned.
     */
    public TermsBuilder order(Terms.Order order) {
        this.order = order;
        return this;

@@ -55,6 +117,26 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
            builder.field("order");
            order.toXContent(builder, params);
        }
        if (includePattern != null) {
            if (includeFlags == 0) {
                builder.field("include", includePattern);
            } else {
                builder.startObject("include")
                        .field("pattern", includePattern)
                        .field("flags", includeFlags)
                        .endObject();
            }
        }
        if (excludePattern != null) {
            if (excludeFlags == 0) {
                builder.field("exclude", excludePattern);
            } else {
                builder.startObject("exclude")
                        .field("pattern", excludePattern)
                        .field("flags", excludeFlags)
                        .endObject();
            }
        }
        return builder;
    }
}
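
As a usage sketch of the new builder methods (mirroring the documented JSON example; the index field name is
illustrative):

[source,java]
--------------------------------------------------
import java.util.regex.Pattern;

TermsBuilder tags = new TermsBuilder("tags")
        .field("tags")
        .include(".*sport.*", Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE)
        .exclude("water_.*"); // no flags -> serialized as a plain "exclude" string
--------------------------------------------------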

@@ -19,6 +19,7 @@

package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;

@@ -26,8 +27,10 @@ import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.support.FieldContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;

@@ -39,6 +42,7 @@ import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;

/**
 *

@@ -50,9 +54,6 @@ public class TermsParser implements Aggregator.Parser {
        return StringTerms.TYPE.name();
    }

    // TODO add support for shard_size (vs. size) a la terms facets
    // TODO add support for term filtering (regexp/include/exclude) a la terms facets

    @Override
    public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {

@@ -67,6 +68,10 @@ public class TermsParser implements Aggregator.Parser {
        boolean orderAsc = false;
        String format = null;
        boolean assumeUnique = false;
        String include = null;
        int includeFlags = 0; // 0 means no flags
        String exclude = null;
        int excludeFlags = 0; // 0 means no flags

        XContentParser.Token token;

@@ -85,6 +90,10 @@ public class TermsParser implements Aggregator.Parser {
                valueType = Terms.ValueType.resolveType(parser.text());
            } else if ("format".equals(currentFieldName)) {
                format = parser.text();
            } else if ("include".equals(currentFieldName)) {
                include = parser.text();
            } else if ("exclude".equals(currentFieldName)) {
                exclude = parser.text();
            }
        } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
            if ("script_values_unique".equals(currentFieldName)) {

@@ -105,8 +114,45 @@ public class TermsParser implements Aggregator.Parser {
                    orderKey = parser.currentName();
                } else if (token == XContentParser.Token.VALUE_STRING) {
                    String dir = parser.text();
                    orderAsc = "asc".equalsIgnoreCase(dir);
                    //TODO: do we want to throw a parse error if the alternative is not "desc"???
                    if ("asc".equalsIgnoreCase(dir)) {
                        orderAsc = true;
                    } else if ("desc".equalsIgnoreCase(dir)) {
                        orderAsc = false;
                    } else {
                        throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]");
                    }
                }
            }
        } else if ("include".equals(currentFieldName)) {
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.VALUE_STRING) {
                    if ("pattern".equals(currentFieldName)) {
                        include = parser.text();
                    } else if ("flags".equals(currentFieldName)) {
                        includeFlags = Regex.flagsFromString(parser.text());
                    }
                } else if (token == XContentParser.Token.VALUE_NUMBER) {
                    if ("flags".equals(currentFieldName)) {
                        includeFlags = parser.intValue();
                    }
                }
            }
        } else if ("exclude".equals(currentFieldName)) {
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.VALUE_STRING) {
                    if ("pattern".equals(currentFieldName)) {
                        exclude = parser.text();
                    } else if ("flags".equals(currentFieldName)) {
                        excludeFlags = Regex.flagsFromString(parser.text());
                    }
                } else if (token == XContentParser.Token.VALUE_NUMBER) {
                    if ("flags".equals(currentFieldName)) {
                        excludeFlags = parser.intValue();
                    }
                }
            }
        }

@@ -118,6 +164,13 @@ public class TermsParser implements Aggregator.Parser {
            shardSize = requiredSize;
        }

        IncludeExclude includeExclude = null;
        if (include != null || exclude != null) {
            Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
            Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
            includeExclude = new IncludeExclude(includePattern, excludePattern);
        }

        InternalOrder order = resolveOrder(orderKey, orderAsc);
        SearchScript searchScript = null;
        if (script != null) {

@@ -139,14 +192,14 @@ public class TermsParser implements Aggregator.Parser {
            if (!assumeUnique) {
                config.ensureUnique(true);
            }
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
        }

        FieldMapper<?> mapper = context.smartNameFieldMapper(field);
        if (mapper == null) {
            ValuesSourceConfig<?> config = new ValuesSourceConfig<BytesValuesSource>(BytesValuesSource.class);
            config.unmapped(true);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
            return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
        }
        IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);

@@ -188,7 +241,7 @@ public class TermsParser implements Aggregator.Parser {
            config.ensureUnique(true);
        }

        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize);
        return new TermsAggregatorFactory(aggregationName, config, order, requiredSize, shardSize, includeExclude);
    }

    static InternalOrder resolveOrder(String key, boolean asc) {

@@ -20,9 +20,9 @@
package org.elasticsearch.search.aggregations.bucket.terms;

import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.AggregatorFactories;

import java.io.IOException;

@@ -17,9 +17,10 @@
 * under the License.
 */

package org.elasticsearch.search.aggregations.bucket.terms;
package org.elasticsearch.search.aggregations.bucket.terms.support;

import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;

import java.util.Comparator;

@@ -0,0 +1,68 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.aggregations.bucket.terms.support;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
 * exclusion has precedence, where the {@code include} is evaluated first and then the {@code exclude}.
 */
public class IncludeExclude {

    private final Matcher include;
    private final Matcher exclude;
    private final CharsRef scratch = new CharsRef();

    /**
     * @param include   The regular expression pattern for the terms to be included
     *                  (may only be {@code null} if {@code exclude} is not {@code null})
     * @param exclude   The regular expression pattern for the terms to be excluded
     *                  (may only be {@code null} if {@code include} is not {@code null})
     */
    public IncludeExclude(Pattern include, Pattern exclude) {
        assert include != null || exclude != null : "include & exclude cannot both be null"; // otherwise IncludeExclude object should be null
        this.include = include != null ? include.matcher("") : null;
        this.exclude = exclude != null ? exclude.matcher("") : null;
    }

    /**
     * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
     */
    public boolean accept(BytesRef value) {
        UnicodeUtil.UTF8toUTF16(value, scratch);
        if (include == null) {
            // exclude must not be null
            return !exclude.reset(scratch).matches();
        }
        if (!include.reset(scratch).matches()) {
            return false;
        }
        if (exclude == null) {
            return true;
        }
        return !exclude.reset(scratch).matches();
    }
}
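
A small usage sketch of the class above, demonstrating the precedence described in its javadoc (pattern values
borrowed from the documentation example):

[source,java]
--------------------------------------------------
import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;

IncludeExclude filter = new IncludeExclude(
        Pattern.compile(".*sport.*"), Pattern.compile("water_.*"));

filter.accept(new BytesRef("sports"));       // true  - include matches, exclude does not
filter.accept(new BytesRef("water_sports")); // false - exclude wins over include
filter.accept(new BytesRef("cooking"));      // false - include does not match
--------------------------------------------------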

@@ -34,6 +34,7 @@ import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;

@@ -104,6 +105,150 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
        }
    }

    @Test
    public void singleValueField_WithRegexFiltering() throws Exception {

        // include without exclude
        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009

        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").include("val00.+"))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        Terms terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(10));

        for (int i = 0; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }

        // include and exclude
        // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").include("val00.+").exclude("(val000|val001)"))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(8));

        for (int i = 2; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }

        // exclude without include
        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").exclude("val0[1-9]+.+"))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(10));

        for (int i = 0; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }
    }

    @Test
    public void singleValueField_WithRegexFiltering_WithFlags() throws Exception {

        // include without exclude
        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
        // with case insensitive flag on the include regex

        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").include("VAL00.+", Pattern.CASE_INSENSITIVE))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        Terms terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(10));

        for (int i = 0; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }

        // include and exclude
        // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
        // with multi-flag masking on the exclude regex

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(8));

        for (int i = 2; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }

        // exclude without include
        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
        // with a "no flag" flag

        response = client().prepareSearch("idx").setTypes("high_card_type")
                .addAggregation(terms("terms")
                        .field("value").exclude("val0[1-9]+.+", 0))
                .execute().actionGet();

        assertThat(response.getFailedShards(), equalTo(0));

        terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        assertThat(terms.buckets().size(), equalTo(10));

        for (int i = 0; i < 10; i++) {
            Terms.Bucket bucket = terms.getByTerm("val00" + i);
            assertThat(bucket, notNullValue());
            assertThat(bucket.getKey().string(), equalTo("val00" + i));
            assertThat(bucket.getDocCount(), equalTo(1l));
        }
    }


    @Test
    public void singleValueField_WithMaxSize() throws Exception {
        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")