Aggs enhancement - allow Include/Exclude clauses to use array of terms as alternative to a regex

Closes #6782

parent 3e589cd25b
commit 3c8f8cc090
@@ -449,67 +449,10 @@ WARNING: Use of background filters will slow the query as each term's postings m
 
 ===== Filtering Values
 
 It is possible (although rarely required) to filter the values for which buckets will be created. This can be done using the `include` and
-`exclude` parameters which are based on regular expressions. This functionality mirrors the features
-offered by the `terms` aggregation.
+`exclude` parameters which are based on a regular expression string or arrays of exact terms. This functionality mirrors the features
+described in the <<search-aggregations-bucket-terms-aggregation,terms aggregation>> documentation.
-
-[source,js]
---------------------------------------------------
-{
-    "aggs" : {
-        "tags" : {
-            "significant_terms" : {
-                "field" : "tags",
-                "include" : ".*sport.*",
-                "exclude" : "water_.*"
-            }
-        }
-    }
-}
---------------------------------------------------
-
-In the above example, buckets will be created for all the tags that has the word `sport` in them, except those starting
-with `water_` (so the tag `water_sports` will no be aggregated). The `include` regular expression will determine what
-values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
-both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`.
-
-The regular expression are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern],
-and as such, they it is also possible to pass in flags that will determine how the compiled regular expression will work:
-
-[source,js]
---------------------------------------------------
-{
-    "aggs" : {
-        "tags" : {
-            "terms" : {
-                "field" : "tags",
-                "include" : {
-                    "pattern" : ".*sport.*",
-                    "flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
-                },
-                "exclude" : {
-                    "pattern" : "water_.*",
-                    "flags" : "CANON_EQ|CASE_INSENSITIVE"
-                }
-            }
-        }
-    }
-}
---------------------------------------------------
-
-<1> the flags are concatenated using the `|` character as a separator
-
-The possible flags that can be used are:
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
-
 ===== Execution hint
 
 There are two mechanisms by which terms aggregations can be executed: either by using field values directly in order to aggregate
@@ -418,7 +418,7 @@ Generating the terms using a script:
 ==== Filtering Values
 
 It is possible to filter the values for which buckets will be created. This can be done using the `include` and
-`exclude` parameters which are based on regular expressions.
+`exclude` parameters which are based on regular expression strings or arrays of exact values.
 
 [source,js]
 --------------------------------------------------

@@ -477,6 +477,29 @@ http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CA
 http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
 http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
 
+For matching based on exact values the `include` and `exclude` parameters can simply take an array of
+strings that represent the terms as they are found in the index:
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs" : {
+        "JapaneseCars" : {
+            "terms" : {
+                "field" : "make",
+                "include" : ["mazda", "honda"]
+            }
+        },
+        "ActiveCarManufacturers" : {
+            "terms" : {
+                "field" : "make",
+                "exclude" : ["rover", "jensen"]
+            }
+        }
+    }
+}
+--------------------------------------------------
+
 ==== Multi-field terms aggregation
 
 The `terms` aggregation does not support collecting terms from multiple fields
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;

@@ -43,6 +44,8 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
     private String executionHint;
     private SubAggCollectionMode collectionMode;
     private Boolean showTermDocCountError;
+    private String[] includeTerms = null;
+    private String[] excludeTerms = null;
 
     /**
      * Sole constructor.

@@ -101,10 +104,24 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
      * @see java.util.regex.Pattern#compile(String, int)
      */
     public TermsBuilder include(String regex, int flags) {
+        if (includeTerms != null) {
+            throw new ElasticsearchIllegalArgumentException("include clause must be an array of strings or a regex, not both");
+        }
         this.includePattern = regex;
         this.includeFlags = flags;
         return this;
     }
 
+    /**
+     * Define a set of terms that should be aggregated.
+     */
+    public TermsBuilder include(String [] terms) {
+        if (includePattern != null) {
+            throw new ElasticsearchIllegalArgumentException("include clause must be an array of strings or a regex, not both");
+        }
+        this.includeTerms = terms;
+        return this;
+    }
 
     /**
      * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular

@@ -123,10 +140,25 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
      * @see java.util.regex.Pattern#compile(String, int)
      */
     public TermsBuilder exclude(String regex, int flags) {
+        if (excludeTerms != null) {
+            throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
+        }
         this.excludePattern = regex;
         this.excludeFlags = flags;
         return this;
     }
 
+    /**
+     * Define a set of terms that should not be aggregated.
+     */
+    public TermsBuilder exclude(String [] terms) {
+        if (excludePattern != null) {
+            throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
+        }
+        this.excludeTerms = terms;
+        return this;
+    }
+
 
     /**
      * When using scripts, the value type indicates the types of the values the script is generating.

@@ -189,6 +221,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
         if (collectionMode != null) {
             builder.field(Aggregator.COLLECT_MODE.getPreferredName(), collectionMode.parseField().getPreferredName());
         }
+        if (includeTerms != null) {
+            builder.array("include", includeTerms);
+        }
         if (includePattern != null) {
             if (includeFlags == 0) {
                 builder.field("include", includePattern);

@@ -199,6 +234,9 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
                 .endObject();
             }
         }
+        if (excludeTerms != null) {
+            builder.array("exclude", excludeTerms);
+        }
         if (excludePattern != null) {
             if (excludeFlags == 0) {
                 builder.field("exclude", excludePattern);
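As a reading aid, here is a minimal, hypothetical usage sketch (not part of the commit) of the TermsBuilder methods added above: the new String[] overloads set exact-terms filters, and mixing the regex overload with the array overload on the same clause now fails fast. The class name, field name and values below are illustrative only.

import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;

public class TermsBuilderArrayDemo {
    public static void main(String[] args) {
        // Exact-terms filtering via the new String[] overloads.
        TermsBuilder makes = new TermsBuilder("JapaneseCars")
                .field("make")
                .include(new String[] { "mazda", "honda" })    // exact terms to aggregate
                .exclude(new String[] { "rover", "jensen" });  // exact terms to drop

        try {
            // Mixing the regex form with the array form on the same clause is rejected.
            makes.include(".*a.*", 0);
        } catch (ElasticsearchIllegalArgumentException e) {
            // expected: a clause must be either an array of strings or a regex, not both
        }
    }
}

The JSON counterpart of this kind of filtering is the JapaneseCars/ActiveCarManufacturers example added to the terms aggregation documentation earlier in this commit.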
@@ -21,8 +21,10 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
 import org.apache.lucene.index.RandomAccessOrds;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.LongBitSet;
+import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentParser;

@@ -31,6 +33,8 @@ import org.elasticsearch.search.aggregations.support.ValuesSource;
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 

@@ -43,51 +47,127 @@ public class IncludeExclude {
     private final Matcher include;
     private final Matcher exclude;
     private final CharsRefBuilder scratch = new CharsRefBuilder();
+    private Set<BytesRef> includeValues;
+    private Set<BytesRef> excludeValues;
+    private final boolean hasRegexTest;
 
     /**
      * @param include The regular expression pattern for the terms to be included
-     *                (may only be {@code null} if {@code exclude} is not {@code null}
+     *                (may only be {@code null} if one of the other arguments is non-null)
+     * @param includeValues The terms to be included
+     *                (may only be {@code null} if one of the other arguments is non-null)
      * @param exclude The regular expression pattern for the terms to be excluded
-     *                (may only be {@code null} if {@code include} is not {@code null}
+     *                (may only be {@code null} if one of the other arguments is non-null)
+     * @param excludeValues The terms to be excluded
+     *                (may only be {@code null} if one of the other arguments is non-null)
      */
-    public IncludeExclude(Pattern include, Pattern exclude) {
-        assert include != null || exclude != null : "include & exclude cannot both be null"; // otherwise IncludeExclude object should be null
+    public IncludeExclude(Pattern include, Pattern exclude, Set<BytesRef> includeValues, Set<BytesRef> excludeValues) {
+        assert includeValues != null || include != null ||
+                exclude != null || excludeValues != null : "includes & excludes cannot both be null"; // otherwise IncludeExclude object should be null
         this.include = include != null ? include.matcher("") : null;
         this.exclude = exclude != null ? exclude.matcher("") : null;
+        hasRegexTest = include != null || exclude != null;
+        this.includeValues = includeValues;
+        this.excludeValues = excludeValues;
     }
 
     /**
      * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
      */
     public boolean accept(BytesRef value) {
-        scratch.copyUTF8Bytes(value);
-        if (include == null) {
-            // exclude must not be null
-            return !exclude.reset(scratch.get()).matches();
-        }
-        if (!include.reset(scratch.get()).matches()) {
-            return false;
-        }
-        if (exclude == null) {
-            return true;
-        }
-        return !exclude.reset(scratch.get()).matches();
+        if (hasRegexTest) {
+            // We need to perform UTF8 to UTF16 conversion for use in the regex matching
+            scratch.copyUTF8Bytes(value);
+        }
+        return isIncluded(value, scratch.get()) && !isExcluded(value, scratch.get());
+    }
+
+    private boolean isIncluded(BytesRef value, CharsRef utf16Chars) {
+
+        if ((includeValues == null) && (include == null)) {
+            // No include criteria to be tested.
+            return true;
+        }
+
+        if (include != null) {
+            if (include.reset(scratch.get()).matches()) {
+                return true;
+            }
+        }
+        if (includeValues != null) {
+            if (includeValues.contains(value)) {
+                return true;
+            }
+        }
+        // Some include criteria was tested but no match found
+        return false;
+    }
+
+    private boolean isExcluded(BytesRef value, CharsRef utf16Chars) {
+        if (exclude != null) {
+            if (exclude.reset(scratch.get()).matches()) {
+                return true;
+            }
+        }
+        if (excludeValues != null) {
+            if (excludeValues.contains(value)) {
+                return true;
+            }
+        }
+        // No exclude criteria was tested or no match found
+        return false;
     }
 
     /**
      * Computes which global ordinals are accepted by this IncludeExclude instance.
      */
     public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
-        TermsEnum globalTermsEnum = valueSource.globalOrdinalsValues().termsEnum();
         LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
-        try {
-            for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
-                if (accept(term)) {
-                    acceptedGlobalOrdinals.set(globalTermsEnum.ord());
-                }
-            }
-        } catch (IOException e) {
-            throw ExceptionsHelper.convertToElastic(e);
-        }
+        // There are 3 ways of populating this bitset:
+        // 1) Looking up the global ordinals for known "include" terms
+        // 2) Looking up the global ordinals for known "exclude" terms
+        // 3) Traversing the term enum for all terms and running past regexes
+        // Option 3 is known to be very slow in the case of high-cardinality fields and
+        // should be avoided if possible.
+        if (includeValues != null) {
+            // optimize for the case where the set of accepted values is a set
+            // of known terms, not a regex that would have to be tested against all terms in the index
+            for (BytesRef includeValue : includeValues) {
+                // We need to perform UTF8 to UTF16 conversion for use in the regex matching
+                scratch.copyUTF8Bytes(includeValue);
+                if (!isExcluded(includeValue, scratch.get())) {
+                    long ord = globalOrdinals.lookupTerm(includeValue);
+                    if (ord >= 0) {
+                        acceptedGlobalOrdinals.set(ord);
+                    }
+                }
+            }
+        } else {
+            if(hasRegexTest) {
+                // We have includeVals that are a regex or only regex excludes - we need to do the potentially
+                // slow option of hitting termsEnum for every term in the index.
+                TermsEnum globalTermsEnum = valueSource.globalOrdinalsValues().termsEnum();
+                try {
+                    for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
+                        if (accept(term)) {
+                            acceptedGlobalOrdinals.set(globalTermsEnum.ord());
+                        }
+                    }
+                } catch (IOException e) {
+                    throw ExceptionsHelper.convertToElastic(e);
+                }
+            } else {
+                // we only have a set of known values to exclude - create a bitset with all good values and negate the known bads
+                acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
+                for (BytesRef excludeValue : excludeValues) {
+                    long ord = globalOrdinals.lookupTerm(excludeValue);
+                    if (ord >= 0) {
+                        acceptedGlobalOrdinals.clear(ord);
+                    }
+                }
+
+            }
+        }
         return acceptedGlobalOrdinals;
     }
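To make the accept() semantics above easier to follow, here is an illustrative sketch (not part of the commit) that combines an exact include set with a regex exclude; whenever both sides match a term, the exclusion wins. The class name and term values are purely illustrative.

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

public class IncludeExcludeAcceptDemo {
    public static void main(String[] args) {
        Set<BytesRef> includeValues = new HashSet<>();
        includeValues.add(new BytesRef("sport"));
        includeValues.add(new BytesRef("water_polo"));

        // include regex = null, exclude regex = "water_.*", exact include set, no exact excludes
        IncludeExclude filter = new IncludeExclude(null, Pattern.compile("water_.*"), includeValues, null);

        System.out.println(filter.accept(new BytesRef("sport")));      // true  - in the include set, not excluded
        System.out.println(filter.accept(new BytesRef("water_polo"))); // false - included, but the exclude regex matches
        System.out.println(filter.accept(new BytesRef("football")));   // false - include criteria exist and none match
    }
}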
@@ -102,6 +182,8 @@ public class IncludeExclude {
         int includeFlags = 0; // 0 means no flags
         String exclude = null;
         int excludeFlags = 0; // 0 means no flags
+        Set<BytesRef> includeValues;
+        Set<BytesRef> excludeValues;
 
         public Parser(String aggName, InternalAggregation.Type aggType, SearchContext context) {
             this.aggName = aggName;

@@ -121,6 +203,18 @@ public class IncludeExclude {
                 }
                 return true;
             }
+
+            if (token == XContentParser.Token.START_ARRAY) {
+                if ("include".equals(currentFieldName)) {
+                    includeValues = parseArrayToSet(parser);
+                    return true;
+                }
+                if ("exclude".equals(currentFieldName)) {
+                    excludeValues = parseArrayToSet(parser);
+                    return true;
+                }
+                return false;
+            }
 
             if (token == XContentParser.Token.START_OBJECT) {
                 if ("include".equals(currentFieldName)) {

@@ -163,14 +257,27 @@ public class IncludeExclude {
 
             return false;
         }
+
+        private Set<BytesRef> parseArrayToSet(XContentParser parser) throws IOException {
+            final Set<BytesRef> set = new HashSet<>();
+            if (parser.currentToken() != XContentParser.Token.START_ARRAY) {
+                throw new ElasticsearchParseException("Missing start of array in include/exclude clause");
+            }
+            while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+                if (!parser.currentToken().isValue()) {
+                    throw new ElasticsearchParseException("Array elements in include/exclude clauses should be string values");
+                }
+                set.add(new BytesRef(parser.text()));
+            }
+            return set;
+        }
 
         public IncludeExclude includeExclude() {
-            if (include == null && exclude == null) {
+            if (include == null && exclude == null && includeValues == null && excludeValues == null) {
                 return null;
             }
             Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
             Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
-            return new IncludeExclude(includePattern, excludePattern);
+            return new IncludeExclude(includePattern, excludePattern, includeValues, excludeValues);
         }
     }
 
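For reference, the parser changes above mean an include or exclude clause in a request body can take two shapes. The fragments below are illustrative Java string literals only (field names and values are placeholders): the regex shape goes through the pre-existing string/object branches, while the array shape is routed through the new START_ARRAY branch and parseArrayToSet().

public class IncludeExcludeRequestShapes {
    // Regex form - compiled into a java.util.regex.Pattern by the parser.
    static final String REGEX_FORM =
            "{ \"terms\" : { \"field\" : \"tags\", \"include\" : \".*sport.*\", \"exclude\" : \"water_.*\" } }";

    // Exact-terms form - collected into a Set<BytesRef> by parseArrayToSet().
    static final String ARRAY_FORM =
            "{ \"terms\" : { \"field\" : \"make\", \"include\" : [\"mazda\", \"honda\"], \"exclude\" : [\"rover\", \"jensen\"] } }";
}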
@@ -38,9 +38,11 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.hamcrest.Matchers;
 import org.junit.Test;
 
+import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.regex.Pattern;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

@@ -51,6 +53,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSear
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.core.IsNull.notNullValue;
+import static org.hamcrest.core.IsNull.nullValue;
 
 /**
  *

@@ -336,6 +339,94 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
             assertThat(bucket.getDocCount(), equalTo(1l));
         }
     }
+
+    @Test
+    public void singleValueField_WithExactTermFiltering() throws Exception {
+        // include without exclude
+        String incVals[] = { "val000", "val001", "val002", "val003", "val004", "val005", "val006", "val007", "val008", "val009" };
+        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
+                        .field(SINGLE_VALUED_FIELD_NAME)
+                        .collectMode(randomFrom(SubAggCollectionMode.values()))
+                        .include(incVals))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        Terms terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.getBuckets().size(), equalTo(incVals.length));
+
+        for (String incVal : incVals) {
+            Terms.Bucket bucket = terms.getBucketByKey(incVal);
+            assertThat(bucket, notNullValue());
+            assertThat(key(bucket), equalTo(incVal));
+            assertThat(bucket.getDocCount(), equalTo(1l));
+        }
+
+        // include and exclude
+        // Slightly illogical example with exact terms below as include and exclude sets
+        // are made to overlap but the exclude set should have priority over matches.
+        // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
+        String excVals[] = { "val000", "val001" };
+
+        response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
+                        .field(SINGLE_VALUED_FIELD_NAME)
+                        .collectMode(randomFrom(SubAggCollectionMode.values()))
+                        .include(incVals)
+                        .exclude(excVals))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.getBuckets().size(), equalTo(8));
+
+        for (int i = 2; i < 10; i++) {
+            Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
+            assertThat(bucket, notNullValue());
+            assertThat(key(bucket), equalTo("val00" + i));
+            assertThat(bucket.getDocCount(), equalTo(1l));
+        }
+
+        // Check case with only exact term exclude clauses
+        response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
+                        .field(SINGLE_VALUED_FIELD_NAME)
+                        .collectMode(randomFrom(SubAggCollectionMode.values()))
+                        .exclude(excVals))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.getBuckets().size(), equalTo(10));
+        for (String key : excVals) {
+            Terms.Bucket bucket = terms.getBucketByKey(key);
+            assertThat(bucket, nullValue());
+        }
+        NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH);
+        nf.setMinimumIntegerDigits(3);
+        for (int i = 2; i < 12; i++) {
+            Terms.Bucket bucket = terms.getBucketByKey("val" + nf.format(i));
+            assertThat(bucket, notNullValue());
+            assertThat(key(bucket), equalTo("val" + nf.format(i)));
+            assertThat(bucket.getDocCount(), equalTo(1l));
+        }
+
+    }
+
+
     @Test