Merge pull request #10418 from jpountz/enhancement/speed_up_aggs_include_exclude
Aggregations: Speed up include/exclude in terms aggregations with regexps. Close #10418
commit e25db222ee
@@ -139,6 +139,9 @@ equivalent to the former `pre_zone` option. Setting `time_zone` to a value like
 being applied in the specified time zone. In addition to this, the `pre_zone_adjust_large_interval` option is removed because we
 now always return dates and bucket keys in UTC.
 
+`include`/`exclude` filtering on the `terms` aggregation now uses the same syntax as regexp queries instead of the Java syntax. While simple
+regexps should still work, more complex ones might need some rewriting. Also, the `flags` parameter is not supported anymore.
+
 === Terms filter lookup caching
 
 The terms filter lookup mechanism does not support the `cache` option anymore.
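To make the migration note concrete, here is a minimal sketch of the change as seen from the Java API (illustrative only, not part of the commit; the aggregation and field names are invented):

[source,java]
--------------------------------------------------
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;

import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

public class IncludeExcludeMigrationSketch {
    public static void main(String[] args) {
        // Before this commit the pattern used java.util.regex syntax with an
        // optional flags argument, e.g. include(".*sport.*", Pattern.CASE_INSENSITIVE).
        // After this commit the pattern uses Lucene regexp-query syntax and
        // there is no flags parameter anymore:
        TermsBuilder tags = terms("tags")
                .field("tags")
                .include(".*sport.*")
                .exclude("water_.*");
    }
}
--------------------------------------------------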
@@ -482,42 +482,7 @@ with `water_` (so the tag `water_sports` will not be aggregated). The `include` regular expression will determine what
 values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
 both are defined, the `exclude` has precedence, meaning the `include` is evaluated first and only then the `exclude`.
 
-The regular expressions are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern],
-and as such, it is also possible to pass in flags that will determine how the compiled regular expression will work:
-
-[source,js]
---------------------------------------------------
-{
-    "aggs" : {
-        "tags" : {
-            "terms" : {
-                "field" : "tags",
-                "include" : {
-                    "pattern" : ".*sport.*",
-                    "flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
-                },
-                "exclude" : {
-                    "pattern" : "water_.*",
-                    "flags" : "CANON_EQ|CASE_INSENSITIVE"
-                }
-            }
-        }
-    }
-}
---------------------------------------------------
-
-<1> the flags are concatenated using the `|` character as a separator
-
-The possible flags that can be used are:
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
-http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
+The syntax is the same as <<regexp-syntax,regexp queries>>.
 
 For matching based on exact values the `include` and `exclude` parameters can simply take an array of
 strings that represent the terms as they are found in the index:
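As a quick check of the new matching semantics (include evaluated first, then exclude), the classes introduced further down in this diff can be exercised directly. A sketch, assuming the ES 1.x artifacts from this commit are on the classpath:

[source,java]
--------------------------------------------------
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

public class IncludeExcludeSemanticsSketch {
    public static void main(String[] args) {
        // Accept terms matching ".*sport.*" unless they also match "water_.*".
        IncludeExclude incExc = new IncludeExclude(new RegExp(".*sport.*"), new RegExp("water_.*"));
        IncludeExclude.StringFilter filter = incExc.convertToStringFilter();

        System.out.println(filter.accept(new BytesRef("ball_sports")));  // true
        System.out.println(filter.accept(new BytesRef("water_sports"))); // false: excluded
        System.out.println(filter.accept(new BytesRef("fishing")));      // false: not included
    }
}
--------------------------------------------------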
@@ -48,7 +48,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
 
     public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
                                                     BucketCountThresholds bucketCountThresholds,
-                                                    IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
+                                                    IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
                                                     SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
 
         super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
@@ -145,7 +145,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
 
         private final LongHash bucketOrds;
 
-        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
+        public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
             super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory, metaData);
             bucketOrds = new LongHash(1, aggregationContext.bigArrays());
         }
@@ -47,7 +47,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
 
     public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
                                             BucketCountThresholds bucketCountThresholds,
-                                            IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
+                                            IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
                                             SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
 
         super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
@@ -65,7 +65,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
                 TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                 AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
-            return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
+            final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
+            return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
         }
 
     },
@@ -77,7 +78,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
                 AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
             ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
             IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
-            return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
+            final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+            return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
         }
 
     },
@@ -87,7 +89,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
                 TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                 AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
-            return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
+            final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+            return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
         }
     };
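The same two-line conversion pattern recurs in every factory in this file and in `TermsAggregatorFactory` further down: the parsed `IncludeExclude` is resolved once, at aggregator-creation time, into the representation its execution mode needs (a `StringFilter` over raw bytes for map-style execution, an `OrdinalsFilter` for global-ordinals execution). Condensed as a sketch (the helper class name is invented; the conversion calls are the ones in this diff):

[source,java]
--------------------------------------------------
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

final class FilterConversionSketch {
    // One-time, per-aggregator conversion; the null guard keeps the
    // common "no include/exclude" case free of any automaton work.
    static IncludeExclude.StringFilter toStringFilter(IncludeExclude incExc) {
        return incExc == null ? null : incExc.convertToStringFilter();
    }

    static IncludeExclude.OrdinalsFilter toOrdinalsFilter(IncludeExclude incExc) {
        return incExc == null ? null : incExc.convertToOrdinalsFilter();
    }
}
--------------------------------------------------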
@@ -57,7 +57,7 @@ public class SignificantTermsParser implements Aggregator.Parser {
                 .scriptable(false)
                 .formattable(true)
                 .build();
-        IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, SignificantStringTerms.TYPE, context);
+        IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
         aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
 
         TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
@@ -57,7 +57,7 @@ import java.util.Map;
 public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
 
     protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
-    protected final IncludeExclude includeExclude;
+    protected final IncludeExclude.OrdinalsFilter includeExclude;
 
     // TODO: cache the acceptedglobalValues per aggregation definition.
     // We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
@@ -71,7 +71,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
 
     public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
                                                Terms.Order order, BucketCountThresholds bucketCountThresholds,
-                                               IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
+                                               IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
         super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
         this.valuesSource = valuesSource;
         this.includeExclude = includeExclude;
@@ -260,7 +260,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
         private final LongHash bucketOrds;
 
         public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
-                        Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext,
+                        Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext,
                         Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
             super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, showTermDocCountError, metaData);
             bucketOrds = new LongHash(1, aggregationContext.bigArrays());
@@ -45,11 +45,11 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
 
     private final ValuesSource valuesSource;
     protected final BytesRefHash bucketOrds;
-    private final IncludeExclude includeExclude;
+    private final IncludeExclude.StringFilter includeExclude;
 
     public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
                                  Terms.Order order, BucketCountThresholds bucketCountThresholds,
-                                 IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
+                                 IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
 
         super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
         this.valuesSource = valuesSource;
@@ -50,7 +50,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
                 Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                 AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
-            return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
+            final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
+            return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
         }
 
         @Override
@@ -65,7 +66,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
                 Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                 AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
-            return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
+            final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+            return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
         }
 
         @Override
@@ -80,7 +82,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
         Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
                 Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
                 AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
-            return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
+            final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
+            return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
         }
 
         @Override
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.search.aggregations.bucket.terms;
 
+import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
@@ -37,9 +38,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
     private Terms.ValueType valueType;
     private Terms.Order order;
     private String includePattern;
-    private int includeFlags;
     private String excludePattern;
-    private int excludeFlags;
     private String executionHint;
     private SubAggCollectionMode collectionMode;
     private Boolean showTermDocCountError;
@@ -88,26 +87,15 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
 
     /**
      * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
-     * on the {@link java.util.regex.Pattern} class.
+     * on the {@link RegExp} class.
      *
-     * @see #include(String, int)
+     * @see {@link RegExp#RegExp(String)}
      */
     public TermsBuilder include(String regex) {
-        return include(regex, 0);
-    }
-
-    /**
-     * Define a regular expression that will determine what terms should be aggregated. The regular expression is based
-     * on the {@link java.util.regex.Pattern} class.
-     *
-     * @see java.util.regex.Pattern#compile(String, int)
-     */
-    public TermsBuilder include(String regex, int flags) {
         if (includeTerms != null) {
             throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
         }
         this.includePattern = regex;
-        this.includeFlags = flags;
         return this;
     }
 
@@ -163,26 +151,15 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
 
     /**
      * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
-     * expression is based on the {@link java.util.regex.Pattern} class.
+     * expression is based on the {@link RegExp} class.
     *
-     * @see #exclude(String, int)
+     * @see {@link RegExp#RegExp(String)}
     */
    public TermsBuilder exclude(String regex) {
-        return exclude(regex, 0);
-    }
-
-    /**
-     * Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
-     * expression is based on the {@link java.util.regex.Pattern} class.
-     *
-     * @see java.util.regex.Pattern#compile(String, int)
-     */
-    public TermsBuilder exclude(String regex, int flags) {
        if (excludeTerms != null) {
            throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
        }
        this.excludePattern = regex;
-        this.excludeFlags = flags;
        return this;
    }
 
@@ -287,27 +264,13 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
             builder.array("include", includeTerms);
         }
         if (includePattern != null) {
-            if (includeFlags == 0) {
-                builder.field("include", includePattern);
-            } else {
-                builder.startObject("include")
-                        .field("pattern", includePattern)
-                        .field("flags", includeFlags)
-                        .endObject();
-            }
+            builder.field("include", includePattern);
         }
         if (excludeTerms != null) {
             builder.array("exclude", excludeTerms);
         }
         if (excludePattern != null) {
-            if (excludeFlags == 0) {
-                builder.field("exclude", excludePattern);
-            } else {
-                builder.startObject("exclude")
-                        .field("pattern", excludePattern)
-                        .field("flags", excludeFlags)
-                        .endObject();
-            }
+            builder.field("exclude", excludePattern);
         }
         return builder;
     }
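With the flags gone, `toXContent` always writes the pattern as a plain string field instead of the old `{"pattern": ..., "flags": ...}` object. A sketch of the resulting serialization, built with the same `jsonBuilder` helper the benchmark below uses (the surrounding object is simplified for illustration):

[source,java]
--------------------------------------------------
import org.elasticsearch.common.xcontent.XContentBuilder;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

public class ToXContentSketch {
    public static void main(String[] args) throws Exception {
        // The builder now always emits the bare pattern string.
        XContentBuilder builder = jsonBuilder().startObject()
                .field("include", ".*sport.*")
                .field("exclude", "water_.*")
                .endObject();
        System.out.println(builder.string()); // {"include":".*sport.*","exclude":"water_.*"}
    }
}
--------------------------------------------------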
@@ -46,7 +46,7 @@ public class TermsParser implements Aggregator.Parser {
     public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
         TermsParametersParser aggParser = new TermsParametersParser();
         ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build();
-        IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context);
+        IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
         aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
 
         List<OrderElement> orderElements = aggParser.getOrderElements();
@@ -20,22 +20,30 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
 
 import com.carrotsearch.hppc.LongOpenHashSet;
 import com.carrotsearch.hppc.LongSet;
 
 import org.apache.lucene.index.RandomAccessOrds;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.ElasticsearchParseException;
-import org.elasticsearch.ExceptionsHelper;
-import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.support.ValuesSource;
-import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
+import java.util.SortedSet;
+import java.util.TreeSet;
 
 /**
  * Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
@@ -72,152 +80,145 @@ public class IncludeExclude {
         }
     }
 
-    private final Matcher include;
-    private final Matcher exclude;
-    private final CharsRefBuilder scratch = new CharsRefBuilder();
-    private Set<BytesRef> includeValues;
-    private Set<BytesRef> excludeValues;
-    private final boolean hasRegexTest;
+    // Only used for the 'map' execution mode (ie. scripts)
+    public static class StringFilter {
+
+        private final ByteRunAutomaton runAutomaton;
+
+        private StringFilter(Automaton automaton) {
+            this.runAutomaton = new ByteRunAutomaton(automaton);
+        }
+
+        /**
+         * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
+         */
+        public boolean accept(BytesRef value) {
+            return runAutomaton.run(value.bytes, value.offset, value.length);
+        }
+    }
+
+    public static class OrdinalsFilter {
+
+        private final CompiledAutomaton compiled;
+
+        private OrdinalsFilter(Automaton automaton) {
+            this.compiled = new CompiledAutomaton(automaton);
+        }
+
+        /**
+         * Computes which global ordinals are accepted by this IncludeExclude instance.
+         */
+        public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
+            LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
+            TermsEnum globalTermsEnum;
+            Terms globalTerms = new DocValuesTerms(globalOrdinals);
+            // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
+            globalTermsEnum = compiled.getTermsEnum(globalTerms);
+            for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
+                acceptedGlobalOrdinals.set(globalTermsEnum.ord());
+            }
+            return acceptedGlobalOrdinals;
+        }
+
+    }
+
+    private final RegExp include, exclude;
+    private final SortedSet<BytesRef> includeValues, excludeValues;
 
     /**
      * @param include   The regular expression pattern for the terms to be included
-     *                  (may only be {@code null} if one of the other arguments is none-null.
-     * @param includeValues   The terms to be included
-     *                  (may only be {@code null} if one of the other arguments is none-null.
-     * @param exclude   The regular expression pattern for the terms to be excluded
-     *                  (may only be {@code null} if one of the other arguments is none-null.
-     * @param excludeValues   The terms to be excluded
-     *                  (may only be {@code null} if one of the other arguments is none-null.
+     * @param exclude   The regular expression pattern for the terms to be excluded
      */
-    public IncludeExclude(Pattern include, Pattern exclude, Set<BytesRef> includeValues, Set<BytesRef> excludeValues) {
-        assert includeValues != null || include != null ||
-                exclude != null || excludeValues != null : "includes & excludes cannot both be null"; // otherwise IncludeExclude object should be null
-        this.include = include != null ? include.matcher("") : null;
-        this.exclude = exclude != null ? exclude.matcher("") : null;
-        hasRegexTest = include != null || exclude != null;
+    public IncludeExclude(RegExp include, RegExp exclude) {
+        if (include == null && exclude == null) {
+            throw new IllegalArgumentException();
+        }
+        this.include = include;
+        this.exclude = exclude;
+        this.includeValues = null;
+        this.excludeValues = null;
+    }
+
+    /**
+     * @param includeValues   The terms to be included
+     * @param excludeValues   The terms to be excluded
+     */
+    public IncludeExclude(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
+        if (includeValues == null && excludeValues == null) {
+            throw new IllegalArgumentException();
+        }
+        this.include = null;
+        this.exclude = null;
         this.includeValues = includeValues;
         this.excludeValues = excludeValues;
     }
 
     /**
-     * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
+     * Terms adapter around doc values.
      */
-    public boolean accept(BytesRef value) {
-
-        if (hasRegexTest) {
-            // We need to perform UTF8 to UTF16 conversion for use in the regex matching
-            scratch.copyUTF8Bytes(value);
-        }
-        return isIncluded(value, scratch.get()) && !isExcluded(value, scratch.get());
-    }
+    private static class DocValuesTerms extends Terms {
+
+        private final SortedSetDocValues values;
+
+        DocValuesTerms(SortedSetDocValues values) {
+            this.values = values;
+        }
+
+        @Override
+        public TermsEnum iterator(TermsEnum reuse) throws IOException {
+            return values.termsEnum();
+        }
+
+        @Override
+        public long size() throws IOException {
+            return -1;
+        }
+
+        @Override
+        public long getSumTotalTermFreq() throws IOException {
+            return -1;
+        }
+
+        @Override
+        public long getSumDocFreq() throws IOException {
+            return -1;
+        }
+
+        @Override
+        public int getDocCount() throws IOException {
+            return -1;
+        }
+
+        @Override
+        public boolean hasFreqs() {
+            return false;
+        }
+
+        @Override
+        public boolean hasOffsets() {
+            return false;
+        }
+
+        @Override
+        public boolean hasPositions() {
+            return false;
+        }
+
+        @Override
+        public boolean hasPayloads() {
+            return false;
+        }
+
+    }
 
-    private boolean isIncluded(BytesRef value, CharsRef utf16Chars) {
-
-        if ((includeValues == null) && (include == null)) {
-            // No include criteria to be tested.
-            return true;
-        }
-
-        if (include != null) {
-            if (include.reset(scratch.get()).matches()) {
-                return true;
-            }
-        }
-        if (includeValues != null) {
-            if (includeValues.contains(value)) {
-                return true;
-            }
-        }
-        // Some include criteria was tested but no match found
-        return false;
-    }
-
-    private boolean isExcluded(BytesRef value, CharsRef utf16Chars) {
-        if (exclude != null) {
-            if (exclude.reset(scratch.get()).matches()) {
-                return true;
-            }
-        }
-        if (excludeValues != null) {
-            if (excludeValues.contains(value)) {
-                return true;
-            }
-        }
-        // No exclude criteria was tested or no match found
-        return false;
-    }
-
-    /**
-     * Computes which global ordinals are accepted by this IncludeExclude instance.
-     */
-    public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
-        LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
-        // There are 3 ways of populating this bitset:
-        // 1) Looking up the global ordinals for known "include" terms
-        // 2) Looking up the global ordinals for known "exclude" terms
-        // 3) Traversing the term enum for all terms and running past regexes
-        // Option 3 is known to be very slow in the case of high-cardinality fields and
-        // should be avoided if possible.
-        if (includeValues != null) {
-            // optimize for the case where the set of accepted values is a set
-            // of known terms, not a regex that would have to be tested against all terms in the index
-            for (BytesRef includeValue : includeValues) {
-                // We need to perform UTF8 to UTF16 conversion for use in the regex matching
-                scratch.copyUTF8Bytes(includeValue);
-                if (!isExcluded(includeValue, scratch.get())) {
-                    long ord = globalOrdinals.lookupTerm(includeValue);
-                    if (ord >= 0) {
-                        acceptedGlobalOrdinals.set(ord);
-                    }
-                }
-            }
-        } else {
-            if(hasRegexTest) {
-                // We have includeVals that are a regex or only regex excludes - we need to do the potentially
-                // slow option of hitting termsEnum for every term in the index.
-                TermsEnum globalTermsEnum = globalOrdinals.termsEnum();
-                try {
-                    for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
-                        if (accept(term)) {
-                            acceptedGlobalOrdinals.set(globalTermsEnum.ord());
-                        }
-                    }
-                } catch (IOException e) {
-                    throw ExceptionsHelper.convertToElastic(e);
-                }
-            } else {
-                // we only have a set of known values to exclude - create a bitset with all good values and negate the known bads
-                acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
-                for (BytesRef excludeValue : excludeValues) {
-                    long ord = globalOrdinals.lookupTerm(excludeValue);
-                    if (ord >= 0) {
-                        acceptedGlobalOrdinals.clear(ord);
-                    }
-                }
-
-            }
-        }
-        return acceptedGlobalOrdinals;
-    }
-
     public static class Parser {
 
-        private final String aggName;
-        private final InternalAggregation.Type aggType;
-        private final SearchContext context;
-
         String include = null;
-        int includeFlags = 0; // 0 means no flags
         String exclude = null;
-        int excludeFlags = 0; // 0 means no flags
-        Set<BytesRef> includeValues;
-        Set<BytesRef> excludeValues;
-
-        public Parser(String aggName, InternalAggregation.Type aggType, SearchContext context) {
-            this.aggName = aggName;
-            this.aggType = aggType;
-            this.context = context;
-        }
+        SortedSet<BytesRef> includeValues;
+        SortedSet<BytesRef> excludeValues;
 
         public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException {
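This hunk is where the speedup comes from: instead of looping over every term and running a `java.util.regex.Matcher` per term, the new `OrdinalsFilter` intersects a compiled automaton directly with the sorted term dictionary behind the global ordinals, so whole non-matching regions of the term space are seeked past. A standalone sketch of the same intersection idea in plain Lucene (hypothetical caller; it assumes the enum supports `ord()`, as the doc-values enum used here does):

[source,java]
--------------------------------------------------
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;

final class AutomatonIntersectionSketch {
    // Visit only the terms accepted by the regexp: the TermsEnum returned by
    // CompiledAutomaton.getTermsEnum() seeks over the dictionary instead of
    // testing every term one by one.
    static void collectMatchingOrds(Terms terms) throws java.io.IOException {
        CompiledAutomaton compiled = new CompiledAutomaton(new RegExp("s.*").toAutomaton());
        TermsEnum te = compiled.getTermsEnum(terms);
        for (BytesRef term = te.next(); term != null; term = te.next()) {
            long ord = te.ord(); // the global ordinal to mark as accepted
            // e.g. acceptedGlobalOrdinals.set(ord);
        }
    }
}
--------------------------------------------------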
@@ -234,11 +235,11 @@ public class IncludeExclude {
 
             if (token == XContentParser.Token.START_ARRAY) {
                 if ("include".equals(currentFieldName)) {
-                    includeValues = parseArrayToSet(parser);
+                    includeValues = new TreeSet<>(parseArrayToSet(parser));
                     return true;
                 }
                 if ("exclude".equals(currentFieldName)) {
-                    excludeValues = parseArrayToSet(parser);
+                    excludeValues = new TreeSet<>(parseArrayToSet(parser));
                     return true;
                 }
                 return false;
@@ -252,12 +253,6 @@ public class IncludeExclude {
                 } else if (token == XContentParser.Token.VALUE_STRING) {
                     if ("pattern".equals(currentFieldName)) {
                         include = parser.text();
-                    } else if ("flags".equals(currentFieldName)) {
-                        includeFlags = Regex.flagsFromString(parser.text());
-                    }
-                } else if (token == XContentParser.Token.VALUE_NUMBER) {
-                    if ("flags".equals(currentFieldName)) {
-                        includeFlags = parser.intValue();
                     }
                 }
             }
@@ -268,12 +263,6 @@ public class IncludeExclude {
                 } else if (token == XContentParser.Token.VALUE_STRING) {
                     if ("pattern".equals(currentFieldName)) {
                         exclude = parser.text();
-                    } else if ("flags".equals(currentFieldName)) {
-                        excludeFlags = Regex.flagsFromString(parser.text());
-                    }
-                } else if (token == XContentParser.Token.VALUE_NUMBER) {
-                    if ("flags".equals(currentFieldName)) {
-                        excludeFlags = parser.intValue();
                     }
                 }
             }
@@ -300,17 +289,48 @@ public class IncludeExclude {
         }
 
         public IncludeExclude includeExclude() {
-            if (include == null && exclude == null && includeValues == null && excludeValues == null) {
+            RegExp includePattern = include != null ? new RegExp(include) : null;
+            RegExp excludePattern = exclude != null ? new RegExp(exclude) : null;
+            if (includePattern != null || excludePattern != null) {
+                if (includeValues != null || excludeValues != null) {
+                    throw new ElasticsearchIllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
+                }
+                return new IncludeExclude(includePattern, excludePattern);
+            } else if (includeValues != null || excludeValues != null) {
+                return new IncludeExclude(includeValues, excludeValues);
+            } else {
                 return null;
             }
-            Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
-            Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
-            return new IncludeExclude(includePattern, excludePattern, includeValues, excludeValues);
         }
     }
 
     public boolean isRegexBased() {
-        return hasRegexTest;
+        return include != null || exclude != null;
     }
 
+    private Automaton toAutomaton() {
+        Automaton a = null;
+        if (include != null) {
+            a = include.toAutomaton();
+        } else if (includeValues != null) {
+            a = Automata.makeStringUnion(includeValues);
+        } else {
+            a = Automata.makeAnyString();
+        }
+        if (exclude != null) {
+            a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+        } else if (excludeValues != null) {
+            a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
+        }
+        return a;
+    }
+
+    public StringFilter convertToStringFilter() {
+        return new StringFilter(toAutomaton());
+    }
+
+    public OrdinalsFilter convertToOrdinalsFilter() {
+        return new OrdinalsFilter(toAutomaton());
+    }
+
     public LongFilter convertToLongFilter() {
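`toAutomaton()` above is where the include/exclude semantics are fixed once and for all: start from the include language (a regexp, a finite set of strings, or "any string"), then subtract the exclude language. This is also why the parser switched to `TreeSet`: `Automata.makeStringUnion` expects its `BytesRef` inputs in sorted order. The subtraction can be tried in isolation with plain Lucene (a sketch, not code from the commit):

[source,java]
--------------------------------------------------
import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;

public class MinusSketch {
    public static void main(String[] args) {
        // accepted = language(".*sport.*") minus language("water_.*")
        Automaton accepted = Operations.minus(
                new RegExp(".*sport.*").toAutomaton(),
                new RegExp("water_.*").toAutomaton(),
                Operations.DEFAULT_MAX_DETERMINIZED_STATES);
        ByteRunAutomaton run = new ByteRunAutomaton(accepted);
        byte[] term = "water_sports".getBytes(StandardCharsets.UTF_8);
        System.out.println(run.run(term, 0, term.length)); // false: excluded
    }
}
--------------------------------------------------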
@@ -329,6 +349,7 @@ public class IncludeExclude {
         }
         return result;
     }
 
     public LongFilter convertToDoubleFilter() {
         int numValids = includeValues == null ? 0 : includeValues.size();
         int numInvalids = excludeValues == null ? 0 : excludeValues.size();
@@ -0,0 +1,130 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.benchmark.search.aggregations;

import org.apache.lucene.util.TestUtil;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.node.Node;

import java.util.Random;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

public class IncludeExcludeAggregationSearchBenchmark {

    private static final Random R = new Random();
    private static final String CLUSTER_NAME = IncludeExcludeAggregationSearchBenchmark.class.getSimpleName();
    private static final int NUM_DOCS = 10000000;
    private static final int BATCH = 100;
    private static final int WARM = 3;
    private static final int RUNS = 10;
    private static final int ITERS = 3;

    public static void main(String[] args) {
        Settings settings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put(SETTING_NUMBER_OF_SHARDS, 1)
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        Node[] nodes = new Node[1];
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = nodeBuilder().clusterName(CLUSTER_NAME)
                    .settings(settingsBuilder().put(settings).put("name", "node" + i))
                    .node();
        }

        Node clientNode = nodeBuilder()
                .clusterName(CLUSTER_NAME)
                .settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();

        Client client = clientNode.client();

        try {
            client.admin().indices().create(createIndexRequest("index").settings(settings).mapping("type",
                    jsonBuilder().startObject().startObject("type").startObject("properties")
                        .startObject("str")
                            .field("type", "string")
                            .field("index", "not_analyzed")
                        .endObject()
                    .endObject().endObject().endObject())).actionGet();

            System.out.println("Indexing " + NUM_DOCS + " documents");

            StopWatch stopWatch = new StopWatch().start();
            for (int i = 0; i < NUM_DOCS; ) {
                BulkRequestBuilder request = client.prepareBulk();
                for (int j = 0; j < BATCH && i < NUM_DOCS; ++j) {
                    request.add(client.prepareIndex("index", "type", Integer.toString(i)).setSource("str", TestUtil.randomSimpleString(R)));
                    ++i;
                }
                BulkResponse response = request.execute().actionGet();
                if (response.hasFailures()) {
                    System.err.println("--> failures...");
                    System.err.println(response.buildFailureMessage());
                }
                if ((i % 100000) == 0) {
                    System.out.println("--> Indexed " + i + " took " + stopWatch.stop().lastTaskTime());
                    stopWatch.start();
                }
            }

            client.admin().indices().prepareRefresh("index").execute().actionGet();
        } catch (Exception e) {
            System.out.println("Index already exists, skipping index creation");
        }

        ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
        if (clusterHealthResponse.isTimedOut()) {
            System.err.println("--> Timed out waiting for cluster health");
        }

        for (int i = 0; i < WARM + RUNS; ++i) {
            if (i >= WARM) {
                System.out.println("RUN " + (i - WARM));
            }
            long start = System.nanoTime();
            SearchResponse resp = null;
            for (int j = 0; j < ITERS; ++j) {
                resp = client.prepareSearch("index").setQuery(QueryBuilders.prefixQuery("str", "sf")).setSize(0).addAggregation(terms("t").field("str").include("s.*")).execute().actionGet();
            }
            long end = System.nanoTime();
            if (i >= WARM) {
                System.out.println(new TimeValue((end - start) / ITERS, TimeUnit.NANOSECONDS));
            }
        }
    }

}
@@ -387,86 +387,6 @@ public class StringTermsTests extends AbstractTermsTests {
         }
     }
 
-    @Test
-    public void singleValueField_WithRegexFiltering_WithFlags() throws Exception {
-
-        // include without exclude
-        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
-        // with case insensitive flag on the include regex
-
-        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
-                .addAggregation(terms("terms")
-                        .executionHint(randomExecutionHint())
-                        .field(SINGLE_VALUED_FIELD_NAME)
-                        .collectMode(randomFrom(SubAggCollectionMode.values())).include("VAL00.+", Pattern.CASE_INSENSITIVE))
-                .execute().actionGet();
-
-        assertSearchResponse(response);
-
-        Terms terms = response.getAggregations().get("terms");
-        assertThat(terms, notNullValue());
-        assertThat(terms.getName(), equalTo("terms"));
-        assertThat(terms.getBuckets().size(), equalTo(10));
-
-        for (int i = 0; i < 10; i++) {
-            Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
-            assertThat(bucket, notNullValue());
-            assertThat(key(bucket), equalTo("val00" + i));
-            assertThat(bucket.getDocCount(), equalTo(1l));
-        }
-
-        // include and exclude
-        // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
-        // with multi-flag masking on the exclude regex
-
-        response = client().prepareSearch("idx").setTypes("high_card_type")
-                .addAggregation(terms("terms")
-                        .executionHint(randomExecutionHint())
-                        .field(SINGLE_VALUED_FIELD_NAME)
-                        .collectMode(randomFrom(SubAggCollectionMode.values())).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
-                .execute().actionGet();
-
-        assertSearchResponse(response);
-
-        terms = response.getAggregations().get("terms");
-        assertThat(terms, notNullValue());
-        assertThat(terms.getName(), equalTo("terms"));
-        assertThat(terms.getBuckets().size(), equalTo(8));
-
-        for (int i = 2; i < 10; i++) {
-            Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
-            assertThat(bucket, notNullValue());
-            assertThat(key(bucket), equalTo("val00" + i));
-            assertThat(bucket.getDocCount(), equalTo(1l));
-        }
-
-        // exclude without include
-        // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
-        // with a "no flag" flag
-
-        response = client().prepareSearch("idx").setTypes("high_card_type")
-                .addAggregation(terms("terms")
-                        .executionHint(randomExecutionHint())
-                        .field(SINGLE_VALUED_FIELD_NAME)
-                        .collectMode(randomFrom(SubAggCollectionMode.values())).exclude("val0[1-9]+.+", 0))
-                .execute().actionGet();
-
-        assertSearchResponse(response);
-
-        terms = response.getAggregations().get("terms");
-        assertThat(terms, notNullValue());
-        assertThat(terms.getName(), equalTo("terms"));
-        assertThat(terms.getBuckets().size(), equalTo(10));
-
-        for (int i = 0; i < 10; i++) {
-            Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
-            assertThat(bucket, notNullValue());
-            assertThat(key(bucket), equalTo("val00" + i));
-            assertThat(bucket.getDocCount(), equalTo(1l));
-        }
-    }
-
 
     @Test
     public void singleValueField_WithExactTermFiltering() throws Exception {
         // include without exclude