Merge pull request #10418 from jpountz/enhancement/speed_up_aggs_include_exclude
Aggregations: Speed up include/exclude in terms aggregations with regexps. Close #10418
This commit is contained in:
commit
e25db222ee
|
@ -139,6 +139,9 @@ equivalent to the former `pre_zone` option. Setting `time_zone` to a value like
|
||||||
being applied in the specified time zone. In addition to this, the `pre_zone_adjust_large_interval` option is removed because we
|
being applied in the specified time zone. In addition to this, the `pre_zone_adjust_large_interval` option is removed because we
|
||||||
now always return dates and bucket keys in UTC.
|
now always return dates and bucket keys in UTC.
|
||||||
|
|
||||||
|
`include`/`exclude` filtering on the `terms` aggregation now uses the same syntax as regexp queries instead of the Java syntax. While simple
|
||||||
|
regexps should still work, more complex ones might need some rewriting. Also, the `flags` parameter is not supported anymore.
|
||||||
|
|
||||||
=== Terms filter lookup caching
|
=== Terms filter lookup caching
|
||||||
|
|
||||||
The terms filter lookup mechanism does not support the `cache` option anymore
|
The terms filter lookup mechanism does not support the `cache` option anymore
|
||||||
|
|
|
@ -482,42 +482,7 @@ with `water_` (so the tag `water_sports` will not be aggregated). The `include` r
|
||||||
values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
|
values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When
|
||||||
both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`.
|
both are defined, the `exclude` has precedence, meaning, the `include` is evaluated first and only then the `exclude`.
|
||||||
|
|
||||||
The regular expression are based on the Java(TM) http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html[Pattern],
|
The syntax is the same as <<regexp-syntax,regexp queries>>.
|
||||||
and as such, they it is also possible to pass in flags that will determine how the compiled regular expression will work:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"aggs" : {
|
|
||||||
"tags" : {
|
|
||||||
"terms" : {
|
|
||||||
"field" : "tags",
|
|
||||||
"include" : {
|
|
||||||
"pattern" : ".*sport.*",
|
|
||||||
"flags" : "CANON_EQ|CASE_INSENSITIVE" <1>
|
|
||||||
},
|
|
||||||
"exclude" : {
|
|
||||||
"pattern" : "water_.*",
|
|
||||||
"flags" : "CANON_EQ|CASE_INSENSITIVE"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
<1> the flags are concatenated using the `|` character as a separator
|
|
||||||
|
|
||||||
The possible flags that can be used are:
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CANON_EQ[`CANON_EQ`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#CASE_INSENSITIVE[`CASE_INSENSITIVE`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#COMMENTS[`COMMENTS`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#DOTALL[`DOTALL`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#LITERAL[`LITERAL`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#MULTILINE[`MULTILINE`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CASE[`UNICODE_CASE`],
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNICODE_CHARACTER_CLASS[`UNICODE_CHARACTER_CLASS`] and
|
|
||||||
http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#UNIX_LINES[`UNIX_LINES`]
|
|
||||||
|
|
||||||
For matching based on exact values the `include` and `exclude` parameters can simply take an array of
|
For matching based on exact values the `include` and `exclude` parameters can simply take an array of
|
||||||
strings that represent the terms as they are found in the index:
|
strings that represent the terms as they are found in the index:
|
||||||
|
|
|
@ -48,7 +48,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
|
||||||
|
|
||||||
public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
||||||
BucketCountThresholds bucketCountThresholds,
|
BucketCountThresholds bucketCountThresholds,
|
||||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
|
IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
|
||||||
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
||||||
|
|
||||||
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
|
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
|
||||||
|
@ -145,7 +145,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
|
||||||
|
|
||||||
private final LongHash bucketOrds;
|
private final LongHash bucketOrds;
|
||||||
|
|
||||||
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
||||||
super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory, metaData);
|
super(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggFactory, metaData);
|
||||||
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
|
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
|
||||||
|
|
||||||
public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
public SignificantStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
BucketCountThresholds bucketCountThresholds,
|
BucketCountThresholds bucketCountThresholds,
|
||||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent,
|
IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent,
|
||||||
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
SignificantTermsAggregatorFactory termsAggFactory, Map<String, Object> metaData) throws IOException {
|
||||||
|
|
||||||
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
|
super(name, factories, valuesSource, null, bucketCountThresholds, includeExclude, aggregationContext, parent, SubAggCollectionMode.DEPTH_FIRST, false, metaData);
|
||||||
|
|
|
@ -65,7 +65,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
||||||
return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
|
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
|
||||||
|
return new SignificantStringTermsAggregator(name, factories, valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
},
|
},
|
||||||
|
@ -77,7 +78,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
||||||
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
||||||
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
|
ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
|
||||||
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
|
IndexSearcher indexSearcher = aggregationContext.searchContext().searcher();
|
||||||
return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
|
||||||
|
return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
},
|
},
|
||||||
|
@ -87,7 +89,8 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SignificantTermsAggregatorFactory termsAggregatorFactory, Map<String, Object> metaData) throws IOException {
|
||||||
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, includeExclude, aggregationContext, parent, termsAggregatorFactory, metaData);
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
|
||||||
|
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, bucketCountThresholds, filter, aggregationContext, parent, termsAggregatorFactory, metaData);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ public class SignificantTermsParser implements Aggregator.Parser {
|
||||||
.scriptable(false)
|
.scriptable(false)
|
||||||
.formattable(true)
|
.formattable(true)
|
||||||
.build();
|
.build();
|
||||||
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, SignificantStringTerms.TYPE, context);
|
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
|
||||||
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
|
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
|
||||||
|
|
||||||
TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
|
TermsAggregator.BucketCountThresholds bucketCountThresholds = aggParser.getBucketCountThresholds();
|
||||||
|
|
|
@ -57,7 +57,7 @@ import java.util.Map;
|
||||||
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
|
public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
|
||||||
|
|
||||||
protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
|
protected final ValuesSource.Bytes.WithOrdinals.FieldData valuesSource;
|
||||||
protected final IncludeExclude includeExclude;
|
protected final IncludeExclude.OrdinalsFilter includeExclude;
|
||||||
|
|
||||||
// TODO: cache the acceptedglobalValues per aggregation definition.
|
// TODO: cache the acceptedglobalValues per aggregation definition.
|
||||||
// We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
|
// We can't cache this yet in ValuesSource, since ValuesSource is reused per field for aggs during the execution.
|
||||||
|
@ -71,7 +71,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
|
|
||||||
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
||||||
Terms.Order order, BucketCountThresholds bucketCountThresholds,
|
Terms.Order order, BucketCountThresholds bucketCountThresholds,
|
||||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
|
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
|
||||||
this.valuesSource = valuesSource;
|
this.valuesSource = valuesSource;
|
||||||
this.includeExclude = includeExclude;
|
this.includeExclude = includeExclude;
|
||||||
|
@ -260,7 +260,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
private final LongHash bucketOrds;
|
private final LongHash bucketOrds;
|
||||||
|
|
||||||
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource,
|
||||||
Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, AggregationContext aggregationContext,
|
Terms.Order order, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, AggregationContext aggregationContext,
|
||||||
Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, showTermDocCountError, metaData);
|
super(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, collectionMode, showTermDocCountError, metaData);
|
||||||
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
|
bucketOrds = new LongHash(1, aggregationContext.bigArrays());
|
||||||
|
|
|
@ -45,11 +45,11 @@ public class StringTermsAggregator extends AbstractStringTermsAggregator {
|
||||||
|
|
||||||
private final ValuesSource valuesSource;
|
private final ValuesSource valuesSource;
|
||||||
protected final BytesRefHash bucketOrds;
|
protected final BytesRefHash bucketOrds;
|
||||||
private final IncludeExclude includeExclude;
|
private final IncludeExclude.StringFilter includeExclude;
|
||||||
|
|
||||||
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
Terms.Order order, BucketCountThresholds bucketCountThresholds,
|
Terms.Order order, BucketCountThresholds bucketCountThresholds,
|
||||||
IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
IncludeExclude.StringFilter includeExclude, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
|
|
||||||
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
|
super(name, factories, aggregationContext, parent, order, bucketCountThresholds, collectionMode, showTermDocCountError, metaData);
|
||||||
this.valuesSource = valuesSource;
|
this.valuesSource = valuesSource;
|
||||||
|
|
|
@ -50,7 +50,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
|
||||||
|
return new StringTermsAggregator(name, factories, valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -65,7 +66,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
|
||||||
|
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -80,7 +82,8 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
||||||
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource,
|
||||||
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
Terms.Order order, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude,
|
||||||
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, Map<String, Object> metaData) throws IOException {
|
||||||
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, includeExclude, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
|
||||||
|
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, order, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
|
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
|
||||||
|
@ -37,9 +38,7 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
|
||||||
private Terms.ValueType valueType;
|
private Terms.ValueType valueType;
|
||||||
private Terms.Order order;
|
private Terms.Order order;
|
||||||
private String includePattern;
|
private String includePattern;
|
||||||
private int includeFlags;
|
|
||||||
private String excludePattern;
|
private String excludePattern;
|
||||||
private int excludeFlags;
|
|
||||||
private String executionHint;
|
private String executionHint;
|
||||||
private SubAggCollectionMode collectionMode;
|
private SubAggCollectionMode collectionMode;
|
||||||
private Boolean showTermDocCountError;
|
private Boolean showTermDocCountError;
|
||||||
|
@ -88,26 +87,15 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based
|
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based
|
||||||
* on the {@link java.util.regex.Pattern} class.
|
* on the {@link RegExp} class.
|
||||||
*
|
*
|
||||||
* @see #include(String, int)
|
* @see {@link RegExp#RegExp(String)}
|
||||||
*/
|
*/
|
||||||
public TermsBuilder include(String regex) {
|
public TermsBuilder include(String regex) {
|
||||||
return include(regex, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Define a regular expression that will determine what terms should be aggregated. The regular expression is based
|
|
||||||
* on the {@link java.util.regex.Pattern} class.
|
|
||||||
*
|
|
||||||
* @see java.util.regex.Pattern#compile(String, int)
|
|
||||||
*/
|
|
||||||
public TermsBuilder include(String regex, int flags) {
|
|
||||||
if (includeTerms != null) {
|
if (includeTerms != null) {
|
||||||
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
|
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of strings or a regex, not both");
|
||||||
}
|
}
|
||||||
this.includePattern = regex;
|
this.includePattern = regex;
|
||||||
this.includeFlags = flags;
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -160,29 +148,18 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
|
||||||
}
|
}
|
||||||
return termsAsString;
|
return termsAsString;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
|
|
||||||
* expression is based on the {@link java.util.regex.Pattern} class.
|
|
||||||
*
|
|
||||||
* @see #exclude(String, int)
|
|
||||||
*/
|
|
||||||
public TermsBuilder exclude(String regex) {
|
|
||||||
return exclude(regex, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
|
* Define a regular expression that will filter out terms that should be excluded from the aggregation. The regular
|
||||||
* expression is based on the {@link java.util.regex.Pattern} class.
|
* expression is based on the {@link RegExp} class.
|
||||||
*
|
*
|
||||||
* @see java.util.regex.Pattern#compile(String, int)
|
* @see {@link RegExp#RegExp(String)}
|
||||||
*/
|
*/
|
||||||
public TermsBuilder exclude(String regex, int flags) {
|
public TermsBuilder exclude(String regex) {
|
||||||
if (excludeTerms != null) {
|
if (excludeTerms != null) {
|
||||||
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
|
throw new ElasticsearchIllegalArgumentException("exclude clause must be an array of exact values or a regex, not both");
|
||||||
}
|
}
|
||||||
this.excludePattern = regex;
|
this.excludePattern = regex;
|
||||||
this.excludeFlags = flags;
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -287,27 +264,13 @@ public class TermsBuilder extends ValuesSourceAggregationBuilder<TermsBuilder> {
|
||||||
builder.array("include", includeTerms);
|
builder.array("include", includeTerms);
|
||||||
}
|
}
|
||||||
if (includePattern != null) {
|
if (includePattern != null) {
|
||||||
if (includeFlags == 0) {
|
builder.field("include", includePattern);
|
||||||
builder.field("include", includePattern);
|
|
||||||
} else {
|
|
||||||
builder.startObject("include")
|
|
||||||
.field("pattern", includePattern)
|
|
||||||
.field("flags", includeFlags)
|
|
||||||
.endObject();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (excludeTerms != null) {
|
if (excludeTerms != null) {
|
||||||
builder.array("exclude", excludeTerms);
|
builder.array("exclude", excludeTerms);
|
||||||
}
|
}
|
||||||
if (excludePattern != null) {
|
if (excludePattern != null) {
|
||||||
if (excludeFlags == 0) {
|
builder.field("exclude", excludePattern);
|
||||||
builder.field("exclude", excludePattern);
|
|
||||||
} else {
|
|
||||||
builder.startObject("exclude")
|
|
||||||
.field("pattern", excludePattern)
|
|
||||||
.field("flags", excludeFlags)
|
|
||||||
.endObject();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,7 @@ public class TermsParser implements Aggregator.Parser {
|
||||||
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
|
public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException {
|
||||||
TermsParametersParser aggParser = new TermsParametersParser();
|
TermsParametersParser aggParser = new TermsParametersParser();
|
||||||
ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build();
|
ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context).scriptable(true).formattable(true).build();
|
||||||
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context);
|
IncludeExclude.Parser incExcParser = new IncludeExclude.Parser();
|
||||||
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
|
aggParser.parse(aggregationName, parser, context, vsParser, incExcParser);
|
||||||
|
|
||||||
List<OrderElement> orderElements = aggParser.getOrderElements();
|
List<OrderElement> orderElements = aggParser.getOrderElements();
|
||||||
|
|
|
@ -20,22 +20,30 @@ package org.elasticsearch.search.aggregations.bucket.terms.support;
|
||||||
|
|
||||||
import com.carrotsearch.hppc.LongOpenHashSet;
|
import com.carrotsearch.hppc.LongOpenHashSet;
|
||||||
import com.carrotsearch.hppc.LongSet;
|
import com.carrotsearch.hppc.LongSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.RandomAccessOrds;
|
import org.apache.lucene.index.RandomAccessOrds;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.*;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
|
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||||
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
import org.apache.lucene.util.automaton.Operations;
|
||||||
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
|
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||||
import org.elasticsearch.ElasticsearchParseException;
|
import org.elasticsearch.ElasticsearchParseException;
|
||||||
import org.elasticsearch.ExceptionsHelper;
|
|
||||||
import org.elasticsearch.common.regex.Regex;
|
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Matcher;
|
import java.util.SortedSet;
|
||||||
import java.util.regex.Pattern;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
|
* Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
|
||||||
|
@ -43,8 +51,8 @@ import java.util.regex.Pattern;
|
||||||
*/
|
*/
|
||||||
public class IncludeExclude {
|
public class IncludeExclude {
|
||||||
|
|
||||||
// The includeValue and excludeValue ByteRefs which are the result of the parsing
|
// The includeValue and excludeValue ByteRefs which are the result of the parsing
|
||||||
// process are converted into a LongFilter when used on numeric fields
|
// process are converted into a LongFilter when used on numeric fields
|
||||||
// in the index.
|
// in the index.
|
||||||
public static class LongFilter {
|
public static class LongFilter {
|
||||||
private LongSet valids;
|
private LongSet valids;
|
||||||
|
@ -72,152 +80,145 @@ public class IncludeExclude {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Matcher include;
|
// Only used for the 'map' execution mode (ie. scripts)
|
||||||
private final Matcher exclude;
|
public static class StringFilter {
|
||||||
private final CharsRefBuilder scratch = new CharsRefBuilder();
|
|
||||||
private Set<BytesRef> includeValues;
|
private final ByteRunAutomaton runAutomaton;
|
||||||
private Set<BytesRef> excludeValues;
|
|
||||||
private final boolean hasRegexTest;
|
private StringFilter(Automaton automaton) {
|
||||||
|
this.runAutomaton = new ByteRunAutomaton(automaton);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
|
||||||
|
*/
|
||||||
|
public boolean accept(BytesRef value) {
|
||||||
|
return runAutomaton.run(value.bytes, value.offset, value.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class OrdinalsFilter {
|
||||||
|
|
||||||
|
private final CompiledAutomaton compiled;
|
||||||
|
|
||||||
|
private OrdinalsFilter(Automaton automaton) {
|
||||||
|
this.compiled = new CompiledAutomaton(automaton);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes which global ordinals are accepted by this IncludeExclude instance.
|
||||||
|
*/
|
||||||
|
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
|
||||||
|
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
|
||||||
|
TermsEnum globalTermsEnum;
|
||||||
|
Terms globalTerms = new DocValuesTerms(globalOrdinals);
|
||||||
|
// TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
|
||||||
|
globalTermsEnum = compiled.getTermsEnum(globalTerms);
|
||||||
|
for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
|
||||||
|
acceptedGlobalOrdinals.set(globalTermsEnum.ord());
|
||||||
|
}
|
||||||
|
return acceptedGlobalOrdinals;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private final RegExp include, exclude;
|
||||||
|
private final SortedSet<BytesRef> includeValues, excludeValues;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param include The regular expression pattern for the terms to be included
|
* @param include The regular expression pattern for the terms to be included
|
||||||
* (may only be {@code null} if one of the other arguments is none-null.
|
|
||||||
* @param includeValues The terms to be included
|
|
||||||
* (may only be {@code null} if one of the other arguments is none-null.
|
|
||||||
* @param exclude The regular expression pattern for the terms to be excluded
|
* @param exclude The regular expression pattern for the terms to be excluded
|
||||||
* (may only be {@code null} if one of the other arguments is none-null.
|
|
||||||
* @param excludeValues The terms to be excluded
|
|
||||||
* (may only be {@code null} if one of the other arguments is none-null.
|
|
||||||
*/
|
*/
|
||||||
public IncludeExclude(Pattern include, Pattern exclude, Set<BytesRef> includeValues, Set<BytesRef> excludeValues) {
|
public IncludeExclude(RegExp include, RegExp exclude) {
|
||||||
assert includeValues != null || include != null ||
|
if (include == null && exclude == null) {
|
||||||
exclude != null || excludeValues != null : "includes & excludes cannot both be null"; // otherwise IncludeExclude object should be null
|
throw new IllegalArgumentException();
|
||||||
this.include = include != null ? include.matcher("") : null;
|
}
|
||||||
this.exclude = exclude != null ? exclude.matcher("") : null;
|
this.include = include;
|
||||||
hasRegexTest = include != null || exclude != null;
|
this.exclude = exclude;
|
||||||
|
this.includeValues = null;
|
||||||
|
this.excludeValues = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param includeValues The terms to be included
|
||||||
|
* @param excludeValues The terms to be excluded
|
||||||
|
*/
|
||||||
|
public IncludeExclude(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
|
||||||
|
if (includeValues == null && excludeValues == null) {
|
||||||
|
throw new IllegalArgumentException();
|
||||||
|
}
|
||||||
|
this.include = null;
|
||||||
|
this.exclude = null;
|
||||||
this.includeValues = includeValues;
|
this.includeValues = includeValues;
|
||||||
this.excludeValues = excludeValues;
|
this.excludeValues = excludeValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
|
* Terms adapter around doc values.
|
||||||
*/
|
*/
|
||||||
public boolean accept(BytesRef value) {
|
private static class DocValuesTerms extends Terms {
|
||||||
|
|
||||||
if (hasRegexTest) {
|
private final SortedSetDocValues values;
|
||||||
// We need to perform UTF8 to UTF16 conversion for use in the regex matching
|
|
||||||
scratch.copyUTF8Bytes(value);
|
|
||||||
}
|
|
||||||
return isIncluded(value, scratch.get()) && !isExcluded(value, scratch.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isIncluded(BytesRef value, CharsRef utf16Chars) {
|
|
||||||
|
|
||||||
if ((includeValues == null) && (include == null)) {
|
DocValuesTerms(SortedSetDocValues values) {
|
||||||
// No include criteria to be tested.
|
this.values = values;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (include != null) {
|
@Override
|
||||||
if (include.reset(scratch.get()).matches()) {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
return true;
|
return values.termsEnum();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (includeValues != null) {
|
|
||||||
if (includeValues.contains(value)) {
|
@Override
|
||||||
return true;
|
public long size() throws IOException {
|
||||||
}
|
return -1;
|
||||||
}
|
}
|
||||||
// Some include criteria was tested but no match found
|
|
||||||
return false;
|
@Override
|
||||||
}
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
|
return -1;
|
||||||
private boolean isExcluded(BytesRef value, CharsRef utf16Chars) {
|
|
||||||
if (exclude != null) {
|
|
||||||
if (exclude.reset(scratch.get()).matches()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (excludeValues != null) {
|
|
||||||
if (excludeValues.contains(value)) {
|
@Override
|
||||||
return true;
|
public long getSumDocFreq() throws IOException {
|
||||||
}
|
return -1;
|
||||||
}
|
}
|
||||||
// No exclude criteria was tested or no match found
|
|
||||||
return false;
|
@Override
|
||||||
|
public int getDocCount() throws IOException {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasFreqs() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes which global ordinals are accepted by this IncludeExclude instance.
|
|
||||||
*/
|
|
||||||
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) {
|
|
||||||
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
|
|
||||||
// There are 3 ways of populating this bitset:
|
|
||||||
// 1) Looking up the global ordinals for known "include" terms
|
|
||||||
// 2) Looking up the global ordinals for known "exclude" terms
|
|
||||||
// 3) Traversing the term enum for all terms and running past regexes
|
|
||||||
// Option 3 is known to be very slow in the case of high-cardinality fields and
|
|
||||||
// should be avoided if possible.
|
|
||||||
if (includeValues != null) {
|
|
||||||
// optimize for the case where the set of accepted values is a set
|
|
||||||
// of known terms, not a regex that would have to be tested against all terms in the index
|
|
||||||
for (BytesRef includeValue : includeValues) {
|
|
||||||
// We need to perform UTF8 to UTF16 conversion for use in the regex matching
|
|
||||||
scratch.copyUTF8Bytes(includeValue);
|
|
||||||
if (!isExcluded(includeValue, scratch.get())) {
|
|
||||||
long ord = globalOrdinals.lookupTerm(includeValue);
|
|
||||||
if (ord >= 0) {
|
|
||||||
acceptedGlobalOrdinals.set(ord);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if(hasRegexTest) {
|
|
||||||
// We have includeVals that are a regex or only regex excludes - we need to do the potentially
|
|
||||||
// slow option of hitting termsEnum for every term in the index.
|
|
||||||
TermsEnum globalTermsEnum = globalOrdinals.termsEnum();
|
|
||||||
try {
|
|
||||||
for (BytesRef term = globalTermsEnum.next(); term != null; term = globalTermsEnum.next()) {
|
|
||||||
if (accept(term)) {
|
|
||||||
acceptedGlobalOrdinals.set(globalTermsEnum.ord());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw ExceptionsHelper.convertToElastic(e);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// we only have a set of known values to exclude - create a bitset with all good values and negate the known bads
|
|
||||||
acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
|
|
||||||
for (BytesRef excludeValue : excludeValues) {
|
|
||||||
long ord = globalOrdinals.lookupTerm(excludeValue);
|
|
||||||
if (ord >= 0) {
|
|
||||||
acceptedGlobalOrdinals.clear(ord);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return acceptedGlobalOrdinals;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class Parser {
|
public static class Parser {
|
||||||
|
|
||||||
private final String aggName;
|
|
||||||
private final InternalAggregation.Type aggType;
|
|
||||||
private final SearchContext context;
|
|
||||||
|
|
||||||
String include = null;
|
String include = null;
|
||||||
int includeFlags = 0; // 0 means no flags
|
|
||||||
String exclude = null;
|
String exclude = null;
|
||||||
int excludeFlags = 0; // 0 means no flags
|
SortedSet<BytesRef> includeValues;
|
||||||
Set<BytesRef> includeValues;
|
SortedSet<BytesRef> excludeValues;
|
||||||
Set<BytesRef> excludeValues;
|
|
||||||
|
|
||||||
public Parser(String aggName, InternalAggregation.Type aggType, SearchContext context) {
|
|
||||||
this.aggName = aggName;
|
|
||||||
this.aggType = aggType;
|
|
||||||
this.context = context;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException {
|
public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser) throws IOException {
|
||||||
|
|
||||||
|
@ -231,14 +232,14 @@ public class IncludeExclude {
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (token == XContentParser.Token.START_ARRAY) {
|
if (token == XContentParser.Token.START_ARRAY) {
|
||||||
if ("include".equals(currentFieldName)) {
|
if ("include".equals(currentFieldName)) {
|
||||||
includeValues = parseArrayToSet(parser);
|
includeValues = new TreeSet<>(parseArrayToSet(parser));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if ("exclude".equals(currentFieldName)) {
|
if ("exclude".equals(currentFieldName)) {
|
||||||
excludeValues = parseArrayToSet(parser);
|
excludeValues = new TreeSet<>(parseArrayToSet(parser));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -252,12 +253,6 @@ public class IncludeExclude {
|
||||||
} else if (token == XContentParser.Token.VALUE_STRING) {
|
} else if (token == XContentParser.Token.VALUE_STRING) {
|
||||||
if ("pattern".equals(currentFieldName)) {
|
if ("pattern".equals(currentFieldName)) {
|
||||||
include = parser.text();
|
include = parser.text();
|
||||||
} else if ("flags".equals(currentFieldName)) {
|
|
||||||
includeFlags = Regex.flagsFromString(parser.text());
|
|
||||||
}
|
|
||||||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
|
||||||
if ("flags".equals(currentFieldName)) {
|
|
||||||
includeFlags = parser.intValue();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -268,12 +263,6 @@ public class IncludeExclude {
|
||||||
} else if (token == XContentParser.Token.VALUE_STRING) {
|
} else if (token == XContentParser.Token.VALUE_STRING) {
|
||||||
if ("pattern".equals(currentFieldName)) {
|
if ("pattern".equals(currentFieldName)) {
|
||||||
exclude = parser.text();
|
exclude = parser.text();
|
||||||
} else if ("flags".equals(currentFieldName)) {
|
|
||||||
excludeFlags = Regex.flagsFromString(parser.text());
|
|
||||||
}
|
|
||||||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
|
||||||
if ("flags".equals(currentFieldName)) {
|
|
||||||
excludeFlags = parser.intValue();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -298,19 +287,50 @@ public class IncludeExclude {
|
||||||
}
|
}
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
|
||||||
public IncludeExclude includeExclude() {
|
public IncludeExclude includeExclude() {
|
||||||
if (include == null && exclude == null && includeValues == null && excludeValues == null) {
|
RegExp includePattern = include != null ? new RegExp(include) : null;
|
||||||
|
RegExp excludePattern = exclude != null ? new RegExp(exclude) : null;
|
||||||
|
if (includePattern != null || excludePattern != null) {
|
||||||
|
if (includeValues != null || excludeValues != null) {
|
||||||
|
throw new ElasticsearchIllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
|
||||||
|
}
|
||||||
|
return new IncludeExclude(includePattern, excludePattern);
|
||||||
|
} else if (includeValues != null || excludeValues != null) {
|
||||||
|
return new IncludeExclude(includeValues, excludeValues);
|
||||||
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
Pattern includePattern = include != null ? Pattern.compile(include, includeFlags) : null;
|
|
||||||
Pattern excludePattern = exclude != null ? Pattern.compile(exclude, excludeFlags) : null;
|
|
||||||
return new IncludeExclude(includePattern, excludePattern, includeValues, excludeValues);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isRegexBased() {
|
public boolean isRegexBased() {
|
||||||
return hasRegexTest;
|
return include != null || exclude != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Automaton toAutomaton() {
|
||||||
|
Automaton a = null;
|
||||||
|
if (include != null) {
|
||||||
|
a = include.toAutomaton();
|
||||||
|
} else if (includeValues != null) {
|
||||||
|
a = Automata.makeStringUnion(includeValues);
|
||||||
|
} else {
|
||||||
|
a = Automata.makeAnyString();
|
||||||
|
}
|
||||||
|
if (exclude != null) {
|
||||||
|
a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||||
|
} else if (excludeValues != null) {
|
||||||
|
a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
public StringFilter convertToStringFilter() {
|
||||||
|
return new StringFilter(toAutomaton());
|
||||||
|
}
|
||||||
|
|
||||||
|
public OrdinalsFilter convertToOrdinalsFilter() {
|
||||||
|
return new OrdinalsFilter(toAutomaton());
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongFilter convertToLongFilter() {
|
public LongFilter convertToLongFilter() {
|
||||||
|
@ -329,6 +349,7 @@ public class IncludeExclude {
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public LongFilter convertToDoubleFilter() {
|
public LongFilter convertToDoubleFilter() {
|
||||||
int numValids = includeValues == null ? 0 : includeValues.size();
|
int numValids = includeValues == null ? 0 : includeValues.size();
|
||||||
int numInvalids = excludeValues == null ? 0 : excludeValues.size();
|
int numInvalids = excludeValues == null ? 0 : excludeValues.size();
|
||||||
|
|
|
@ -0,0 +1,130 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.benchmark.search.aggregations;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
|
||||||
|
import org.elasticsearch.action.bulk.BulkRequestBuilder;
|
||||||
|
import org.elasticsearch.action.bulk.BulkResponse;
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.common.StopWatch;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
|
import org.elasticsearch.node.Node;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import static org.elasticsearch.client.Requests.createIndexRequest;
|
||||||
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||||
|
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||||
|
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
|
||||||
|
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||||
|
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
|
||||||
|
import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
|
||||||
|
|
||||||
|
public class IncludeExcludeAggregationSearchBenchmark {
|
||||||
|
|
||||||
|
private static final Random R = new Random();
|
||||||
|
private static final String CLUSTER_NAME = IncludeExcludeAggregationSearchBenchmark.class.getSimpleName();
|
||||||
|
private static final int NUM_DOCS = 10000000;
|
||||||
|
private static final int BATCH = 100;
|
||||||
|
private static final int WARM = 3;
|
||||||
|
private static final int RUNS = 10;
|
||||||
|
private static final int ITERS = 3;
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
Settings settings = settingsBuilder()
|
||||||
|
.put("index.refresh_interval", "-1")
|
||||||
|
.put(SETTING_NUMBER_OF_SHARDS, 1)
|
||||||
|
.put(SETTING_NUMBER_OF_REPLICAS, 0)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Node[] nodes = new Node[1];
|
||||||
|
for (int i = 0; i < nodes.length; i++) {
|
||||||
|
nodes[i] = nodeBuilder().clusterName(CLUSTER_NAME)
|
||||||
|
.settings(settingsBuilder().put(settings).put("name", "node" + i))
|
||||||
|
.node();
|
||||||
|
}
|
||||||
|
|
||||||
|
Node clientNode = nodeBuilder()
|
||||||
|
.clusterName(CLUSTER_NAME)
|
||||||
|
.settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();
|
||||||
|
|
||||||
|
Client client = clientNode.client();
|
||||||
|
|
||||||
|
try {
|
||||||
|
client.admin().indices().create(createIndexRequest("index").settings(settings).mapping("type",
|
||||||
|
jsonBuilder().startObject().startObject("type").startObject("properties")
|
||||||
|
.startObject("str")
|
||||||
|
.field("type", "string")
|
||||||
|
.field("index", "not_analyzed")
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject().endObject())).actionGet();
|
||||||
|
|
||||||
|
System.out.println("Indexing " + NUM_DOCS + " documents");
|
||||||
|
|
||||||
|
StopWatch stopWatch = new StopWatch().start();
|
||||||
|
for (int i = 0; i < NUM_DOCS; ) {
|
||||||
|
BulkRequestBuilder request = client.prepareBulk();
|
||||||
|
for (int j = 0; j < BATCH && i < NUM_DOCS; ++j) {
|
||||||
|
request.add(client.prepareIndex("index", "type", Integer.toString(i)).setSource("str", TestUtil.randomSimpleString(R)));
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
BulkResponse response = request.execute().actionGet();
|
||||||
|
if (response.hasFailures()) {
|
||||||
|
System.err.println("--> failures...");
|
||||||
|
System.err.println(response.buildFailureMessage());
|
||||||
|
}
|
||||||
|
if ((i % 100000) == 0) {
|
||||||
|
System.out.println("--> Indexed " + i + " took " + stopWatch.stop().lastTaskTime());
|
||||||
|
stopWatch.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
client.admin().indices().prepareRefresh("index").execute().actionGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.out.println("Index already exists, skipping index creation");
|
||||||
|
}
|
||||||
|
|
||||||
|
ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
|
||||||
|
if (clusterHealthResponse.isTimedOut()) {
|
||||||
|
System.err.println("--> Timed out waiting for cluster health");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < WARM + RUNS; ++i) {
|
||||||
|
if (i >= WARM) {
|
||||||
|
System.out.println("RUN " + (i - WARM));
|
||||||
|
}
|
||||||
|
long start = System.nanoTime();
|
||||||
|
SearchResponse resp = null;
|
||||||
|
for (int j = 0; j < ITERS; ++j) {
|
||||||
|
resp = client.prepareSearch("index").setQuery(QueryBuilders.prefixQuery("str", "sf")).setSize(0).addAggregation(terms("t").field("str").include("s.*")).execute().actionGet();
|
||||||
|
}
|
||||||
|
long end = System.nanoTime();
|
||||||
|
if (i >= WARM) {
|
||||||
|
System.out.println(new TimeValue((end - start) / ITERS, TimeUnit.NANOSECONDS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -387,86 +387,6 @@ public class StringTermsTests extends AbstractTermsTests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void singleValueField_WithRegexFiltering_WithFlags() throws Exception {
|
|
||||||
|
|
||||||
// include without exclude
|
|
||||||
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
|
|
||||||
// with case insensitive flag on the include regex
|
|
||||||
|
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
|
|
||||||
.addAggregation(terms("terms")
|
|
||||||
.executionHint(randomExecutionHint())
|
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
|
||||||
.collectMode(randomFrom(SubAggCollectionMode.values())).include("VAL00.+", Pattern.CASE_INSENSITIVE))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
Terms terms = response.getAggregations().get("terms");
|
|
||||||
assertThat(terms, notNullValue());
|
|
||||||
assertThat(terms.getName(), equalTo("terms"));
|
|
||||||
assertThat(terms.getBuckets().size(), equalTo(10));
|
|
||||||
|
|
||||||
for (int i = 0; i < 10; i++) {
|
|
||||||
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
|
|
||||||
assertThat(bucket, notNullValue());
|
|
||||||
assertThat(key(bucket), equalTo("val00" + i));
|
|
||||||
assertThat(bucket.getDocCount(), equalTo(1l));
|
|
||||||
}
|
|
||||||
|
|
||||||
// include and exclude
|
|
||||||
// we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
|
|
||||||
// with multi-flag masking on the exclude regex
|
|
||||||
|
|
||||||
response = client().prepareSearch("idx").setTypes("high_card_type")
|
|
||||||
.addAggregation(terms("terms")
|
|
||||||
.executionHint(randomExecutionHint())
|
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
|
||||||
.collectMode(randomFrom(SubAggCollectionMode.values())).include("val00.+").exclude("( val000 | VAL001 )#this is a comment", Pattern.CASE_INSENSITIVE | Pattern.COMMENTS))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
terms = response.getAggregations().get("terms");
|
|
||||||
assertThat(terms, notNullValue());
|
|
||||||
assertThat(terms.getName(), equalTo("terms"));
|
|
||||||
assertThat(terms.getBuckets().size(), equalTo(8));
|
|
||||||
|
|
||||||
for (int i = 2; i < 10; i++) {
|
|
||||||
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
|
|
||||||
assertThat(bucket, notNullValue());
|
|
||||||
assertThat(key(bucket), equalTo("val00" + i));
|
|
||||||
assertThat(bucket.getDocCount(), equalTo(1l));
|
|
||||||
}
|
|
||||||
|
|
||||||
// exclude without include
|
|
||||||
// we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
|
|
||||||
// with a "no flag" flag
|
|
||||||
|
|
||||||
response = client().prepareSearch("idx").setTypes("high_card_type")
|
|
||||||
.addAggregation(terms("terms")
|
|
||||||
.executionHint(randomExecutionHint())
|
|
||||||
.field(SINGLE_VALUED_FIELD_NAME)
|
|
||||||
.collectMode(randomFrom(SubAggCollectionMode.values())).exclude("val0[1-9]+.+", 0))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
terms = response.getAggregations().get("terms");
|
|
||||||
assertThat(terms, notNullValue());
|
|
||||||
assertThat(terms.getName(), equalTo("terms"));
|
|
||||||
assertThat(terms.getBuckets().size(), equalTo(10));
|
|
||||||
|
|
||||||
for (int i = 0; i < 10; i++) {
|
|
||||||
Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
|
|
||||||
assertThat(bucket, notNullValue());
|
|
||||||
assertThat(key(bucket), equalTo("val00" + i));
|
|
||||||
assertThat(bucket.getDocCount(), equalTo(1l));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleValueField_WithExactTermFiltering() throws Exception {
|
public void singleValueField_WithExactTermFiltering() throws Exception {
|
||||||
// include without exclude
|
// include without exclude
|
||||||
|
|
Loading…
Reference in New Issue