Aggregations fix: support include/exclude strings formatted for IP and date fields in terms and significant_terms aggregations.

Closes #17705
This commit is contained in:
markharwood 2016-05-17 14:15:53 +01:00
parent db4809d906
commit a846ff93e9
5 changed files with 125 additions and 22 deletions

View File

@ -211,7 +211,14 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
}
}
assert execution != null;
return execution.create(name, factories, valuesSource, config.format(), bucketCountThresholds, includeExclude, context, parent,
DocValueFormat format = config.format();
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+ "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
}
return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent,
significanceHeuristic, this, pipelineAggregators, metaData);
}
@ -227,7 +234,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
}
IncludeExclude.LongFilter longFilter = null;
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
longFilter = includeExclude.convertToLongFilter(config.format());
}
return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators,
@ -248,7 +255,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
}
@ -262,7 +269,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);
@ -277,7 +284,7 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac
AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic,
SignificantTermsAggregatorFactory termsAggregatorFactory, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories,
(ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter,
aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData);

View File

@ -150,8 +150,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
}
}
}
DocValueFormat format = config.format();
if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
+ "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
}
return execution.create(name, factories, valuesSource, order, config.format(), bucketCountThresholds, includeExclude, context, parent,
return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent,
collectMode, showTermDocCountError, pipelineAggregators, metaData);
}
@ -171,7 +176,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
pipelineAggregators, metaData);
}
if (includeExclude != null) {
longFilter = includeExclude.convertToLongFilter();
longFilter = includeExclude.convertToLongFilter(config.format());
}
return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order,
bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators,
@ -192,7 +197,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter();
final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format);
return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter,
aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData);
}
@ -211,7 +216,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);
@ -231,7 +236,7 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode,
boolean showTermDocCountError, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
throws IOException {
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource,
order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError,
pipelineAggregators, metaData);

View File

@ -43,6 +43,7 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;
@ -135,7 +136,8 @@ public class IncludeExclude implements Writeable, ToXContent {
}
public static abstract class OrdinalsFilter {
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException;
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
throws IOException;
}
@ -152,7 +154,8 @@ public class IncludeExclude implements Writeable, ToXContent {
*
*/
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource)
throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum;
Terms globalTerms = new DocValuesTerms(globalOrdinals);
@ -179,7 +182,7 @@ public class IncludeExclude implements Writeable, ToXContent {
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, WithOrdinals valueSource) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
if(includeValues!=null){
if (includeValues != null) {
for (BytesRef term : includeValues) {
long ord = globalOrdinals.lookupTerm(term);
if (ord >= 0) {
@ -534,33 +537,46 @@ public class IncludeExclude implements Writeable, ToXContent {
return a;
}
public StringFilter convertToStringFilter() {
public StringFilter convertToStringFilter(DocValueFormat format) {
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton());
}
return new TermListBackedStringFilter(includeValues, excludeValues);
return new TermListBackedStringFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
}
public OrdinalsFilter convertToOrdinalsFilter() {
private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUserFormattedValues, DocValueFormat format) {
SortedSet<BytesRef> result = endUserFormattedValues;
if (endUserFormattedValues != null) {
if (format != DocValueFormat.RAW) {
result = new TreeSet<>();
for (BytesRef formattedVal : endUserFormattedValues) {
result.add(format.parseBytesRef(formattedVal.utf8ToString()));
}
}
}
return result;
}
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
if (isRegexBased()) {
return new AutomatonBackedOrdinalsFilter(toAutomaton());
}
return new TermListBackedOrdinalsFilter(includeValues, excludeValues);
return new TermListBackedOrdinalsFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format));
}
public LongFilter convertToLongFilter() {
public LongFilter convertToLongFilter(DocValueFormat format) {
int numValids = includeValues == null ? 0 : includeValues.size();
int numInvalids = excludeValues == null ? 0 : excludeValues.size();
LongFilter result = new LongFilter(numValids, numInvalids);
if (includeValues != null) {
for (BytesRef val : includeValues) {
result.addAccept(Long.parseLong(val.utf8ToString()));
result.addAccept(format.parseLong(val.utf8ToString(), false, null));
}
}
if (excludeValues != null) {
for (BytesRef val : excludeValues) {
result.addReject(Long.parseLong(val.utf8ToString()));
result.addReject(format.parseLong(val.utf8ToString(), false, null));
}
}
return result;
@ -572,13 +588,13 @@ public class IncludeExclude implements Writeable, ToXContent {
LongFilter result = new LongFilter(numValids, numInvalids);
if (includeValues != null) {
for (BytesRef val : includeValues) {
double dval=Double.parseDouble(val.utf8ToString());
double dval = Double.parseDouble(val.utf8ToString());
result.addAccept(NumericUtils.doubleToSortableLong(dval));
}
}
if (excludeValues != null) {
for (BytesRef val : excludeValues) {
double dval=Double.parseDouble(val.utf8ToString());
double dval = Double.parseDouble(val.utf8ToString());
result.addReject(NumericUtils.doubleToSortableLong(dval));
}
}

View File

@ -117,6 +117,33 @@ setup:
- match: { aggregations.ip_terms.buckets.1.doc_count: 1 }
- do:
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "include" : [ "127.0.0.1" ] } } } }
- match: { hits.total: 3 }
- length: { aggregations.ip_terms.buckets: 1 }
- match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" }
- do:
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : [ "127.0.0.1" ] } } } }
- match: { hits.total: 3 }
- length: { aggregations.ip_terms.buckets: 1 }
- match: { aggregations.ip_terms.buckets.0.key: "::1" }
- do:
catch: request
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : "127.*" } } } }
---
"Boolean test":
- do:
@ -300,4 +327,27 @@ setup:
- match: { aggregations.date_terms.buckets.1.key_as_string: "2014-09-01T00:00:00.000Z" }
- match: { aggregations.date_terms.buckets.1.doc_count: 1 }
- do:
search:
body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "include" : [ "2016-05-03" ] } } } }
- match: { hits.total: 3 }
- length: { aggregations.date_terms.buckets: 1 }
- match: { aggregations.date_terms.buckets.0.key_as_string: "2016-05-03T00:00:00.000Z" }
- match: { aggregations.date_terms.buckets.0.doc_count: 2 }
- do:
search:
body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "exclude" : [ "2016-05-03" ] } } } }
- match: { hits.total: 3 }
- length: { aggregations.date_terms.buckets: 1 }
- match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" }
- match: { aggregations.date_terms.buckets.0.doc_count: 1 }

View File

@ -121,3 +121,28 @@
- is_false: aggregations.ip_terms.buckets.0.key_as_string
- match: { aggregations.ip_terms.buckets.0.doc_count: 1 }
- do:
search:
body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } }
- match: { hits.total: 1 }
- length: { aggregations.ip_terms.buckets: 1 }
- match: { aggregations.ip_terms.buckets.0.key: "::1" }
- do:
search:
body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } }
- match: { hits.total: 1 }
- length: { aggregations.ip_terms.buckets: 0 }
- do:
catch: request
search:
body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } }