value_type is useful regardless of scripting. (#22160)

Today we only expose `value_type` in scriptable aggregations; however, it is
also useful with unmapped fields. I suspect we never noticed because
`value_type` was not documented (fixed) and most aggregations are scriptable.

Closes #20163
This commit is contained in:
Adrien Grand 2016-12-22 14:35:12 +01:00 committed by GitHub
parent fd6e1a30de
commit e39942fc02
5 changed files with 155 additions and 17 deletions

View File

@ -80,7 +80,7 @@ public enum MissingValues {
}
public static ValuesSource.Numeric replaceMissing(final ValuesSource.Numeric valuesSource, final Number missing) {
final boolean missingIsFloat = missing.longValue() != (long) missing.doubleValue();
final boolean missingIsFloat = missing.doubleValue() % 1 != 0;
final boolean isFloatingPoint = valuesSource.isFloatingPoint() || missingIsFloat;
return new ValuesSource.Numeric() {

View File

@ -67,6 +67,17 @@ public final class ValuesSourceParserHelper {
objectParser.declareField(ValuesSourceAggregationBuilder::missing, XContentParser::objectText,
new ParseField("missing"), ObjectParser.ValueType.VALUE);
objectParser.declareField(ValuesSourceAggregationBuilder::valueType, p -> {
ValueType valueType = ValueType.resolveForScript(p.text());
if (targetValueType != null && valueType.isNotA(targetValueType)) {
throw new ParsingException(p.getTokenLocation(),
"Aggregation [" + objectParser.getName() + "] was configured with an incompatible value type ["
+ valueType + "]. It can only work on value of type ["
+ targetValueType + "]");
}
return valueType;
}, new ParseField("value_type", "valueType"), ObjectParser.ValueType.STRING);
if (formattable) {
objectParser.declareField(ValuesSourceAggregationBuilder::format, XContentParser::text,
new ParseField("format"), ObjectParser.ValueType.STRING);
@ -75,17 +86,6 @@ public final class ValuesSourceParserHelper {
if (scriptable) {
objectParser.declareField(ValuesSourceAggregationBuilder::script, org.elasticsearch.script.Script::parse,
Script.SCRIPT_PARSE_FIELD, ObjectParser.ValueType.OBJECT_OR_STRING);
objectParser.declareField(ValuesSourceAggregationBuilder::valueType, p -> {
ValueType valueType = ValueType.resolveForScript(p.text());
if (targetValueType != null && valueType.isNotA(targetValueType)) {
throw new ParsingException(p.getTokenLocation(),
"Aggregation [" + objectParser.getName() + "] was configured with an incompatible value type ["
+ valueType + "]. It can only work on value of type ["
+ targetValueType + "]");
}
return valueType;
}, new ParseField("value_type", "valueType"), ObjectParser.ValueType.STRING);
}
if (timezoneAware) {

View File

@ -293,4 +293,9 @@ public class MissingValuesTests extends ESTestCase {
}
}
}
/**
 * Wraps the empty numeric values source with a missing value and checks the
 * floating-point flag of the result: an integral missing value (3) must not
 * report floating point, while a fractional one (3.5) must.
 */
public void testFloatingPointDetection() {
    final ValuesSource.Numeric withIntegralMissing = MissingValues.replaceMissing(ValuesSource.Numeric.EMPTY, 3);
    assertFalse(withIntegralMissing.isFloatingPoint());

    final ValuesSource.Numeric withFractionalMissing = MissingValues.replaceMissing(ValuesSource.Numeric.EMPTY, 3.5);
    assertTrue(withFractionalMissing.isFloatingPoint());
}
}

View File

@ -92,11 +92,15 @@ enables defining all the "dynamic" expressions in the script as parameters, whic
between calls (this will ensure the use of the cached compiled scripts in Elasticsearch).
===============================
Scripts can generate a single value or multiple values per document. When generating multiple values, one can use the
`script_values_sorted` setting to indicate whether these values are sorted or not. Internally, Elasticsearch can
perform optimizations when dealing with sorted values (for example, with the `min` aggregation, knowing the values are
sorted, Elasticsearch will skip the iterations over all the values and rely on the first value in the list to be the
minimum value among all other values associated with the same document).
Elasticsearch uses the type of the field in the mapping in order to figure out
how to run the aggregation and format the response. However, there are two cases
in which Elasticsearch cannot figure out this information: unmapped fields (for
instance, in a search request across multiple indices where only some
of them have a mapping for the field) and pure scripts. For those cases, it is
possible to give Elasticsearch a hint using the `value_type` option, which
accepts the following values: `string`, `long` (works for all integer types),
`double` (works for all decimal types like `float` or `scaled_float`), `date`,
`ip` and `boolean`.
--

View File

@ -465,3 +465,132 @@ setup:
- match: { aggregations.str_terms.buckets.0.key: 1234 }
- match: { aggregations.str_terms.buckets.0.doc_count: 2 }
---
# Indexes an empty document, then runs a terms aggregation on the field
# "unmapped_string" with value_type "string" and missing "abc". Expects a
# single bucket keyed "abc" containing one document.
"Unmapped strings":
- do:
index:
index: test_1
type: test
id: 1
body: {}
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "string_terms" : { "terms" : { "field" : "unmapped_string", "value_type" : "string", "missing": "abc" } } } }
- match: { hits.total: 1 }
- length: { aggregations.string_terms.buckets: 1 }
- match: { aggregations.string_terms.buckets.0.key: "abc" }
- match: { aggregations.string_terms.buckets.0.doc_count: 1 }
---
# Indexes an empty document, then runs a terms aggregation on the field
# "unmapped_boolean" with value_type "boolean" and missing true. Expects a
# single bucket whose key is 1 and whose key_as_string is "true".
"Unmapped booleans":
- do:
index:
index: test_1
type: test
id: 1
body: {}
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "boolean_terms" : { "terms" : { "field" : "unmapped_boolean", "value_type" : "boolean", "missing": true } } } }
- match: { hits.total: 1 }
- length: { aggregations.boolean_terms.buckets: 1 }
- match: { aggregations.boolean_terms.buckets.0.key: 1 }
- match: { aggregations.boolean_terms.buckets.0.key_as_string: "true" }
- match: { aggregations.boolean_terms.buckets.0.doc_count: 1 }
---
# Indexes an empty document, then runs a terms aggregation on the field
# "unmapped_date" with value_type "date" and missing "2016-05-11". Expects a
# single bucket whose numeric key is 1462924800000 and whose key_as_string is
# "2016-05-11T00:00:00.000Z".
"Unmapped dates":
- do:
index:
index: test_1
type: test
id: 1
body: {}
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "unmapped_date", "value_type" : "date", "missing": "2016-05-11" } } } }
- match: { hits.total: 1 }
- length: { aggregations.date_terms.buckets: 1 }
- match: { aggregations.date_terms.buckets.0.key: 1462924800000 }
- match: { aggregations.date_terms.buckets.0.key_as_string: "2016-05-11T00:00:00.000Z" }
- match: { aggregations.date_terms.buckets.0.doc_count: 1 }
---
# Indexes an empty document, then runs a terms aggregation on the field
# "unmapped_long" with value_type "long" and missing 3. Expects a single
# bucket keyed 3 containing one document.
"Unmapped longs":
- do:
index:
index: test_1
type: test
id: 1
body: {}
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "long_terms" : { "terms" : { "field" : "unmapped_long", "value_type" : "long", "missing": 3 } } } }
- match: { hits.total: 1 }
- length: { aggregations.long_terms.buckets: 1 }
- match: { aggregations.long_terms.buckets.0.key: 3 }
- match: { aggregations.long_terms.buckets.0.doc_count: 1 }
---
# Indexes an empty document, then runs a terms aggregation on the field
# "unmapped_double" with value_type "double" and missing 3.5. Expects a
# single bucket keyed 3.5 containing one document.
"Unmapped doubles":
- do:
index:
index: test_1
type: test
id: 1
body: {}
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "double_terms" : { "terms" : { "field" : "unmapped_double", "value_type" : "double", "missing": 3.5 } } } }
- match: { hits.total: 1 }
- length: { aggregations.double_terms.buckets: 1 }
- match: { aggregations.double_terms.buckets.0.key: 3.5 }
- match: { aggregations.double_terms.buckets.0.doc_count: 1 }