Change parsing of numeric `to` and `from` parameters in `date_range` aggregation (#25376)

Currently the `to` and `from` parameter in the `date_range` aggregation is not
parsed with the correct date field format from the mappings or the aggregation
if the argument is numeric, but always treated as a long value specifying
`epoch_millis`. This leads to problems e.g. when the format is `epoch_second`,
but the `to` and `from` are currently treated as millis.

With this change, we interpret these parameters according to the `format` of the target field.
If the `format` in the mappings is not compatible with numeric input values,
a compatible `format` (e.g. `epoch_millis`, `epoch_second`) must be specified in
the `date_range` aggregation itself, otherwise an error is thrown.

#Closes #17920
This commit is contained in:
Christoph Büscher 2017-07-18 09:45:28 +02:00 committed by GitHub
parent f347bd4a4e
commit a6e3d356ed
7 changed files with 255 additions and 24 deletions

View File

@ -26,15 +26,13 @@ import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator.Range;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
public abstract class AbstractRangeBuilder<AB extends AbstractRangeBuilder<AB, R>, R extends Range>
extends ValuesSourceAggregationBuilder<ValuesSource.Numeric, AB> {
@ -63,10 +61,10 @@ public abstract class AbstractRangeBuilder<AB extends AbstractRangeBuilder<AB, R
* Resolve any strings in the ranges so we have a number value for the from
* and to of each range. The ranges are also sorted before being returned.
*/
protected Range[] processRanges(SearchContext context, ValuesSourceConfig<Numeric> config) {
protected Range[] processRanges(Function<Range, Range> rangeProcessor) {
Range[] ranges = new Range[this.ranges.size()];
for (int i = 0; i < ranges.length; i++) {
ranges[i] = this.ranges.get(i).process(config.format(), context);
ranges[i] = rangeProcessor.apply(this.ranges.get(i));
}
sortRanges(ranges);
return ranges;

View File

@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.range;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
@ -218,7 +219,7 @@ public class DateRangeAggregationBuilder extends AbstractRangeBuilder<DateRangeA
return this;
}
private Double convertDateTime(DateTime dateTime) {
private static Double convertDateTime(DateTime dateTime) {
if (dateTime == null) {
return null;
} else {
@ -281,7 +282,27 @@ public class DateRangeAggregationBuilder extends AbstractRangeBuilder<DateRangeA
AggregatorFactory<?> parent, Builder subFactoriesBuilder) throws IOException {
// We need to call processRanges here so they are parsed and we know whether `now` has been used before we make
// the decision of whether to cache the request
RangeAggregator.Range[] ranges = processRanges(context, config);
RangeAggregator.Range[] ranges = processRanges(range -> {
DocValueFormat parser = config.format();
assert parser != null;
double from = range.getFrom();
double to = range.getTo();
String fromAsString = range.getFromAsString();
String toAsString = range.getToAsString();
if (fromAsString != null) {
from = parser.parseDouble(fromAsString, false, context.getQueryShardContext()::nowInMillis);
} else if (Double.isFinite(from)) {
// from/to provided as double should be converted to string and parsed regardless to support
// different formats like `epoch_millis` vs. `epoch_second` with numeric input
from = parser.parseDouble(Long.toString((long) from), false, context.getQueryShardContext()::nowInMillis);
}
if (toAsString != null) {
to = parser.parseDouble(toAsString, false, context.getQueryShardContext()::nowInMillis);
} else if (Double.isFinite(to)) {
to = parser.parseDouble(Long.toString((long) to), false, context.getQueryShardContext()::nowInMillis);
}
return new RangeAggregator.Range(range.getKey(), from, fromAsString, to, toAsString);
});
if (ranges.length == 0) {
throw new IllegalArgumentException("No [ranges] specified for the [" + this.getName() + "] aggregation");
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.range;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
@ -138,7 +139,19 @@ public class RangeAggregationBuilder extends AbstractRangeBuilder<RangeAggregati
protected RangeAggregatorFactory innerBuild(SearchContext context, ValuesSourceConfig<Numeric> config,
AggregatorFactory<?> parent, Builder subFactoriesBuilder) throws IOException {
// We need to call processRanges here so they are parsed before we make the decision of whether to cache the request
Range[] ranges = processRanges(context, config);
Range[] ranges = processRanges(range -> {
DocValueFormat parser = config.format();
assert parser != null;
Double from = range.from;
Double to = range.to;
if (range.fromAsStr != null) {
from = parser.parseDouble(range.fromAsStr, false, context.getQueryShardContext()::nowInMillis);
}
if (range.toAsStr != null) {
to = parser.parseDouble(range.toAsStr, false, context.getQueryShardContext()::nowInMillis);
}
return new Range(range.key, from, range.fromAsStr, to, range.toAsStr);
});
if (ranges.length == 0) {
throw new IllegalArgumentException("No [ranges] specified for the [" + this.getName() + "] aggregation");
}

View File

@ -90,8 +90,27 @@ public class RangeAggregator extends BucketsAggregator {
out.writeDouble(to);
}
public double getFrom() {
return this.from;
}
protected Range(String key, Double from, String fromAsStr, Double to, String toAsStr) {
public double getTo() {
return this.to;
}
public String getFromAsString() {
return this.fromAsStr;
}
public String getToAsString() {
return this.toAsStr;
}
public String getKey() {
return this.key;
}
public Range(String key, Double from, String fromAsStr, Double to, String toAsStr) {
this.key = key;
this.from = from == null ? Double.NEGATIVE_INFINITY : from;
this.fromAsStr = fromAsStr;
@ -108,19 +127,6 @@ public class RangeAggregator extends BucketsAggregator {
return "[" + from + " to " + to + ")";
}
public Range process(DocValueFormat parser, SearchContext context) {
assert parser != null;
Double from = this.from;
Double to = this.to;
if (fromAsStr != null) {
from = parser.parseDouble(fromAsStr, false, context.getQueryShardContext()::nowInMillis);
}
if (toAsStr != null) {
to = parser.parseDouble(toAsStr, false, context.getQueryShardContext()::nowInMillis);
}
return new Range(key, from, fromAsStr, to, toAsStr);
}
public static Range fromXContent(XContentParser parser) throws IOException {
XContentParser.Token token;
String currentFieldName = null;

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.aggregations.bucket;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
@ -136,7 +137,6 @@ public class DateRangeIT extends ESIntegTestCase {
assertThat(range.getName(), equalTo("range"));
assertThat(range.getBuckets().size(), equalTo(3));
// TODO: use diamond once JI-9019884 is fixed
List<Range.Bucket> buckets = new ArrayList<>(range.getBuckets());
Range.Bucket bucket = buckets.get(0);
@ -855,7 +855,6 @@ public class DateRangeIT extends ESIntegTestCase {
assertThat(bucket, Matchers.notNullValue());
Range dateRange = bucket.getAggregations().get("date_range");
// TODO: use diamond once JI-9019884 is fixed
List<Range.Bucket> buckets = new ArrayList<>(dateRange.getBuckets());
assertThat(dateRange, Matchers.notNullValue());
assertThat(dateRange.getName(), equalTo("date_range"));
@ -926,4 +925,142 @@ public class DateRangeIT extends ESIntegTestCase {
assertThat(client().admin().indices().prepareStats("cache_test_idx").setRequestCache(true).get().getTotal().getRequestCache()
.getMissCount(), equalTo(1L));
}
/**
* Test querying ranges on date mapping specifying a format with to/from
* values specified as Strings
*/
public void testRangeWithFormatStringValue() throws Exception {
String indexName = "dateformat_test_idx";
assertAcked(prepareCreate(indexName).addMapping("type", "date", "type=date,format=strict_hour_minute_second"));
indexRandom(true,
client().prepareIndex(indexName, "type", "1").setSource(jsonBuilder().startObject().field("date", "00:16:40").endObject()),
client().prepareIndex(indexName, "type", "2").setSource(jsonBuilder().startObject().field("date", "00:33:20").endObject()),
client().prepareIndex(indexName, "type", "3").setSource(jsonBuilder().startObject().field("date", "00:50:00").endObject()));
// using no format should work when to/from is compatible with format in
// mapping
SearchResponse searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange("00:16:40", "00:50:00").addRange("00:50:00", "01:06:40"))
.get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
List<Range.Bucket> buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "00:16:40-00:50:00", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "00:50:00-01:06:40", 3000000L, 4000000L);
// using different format should work when to/from is compatible with
// format in aggregation
searchResponse = client().prepareSearch(indexName).setSize(0).addAggregation(
dateRange("date_range").field("date").addRange("00.16.40", "00.50.00").addRange("00.50.00", "01.06.40").format("HH.mm.ss"))
.get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "00.16.40-00.50.00", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "00.50.00-01.06.40", 3000000L, 4000000L);
// providing numeric input with format should work, but bucket keys are
// different now
searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(
dateRange("date_range").field("date").addRange(1000000, 3000000).addRange(3000000, 4000000).format("epoch_millis"))
.get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "1000000-3000000", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "3000000-4000000", 3000000L, 4000000L);
// providing numeric input without format should throw an exception
Exception e = expectThrows(Exception.class, () -> client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange(1000000, 3000000).addRange(3000000, 4000000)).get());
Throwable cause = e.getCause();
assertThat(cause, instanceOf(ElasticsearchParseException.class));
assertEquals("failed to parse date field [1000000] with format [strict_hour_minute_second]", cause.getMessage());
}
/**
* Test querying ranges on date mapping specifying a format with to/from
* values specified as numeric value
*/
public void testRangeWithFormatNumericValue() throws Exception {
String indexName = "dateformat_numeric_test_idx";
assertAcked(prepareCreate(indexName).addMapping("type", "date", "type=date,format=epoch_second"));
indexRandom(true,
client().prepareIndex(indexName, "type", "1").setSource(jsonBuilder().startObject().field("date", 1000).endObject()),
client().prepareIndex(indexName, "type", "2").setSource(jsonBuilder().startObject().field("date", 2000).endObject()),
client().prepareIndex(indexName, "type", "3").setSource(jsonBuilder().startObject().field("date", 3000).endObject()));
// using no format should work when to/from is compatible with format in
// mapping
SearchResponse searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange(1000, 3000).addRange(3000, 4000)).get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
List<Bucket> buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "1000-3000", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "3000-4000", 3000000L, 4000000L);
// using no format should also work when and to/from are string values
searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange("1000", "3000").addRange("3000", "4000")).get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "1000-3000", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "3000-4000", 3000000L, 4000000L);
// also e-notation should work, fractional parts should be truncated
searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange(1.0e3, 3000.8123).addRange(3000.8123, 4.0e3)).get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "1000-3000", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "3000-4000", 3000000L, 4000000L);
// however, e-notation should and fractional parts provided as string
// should be parsed and error if not compatible
Exception e = expectThrows(Exception.class, () -> client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange("1.0e3", "3.0e3").addRange("3.0e3", "4.0e3")).get());
assertThat(e.getCause(), instanceOf(ElasticsearchParseException.class));
assertEquals("failed to parse date field [1.0e3] with format [epoch_second]", e.getCause().getMessage());
e = expectThrows(Exception.class, () -> client().prepareSearch(indexName).setSize(0)
.addAggregation(dateRange("date_range").field("date").addRange("1000.123", "3000.8").addRange("3000.8", "4000.3")).get());
assertThat(e.getCause(), instanceOf(ElasticsearchParseException.class));
assertEquals("failed to parse date field [1000.123] with format [epoch_second]", e.getCause().getMessage());
// using different format should work when to/from is compatible with
// format in aggregation
searchResponse = client().prepareSearch(indexName).setSize(0).addAggregation(
dateRange("date_range").field("date").addRange("00.16.40", "00.50.00").addRange("00.50.00", "01.06.40").format("HH.mm.ss"))
.get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "00.16.40-00.50.00", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "00.50.00-01.06.40", 3000000L, 4000000L);
// providing different numeric input with format should work, but bucket
// keys are different now
searchResponse = client().prepareSearch(indexName).setSize(0)
.addAggregation(
dateRange("date_range").field("date").addRange(1000000, 3000000).addRange(3000000, 4000000).format("epoch_millis"))
.get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(3L));
buckets = checkBuckets(searchResponse.getAggregations().get("date_range"), "date_range", 2);
assertBucket(buckets.get(0), 2L, "1000000-3000000", 1000000L, 3000000L);
assertBucket(buckets.get(1), 1L, "3000000-4000000", 3000000L, 4000000L);
}
private static List<Range.Bucket> checkBuckets(Range dateRange, String expectedAggName, long expectedBucketsSize) {
assertThat(dateRange, Matchers.notNullValue());
assertThat(dateRange.getName(), equalTo(expectedAggName));
List<Range.Bucket> buckets = new ArrayList<>(dateRange.getBuckets());
assertThat(buckets.size(), is(2));
return buckets;
}
private static void assertBucket(Bucket bucket, long bucketSize, String expectedKey, long expectedFrom, long expectedTo) {
assertThat(bucket.getDocCount(), equalTo(bucketSize));
assertThat((String) bucket.getKey(), equalTo(expectedKey));
assertThat(((DateTime) bucket.getFrom()).getMillis(), equalTo(expectedFrom));
assertThat(((DateTime) bucket.getTo()).getMillis(), equalTo(expectedTo));
assertThat(bucket.getAggregations().asList().isEmpty(), is(true));
}
}

View File

@ -49,3 +49,11 @@ POST /twitter/_search?size=0
--------------------------------------------------
// CONSOLE
// TEST[setup:twitter]
==== Numeric `to` and `from` parameters in `date_range` aggregation are interpreted according to `format` now
Numeric `to` and `from` parameters in `date_range` aggregations used to always be interpreted as `epoch_millis`,
making other numeric formats like `epoch_seconds` unusable for numeric input values.
Now we interpret these parameters according to the `format` of the target field.
If the `format` in the mappings is not compatible with the numeric input value, a compatible
`format` (e.g. `epoch_millis`, `epoch_second`) must be specified in the `date_range` aggregation, otherwise an error is thrown.

View File

@ -14,6 +14,7 @@ setup:
type: double
date:
type: date
format: epoch_second
- do:
cluster.health:
@ -225,3 +226,50 @@ setup:
- match: { aggregations.ip_range.buckets.1.doc_count: 2 }
---
"Date range":
- skip:
version: " - 5.99.99"
reason: before 6.0, numeric date_range to/from parameters were always parsed as if they are epoch_millis (#17920)
- do:
index:
index: test
type: test
id: 1
body: { "date" : 1000 }
- do:
index:
index: test
type: test
id: 2
body: { "date" : 2000 }
- do:
index:
index: test
type: test
id: 3
body: { "date" : 3000 }
- do:
indices.refresh: {}
- do:
search:
body: { "size" : 0, "aggs" : { "date_range" : { "date_range" : { "field" : "date", "ranges": [ { "from" : 1000, "to": 3000 }, { "from": 3000, "to": 4000 } ] } } } }
- match: { hits.total: 3 }
- length: { aggregations.date_range.buckets: 2 }
- match: { aggregations.date_range.buckets.0.doc_count: 2 }
- match: { aggregations.date_range.buckets.0.key: "1000-3000" }
- match: { aggregations.date_range.buckets.0.from: 1000000 }
- match: { aggregations.date_range.buckets.0.to: 3000000 }
- match: { aggregations.date_range.buckets.1.doc_count: 1 }
- match: { aggregations.date_range.buckets.1.key: "3000-4000" }
- match: { aggregations.date_range.buckets.1.from: 3000000 }
- match: { aggregations.date_range.buckets.1.to: 4000000 }