From df10704ffc7f92dbb64e0660e6604f2b426f65e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 20 Jun 2018 10:23:39 +0200 Subject: [PATCH] Fix use of time zone in date_histogram rewrite (#31407) Currently, DateHistogramAggregationBuilder#rewriteTimeZone uses the aggregation date math parser and time zone to check whether all values in a read have the same timezone to speed up computation. However, the upper and lower bounds to check are retrieved as longs in epoch_millis, so they don't need to get parsed using a time zone or a parser other than "epoch_millis". This changes this behaviour that was causing problems when the field type mapping was specifying only "epoch_millis" as a format but a different timezone than UTC was used. Closes #31392 --- .../DateHistogramAggregationBuilder.java | 14 ++++---- .../aggregations/bucket/DateHistogramIT.java | 34 ++++++++++++++++++- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java index bb391f21f1e..bb785efde48 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java @@ -25,6 +25,8 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.joda.DateMathParser; +import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.rounding.DateTimeUnit; import org.elasticsearch.common.rounding.Rounding; import org.elasticsearch.common.unit.TimeValue; @@ -36,7 +38,6 @@ import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType.Relation; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -59,6 +60,7 @@ import org.joda.time.DateTimeZone; import java.io.IOException; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -70,6 +72,7 @@ import static java.util.Collections.unmodifiableMap; public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder implements MultiBucketAggregationBuilder { public static final String NAME = "date_histogram"; + private static DateMathParser EPOCH_MILLIS_PARSER = new DateMathParser(Joda.forPattern("epoch_millis", Locale.ROOT)); public static final Map DATE_FIELD_UNITS; @@ -380,7 +383,7 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuil Long anyInstant = null; final IndexNumericFieldData fieldData = context.getForField(ft); for (LeafReaderContext ctx : reader.leaves()) { - AtomicNumericFieldData leafFD = ((IndexNumericFieldData) fieldData).load(ctx); + AtomicNumericFieldData leafFD = fieldData.load(ctx); SortedNumericDocValues values = leafFD.getLongValues(); if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { anyInstant = values.nextValue(); @@ -406,11 +409,8 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuil // rounding rounds down, so 'nextTransition' is a good upper bound final long high = nextTransition; - final DocValueFormat format = ft.docValueFormat(null, null); - final Object formattedLow = format.format(low); - final Object formattedHigh = format.format(high); - if (ft.isFieldWithinQuery(reader, formattedLow, formattedHigh, - true, false, tz, null, context) == Relation.WITHIN) { + if (ft.isFieldWithinQuery(reader, low, high, true, false, DateTimeZone.UTC, EPOCH_MILLIS_PARSER, + context) == Relation.WITHIN) { // All values in this reader have the same offset despite daylight saving times. // This is very common for location-based timezones such as Europe/Paris in // combination with time-based indices. diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java index a4a561cfee3..26e6f4c0765 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java @@ -34,6 +34,7 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.aggregations.AggregationExecutionException; +import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.elasticsearch.search.aggregations.bucket.histogram.ExtendedBounds; @@ -41,7 +42,6 @@ import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket; import org.elasticsearch.search.aggregations.metrics.avg.Avg; import org.elasticsearch.search.aggregations.metrics.sum.Sum; -import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matchers; import org.joda.time.DateTime; @@ -1341,6 +1341,38 @@ public class DateHistogramIT extends ESIntegTestCase { } } + /** + * https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with + * "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone + * with daylight savings time is used. This test was added to check this is working now + * @throws ExecutionException + * @throws InterruptedException + */ + public void testRewriteTimeZone_EpochMillisFormat() throws InterruptedException, ExecutionException { + String index = "test31392"; + assertAcked(client().admin().indices().prepareCreate(index).addMapping("type", "d", "type=date,format=epoch_millis").get()); + indexRandom(true, client().prepareIndex(index, "type").setSource("d", "1477954800000")); + ensureSearchable(index); + SearchResponse response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d") + .dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin"))).execute().actionGet(); + assertSearchResponse(response); + Histogram histo = response.getAggregations().get("histo"); + assertThat(histo.getBuckets().size(), equalTo(1)); + assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("1477954800000")); + assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L)); + + response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d") + .dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin")).format("yyyy-MM-dd")) + .execute().actionGet(); + assertSearchResponse(response); + histo = response.getAggregations().get("histo"); + assertThat(histo.getBuckets().size(), equalTo(1)); + assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2016-11-01")); + assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L)); + + internalCluster().wipeIndices(index); + } + /** * When DST ends, local time turns back one hour, so between 2am and 4am wall time we should have four buckets: * "2015-10-25T02:00:00.000+02:00",