Fix use of time zone in date_histogram rewrite (#31407)
Currently, DateHistogramAggregationBuilder#rewriteTimeZone uses the aggregation date math parser and time zone to check whether all values in a read have the same timezone to speed up computation. However, the upper and lower bounds to check are retrieved as longs in epoch_millis, so they don't need to get parsed using a time zone or a parser other than "epoch_millis". This changes this behaviour that was causing problems when the field type mapping was specifying only "epoch_millis" as a format but a different timezone than UTC was used. Closes #31392
This commit is contained in:
parent
401800d958
commit
df10704ffc
|
@ -25,6 +25,8 @@ import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
|
import org.elasticsearch.common.joda.DateMathParser;
|
||||||
|
import org.elasticsearch.common.joda.Joda;
|
||||||
import org.elasticsearch.common.rounding.DateTimeUnit;
|
import org.elasticsearch.common.rounding.DateTimeUnit;
|
||||||
import org.elasticsearch.common.rounding.Rounding;
|
import org.elasticsearch.common.rounding.Rounding;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
|
@ -36,7 +38,6 @@ import org.elasticsearch.index.fielddata.IndexNumericFieldData;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||||
import org.elasticsearch.index.query.QueryShardContext;
|
import org.elasticsearch.index.query.QueryShardContext;
|
||||||
import org.elasticsearch.search.DocValueFormat;
|
|
||||||
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
import org.elasticsearch.search.aggregations.AggregationBuilder;
|
||||||
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
|
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
|
||||||
import org.elasticsearch.search.aggregations.AggregatorFactory;
|
import org.elasticsearch.search.aggregations.AggregatorFactory;
|
||||||
|
@ -59,6 +60,7 @@ import org.joda.time.DateTimeZone;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
@ -70,6 +72,7 @@ import static java.util.Collections.unmodifiableMap;
|
||||||
public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder<ValuesSource.Numeric, DateHistogramAggregationBuilder>
|
public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder<ValuesSource.Numeric, DateHistogramAggregationBuilder>
|
||||||
implements MultiBucketAggregationBuilder {
|
implements MultiBucketAggregationBuilder {
|
||||||
public static final String NAME = "date_histogram";
|
public static final String NAME = "date_histogram";
|
||||||
|
private static DateMathParser EPOCH_MILLIS_PARSER = new DateMathParser(Joda.forPattern("epoch_millis", Locale.ROOT));
|
||||||
|
|
||||||
public static final Map<String, DateTimeUnit> DATE_FIELD_UNITS;
|
public static final Map<String, DateTimeUnit> DATE_FIELD_UNITS;
|
||||||
|
|
||||||
|
@ -380,7 +383,7 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuil
|
||||||
Long anyInstant = null;
|
Long anyInstant = null;
|
||||||
final IndexNumericFieldData fieldData = context.getForField(ft);
|
final IndexNumericFieldData fieldData = context.getForField(ft);
|
||||||
for (LeafReaderContext ctx : reader.leaves()) {
|
for (LeafReaderContext ctx : reader.leaves()) {
|
||||||
AtomicNumericFieldData leafFD = ((IndexNumericFieldData) fieldData).load(ctx);
|
AtomicNumericFieldData leafFD = fieldData.load(ctx);
|
||||||
SortedNumericDocValues values = leafFD.getLongValues();
|
SortedNumericDocValues values = leafFD.getLongValues();
|
||||||
if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
anyInstant = values.nextValue();
|
anyInstant = values.nextValue();
|
||||||
|
@ -406,11 +409,8 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuil
|
||||||
// rounding rounds down, so 'nextTransition' is a good upper bound
|
// rounding rounds down, so 'nextTransition' is a good upper bound
|
||||||
final long high = nextTransition;
|
final long high = nextTransition;
|
||||||
|
|
||||||
final DocValueFormat format = ft.docValueFormat(null, null);
|
if (ft.isFieldWithinQuery(reader, low, high, true, false, DateTimeZone.UTC, EPOCH_MILLIS_PARSER,
|
||||||
final Object formattedLow = format.format(low);
|
context) == Relation.WITHIN) {
|
||||||
final Object formattedHigh = format.format(high);
|
|
||||||
if (ft.isFieldWithinQuery(reader, formattedLow, formattedHigh,
|
|
||||||
true, false, tz, null, context) == Relation.WITHIN) {
|
|
||||||
// All values in this reader have the same offset despite daylight saving times.
|
// All values in this reader have the same offset despite daylight saving times.
|
||||||
// This is very common for location-based timezones such as Europe/Paris in
|
// This is very common for location-based timezones such as Europe/Paris in
|
||||||
// combination with time-based indices.
|
// combination with time-based indices.
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.elasticsearch.plugins.Plugin;
|
||||||
import org.elasticsearch.script.Script;
|
import org.elasticsearch.script.Script;
|
||||||
import org.elasticsearch.script.ScriptType;
|
import org.elasticsearch.script.ScriptType;
|
||||||
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
||||||
|
import org.elasticsearch.search.aggregations.BucketOrder;
|
||||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||||
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
|
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
|
||||||
import org.elasticsearch.search.aggregations.bucket.histogram.ExtendedBounds;
|
import org.elasticsearch.search.aggregations.bucket.histogram.ExtendedBounds;
|
||||||
|
@ -41,7 +42,6 @@ import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
|
||||||
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket;
|
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket;
|
||||||
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
|
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
|
||||||
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
|
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
|
||||||
import org.elasticsearch.search.aggregations.BucketOrder;
|
|
||||||
import org.elasticsearch.test.ESIntegTestCase;
|
import org.elasticsearch.test.ESIntegTestCase;
|
||||||
import org.hamcrest.Matchers;
|
import org.hamcrest.Matchers;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
@ -1341,6 +1341,38 @@ public class DateHistogramIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
|
||||||
|
* "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone
|
||||||
|
* with daylight savings time is used. This test was added to check this is working now
|
||||||
|
* @throws ExecutionException
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
public void testRewriteTimeZone_EpochMillisFormat() throws InterruptedException, ExecutionException {
|
||||||
|
String index = "test31392";
|
||||||
|
assertAcked(client().admin().indices().prepareCreate(index).addMapping("type", "d", "type=date,format=epoch_millis").get());
|
||||||
|
indexRandom(true, client().prepareIndex(index, "type").setSource("d", "1477954800000"));
|
||||||
|
ensureSearchable(index);
|
||||||
|
SearchResponse response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d")
|
||||||
|
.dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin"))).execute().actionGet();
|
||||||
|
assertSearchResponse(response);
|
||||||
|
Histogram histo = response.getAggregations().get("histo");
|
||||||
|
assertThat(histo.getBuckets().size(), equalTo(1));
|
||||||
|
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("1477954800000"));
|
||||||
|
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L));
|
||||||
|
|
||||||
|
response = client().prepareSearch(index).addAggregation(dateHistogram("histo").field("d")
|
||||||
|
.dateHistogramInterval(DateHistogramInterval.MONTH).timeZone(DateTimeZone.forID("Europe/Berlin")).format("yyyy-MM-dd"))
|
||||||
|
.execute().actionGet();
|
||||||
|
assertSearchResponse(response);
|
||||||
|
histo = response.getAggregations().get("histo");
|
||||||
|
assertThat(histo.getBuckets().size(), equalTo(1));
|
||||||
|
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2016-11-01"));
|
||||||
|
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(1L));
|
||||||
|
|
||||||
|
internalCluster().wipeIndices(index);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When DST ends, local time turns back one hour, so between 2am and 4am wall time we should have four buckets:
|
* When DST ends, local time turns back one hour, so between 2am and 4am wall time we should have four buckets:
|
||||||
* "2015-10-25T02:00:00.000+02:00",
|
* "2015-10-25T02:00:00.000+02:00",
|
||||||
|
|
Loading…
Reference in New Issue