Preserve `date_histogram` format when aggregating on unmapped fields (#35254)

Currently when aggregating on an unmapped date field (e.g. using a
date_histogram) we don't preserve the aggregations `format` setting but instead
use the default format. This can lead to loosing the aggregations `format` when
aggregating over several indices where some of them contain unmapped date fields
and are encountered first in the reduce phase.

Related to #31760
This commit is contained in:
Christoph Büscher 2018-11-08 10:22:25 +01:00 committed by GitHub
parent 0b24c4f8e7
commit 14b811446f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 4 deletions

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.support;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.joda.Joda;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@ -53,7 +54,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
if (field == null) {
if (script == null) {
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(ValuesSourceType.ANY);
config.format(resolveFormat(null, valueType));
config.format(resolveFormat(null, valueType, timeZone));
return config;
}
ValuesSourceType valuesSourceType = valueType != null ? valueType.getValuesSourceType() : ValuesSourceType.ANY;
@ -67,7 +68,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
config.missing(missing);
config.timezone(timeZone);
config.format(resolveFormat(format, valueType));
config.format(resolveFormat(format, valueType, timeZone));
config.script(createScript(script, context));
config.scriptValueType(valueType);
return config;
@ -79,7 +80,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
config.missing(missing);
config.timezone(timeZone);
config.format(resolveFormat(format, valueType));
config.format(resolveFormat(format, valueType, timeZone));
config.unmapped(true);
if (valueType != null) {
// todo do we really need this for unmapped?
@ -120,7 +121,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
}
}
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType) {
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType, @Nullable DateTimeZone tz) {
if (valueType == null) {
return DocValueFormat.RAW; // we can't figure it out
}
@ -128,6 +129,9 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
if (valueFormat instanceof DocValueFormat.Decimal && format != null) {
valueFormat = new DocValueFormat.Decimal(format);
}
if (valueFormat instanceof DocValueFormat.DateTime && format != null) {
valueFormat = new DocValueFormat.DateTime(Joda.forPattern(format), tz != null ? tz : DateTimeZone.UTC);
}
return valueFormat;
}

View File

@ -1341,6 +1341,31 @@ public class DateHistogramIT extends ESIntegTestCase {
}
}
/**
* https://github.com/elastic/elasticsearch/issues/31760 shows an edge case where an unmapped "date" field in two indices
* that are queried simultaneously can lead to the "format" parameter in the aggregation not being preserved correctly.
*
* The error happens when the bucket from the "unmapped" index is received first in the reduce phase, however the case can
* be recreated when aggregating about a single index with an unmapped date field and also getting "empty" buckets.
*/
public void testFormatIndexUnmapped() throws InterruptedException, ExecutionException {
String indexDateUnmapped = "test31760";
indexRandom(true, client().prepareIndex(indexDateUnmapped, "_doc").setSource("foo", "bar"));
ensureSearchable(indexDateUnmapped);
SearchResponse response = client().prepareSearch(indexDateUnmapped)
.addAggregation(
dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("YYYY-MM")
.minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
.execute().actionGet();
assertSearchResponse(response);
Histogram histo = response.getAggregations().get("histo");
assertThat(histo.getBuckets().size(), equalTo(1));
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2018-01"));
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(0L));
internalCluster().wipeIndices(indexDateUnmapped);
}
/**
* https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
* "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone