Preserve `date_histogram` format when aggregating on unmapped fields (#35254)
Currently when aggregating on an unmapped date field (e.g. using a date_histogram) we don't preserve the aggregations `format` setting but instead use the default format. This can lead to loosing the aggregations `format` when aggregating over several indices where some of them contain unmapped date fields and are encountered first in the reduce phase. Related to #31760
This commit is contained in:
parent
0b24c4f8e7
commit
14b811446f
|
@ -21,6 +21,7 @@ package org.elasticsearch.search.aggregations.support;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.common.joda.Joda;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
|
||||
|
@ -53,7 +54,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
|
|||
if (field == null) {
|
||||
if (script == null) {
|
||||
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(ValuesSourceType.ANY);
|
||||
config.format(resolveFormat(null, valueType));
|
||||
config.format(resolveFormat(null, valueType, timeZone));
|
||||
return config;
|
||||
}
|
||||
ValuesSourceType valuesSourceType = valueType != null ? valueType.getValuesSourceType() : ValuesSourceType.ANY;
|
||||
|
@ -67,7 +68,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
|
|||
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
|
||||
config.missing(missing);
|
||||
config.timezone(timeZone);
|
||||
config.format(resolveFormat(format, valueType));
|
||||
config.format(resolveFormat(format, valueType, timeZone));
|
||||
config.script(createScript(script, context));
|
||||
config.scriptValueType(valueType);
|
||||
return config;
|
||||
|
@ -79,7 +80,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
|
|||
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
|
||||
config.missing(missing);
|
||||
config.timezone(timeZone);
|
||||
config.format(resolveFormat(format, valueType));
|
||||
config.format(resolveFormat(format, valueType, timeZone));
|
||||
config.unmapped(true);
|
||||
if (valueType != null) {
|
||||
// todo do we really need this for unmapped?
|
||||
|
@ -120,7 +121,7 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
|
|||
}
|
||||
}
|
||||
|
||||
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType) {
|
||||
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType, @Nullable DateTimeZone tz) {
|
||||
if (valueType == null) {
|
||||
return DocValueFormat.RAW; // we can't figure it out
|
||||
}
|
||||
|
@ -128,6 +129,9 @@ public class ValuesSourceConfig<VS extends ValuesSource> {
|
|||
if (valueFormat instanceof DocValueFormat.Decimal && format != null) {
|
||||
valueFormat = new DocValueFormat.Decimal(format);
|
||||
}
|
||||
if (valueFormat instanceof DocValueFormat.DateTime && format != null) {
|
||||
valueFormat = new DocValueFormat.DateTime(Joda.forPattern(format), tz != null ? tz : DateTimeZone.UTC);
|
||||
}
|
||||
return valueFormat;
|
||||
}
|
||||
|
||||
|
|
|
@ -1341,6 +1341,31 @@ public class DateHistogramIT extends ESIntegTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* https://github.com/elastic/elasticsearch/issues/31760 shows an edge case where an unmapped "date" field in two indices
|
||||
* that are queried simultaneously can lead to the "format" parameter in the aggregation not being preserved correctly.
|
||||
*
|
||||
* The error happens when the bucket from the "unmapped" index is received first in the reduce phase, however the case can
|
||||
* be recreated when aggregating about a single index with an unmapped date field and also getting "empty" buckets.
|
||||
*/
|
||||
public void testFormatIndexUnmapped() throws InterruptedException, ExecutionException {
|
||||
String indexDateUnmapped = "test31760";
|
||||
indexRandom(true, client().prepareIndex(indexDateUnmapped, "_doc").setSource("foo", "bar"));
|
||||
ensureSearchable(indexDateUnmapped);
|
||||
|
||||
SearchResponse response = client().prepareSearch(indexDateUnmapped)
|
||||
.addAggregation(
|
||||
dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("YYYY-MM")
|
||||
.minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
|
||||
.execute().actionGet();
|
||||
assertSearchResponse(response);
|
||||
Histogram histo = response.getAggregations().get("histo");
|
||||
assertThat(histo.getBuckets().size(), equalTo(1));
|
||||
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2018-01"));
|
||||
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(0L));
|
||||
internalCluster().wipeIndices(indexDateUnmapped);
|
||||
}
|
||||
|
||||
/**
|
||||
* https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
|
||||
* "format" = "epoch_millis" can lead for the date histogram aggregation to throw an error if a non-UTC time zone
|
||||
|
|
Loading…
Reference in New Issue