[7.x] Week based parsing for ingest date processor (#58597) (#58802)

The date processor was incorrectly parsing week-based dates: when a
week-based year was provided, the ingest module assumed that the date
had no year and tried to apply the logic for dd/MM type of
dates.
The date processor also allows users to specify a locale parameter. It
should be taken into account when parsing dates - currently it is only
used for formatting. If someone specifies the 'en-us' locale, then the
calendar data rules for that locale should be used.
The exception is the iso8601 format. If someone is using that format,
then the locale should not override the calendar data rules.
closes #58479
This commit is contained in:
Przemyslaw Gomulka 2020-07-01 15:15:56 +02:00 committed by GitHub
parent 4f1da31158
commit 2c275913b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 219 additions and 42 deletions

View File

@ -29,6 +29,7 @@ import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.WeekFields;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
@ -45,8 +46,12 @@ enum DateFormat {
Iso8601 {
@Override
Function<String, ZonedDateTime> getFunction(String format, ZoneId timezone, Locale locale) {
return (date) -> DateFormatters.from(DateFormatter.forPattern("iso8601").parse(date), timezone)
return (date) -> {
TemporalAccessor accessor = DateFormatter.forPattern("iso8601").parse(date);
//even though locale could be set to en-us, Locale.ROOT (following iso8601 calendar data rules) should be used
return DateFormatters.from(accessor, Locale.ROOT, timezone)
.withZoneSameInstant(timezone);
};
}
},
@ -97,7 +102,9 @@ enum DateFormat {
TemporalAccessor accessor = formatter.parse(text);
// if there is no year nor year-of-era, we fall back to the current one and
// fill the rest of the date up with the parsed date
if (accessor.isSupported(ChronoField.YEAR) == false && accessor.isSupported(ChronoField.YEAR_OF_ERA) == false ) {
if (accessor.isSupported(ChronoField.YEAR) == false
&& accessor.isSupported(ChronoField.YEAR_OF_ERA) == false
&& accessor.isSupported(WeekFields.of(locale).weekOfWeekBasedYear()) == false) {
int year = LocalDate.now(ZoneOffset.UTC).getYear();
ZonedDateTime newTime = Instant.EPOCH.atZone(ZoneOffset.UTC).withYear(year);
for (ChronoField field : FIELDS) {
@ -110,9 +117,9 @@ enum DateFormat {
}
if (isUtc) {
return DateFormatters.from(accessor).withZoneSameInstant(ZoneOffset.UTC);
return DateFormatters.from(accessor, locale).withZoneSameInstant(ZoneOffset.UTC);
} else {
return DateFormatters.from(accessor);
return DateFormatters.from(accessor, locale);
}
};
}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.ingest.common;
import org.elasticsearch.bootstrap.JavaVersion;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.test.ESTestCase;
@ -69,6 +70,29 @@ public class DateFormatTests extends ESTestCase {
assertThat(dateTime.getYear(), is(year));
}
/**
 * Verifies that the week-based pattern {@code YYYY-ww} is parsed by {@code DateFormat.Java}
 * when {@code Locale.ROOT} is used: the input "2020-33" is expected to resolve to
 * 2020-08-10 at midnight in the Europe/Amsterdam zone.
 */
public void testParseWeekBased() {
// skipped when the runtime is JDK 8; the assumption message explains that the SPI
// calendar data provider is not picked up from the classpath there
assumeFalse("won't work in jdk8 " +
"because SPI mechanism is not looking at classpath - needs ISOCalendarDataProvider in jre's ext/libs",
JavaVersion.current().equals(JavaVersion.parse("8")));
// randomFrom is given a single candidate, so the format is always "YYYY-ww"
String format = randomFrom("YYYY-ww");
ZoneId timezone = DateUtils.of("Europe/Amsterdam");
Function<String, ZonedDateTime> javaFunction = DateFormat.Java.getFunction(format, timezone, Locale.ROOT);
ZonedDateTime dateTime = javaFunction.apply("2020-33");
// week 33 of week-based-year 2020 must map to the start of 10th August 2020 in the requested zone
assertThat(dateTime, equalTo(ZonedDateTime.of(2020,8,10,0,0,0,0,timezone)));
}
/**
 * Verifies that the locale passed to {@code DateFormat.Java.getFunction} influences week-based
 * parsing: with {@code Locale.US} the input "2020-33" in format {@code YYYY-ww} is expected to
 * resolve to 2020-08-09 at midnight in the Europe/Amsterdam zone.
 */
public void testParseWeekBasedWithLocale() {
// skipped when the runtime is JDK 8; the assumption message explains that the SPI
// calendar data provider is not picked up from the classpath there
assumeFalse("won't work in jdk8 " +
"because SPI mechanism is not looking at classpath - needs ISOCalendarDataProvider in jre's ext/libs",
JavaVersion.current().equals(JavaVersion.parse("8")));
// randomFrom is given a single candidate, so the format is always "YYYY-ww"
String format = randomFrom("YYYY-ww");
ZoneId timezone = DateUtils.of("Europe/Amsterdam");
Function<String, ZonedDateTime> javaFunction = DateFormat.Java.getFunction(format, timezone, Locale.US);
ZonedDateTime dateTime = javaFunction.apply("2020-33");
//33rd week of 2020 starts on 9th August 2020 as per US locale
assertThat(dateTime, equalTo(ZonedDateTime.of(2020,8,9,0,0,0,0,timezone)));
}
public void testParseUnixMs() {
assertThat(DateFormat.UnixMs.getFunction(null, ZoneOffset.UTC, null).apply("1000500").toInstant().toEpochMilli(),
equalTo(1000500L));

View File

@ -183,3 +183,138 @@ teardown:
- match: { _source.date_source_7: "2018-02-05T13:44:56.657+0100" }
- match: { _source.date_target_7: "2018-02-05T12:44:56.657Z" }
---
"Test week based date parsing":
- skip:
reason: "Week based calculations require JDK9"
features: "spi_on_classpath_jdk9"
- do:
indices.create:
index: test
body:
mappings:
properties:
date_source_field:
type: date
format: YYYY-ww
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"date" : {
"field" : "date_source_field",
"target_field" : "date_target_field",
"formats" : ["YYYY-ww"]
}
}
]
}
- match: { acknowledged: true }
- do:
ingest.simulate:
id: "my_pipeline"
body: >
{
"docs": [
{
"_source": {
"date_source_field": "2020-33"
}
}
]
}
- length: { docs: 1 }
- match: { docs.0.doc._source.date_source_field: "2020-33" }
- match: { docs.0.doc._source.date_target_field: "2020-08-10T00:00:00.000Z" }
- length: { docs.0.doc._ingest: 1 }
- is_true: docs.0.doc._ingest.timestamp
- do:
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {date_source_field: "2020-33"}
- do:
get:
index: test
id: 1
- match: { _source.date_source_field: "2020-33" }
- match: { _source.date_target_field: "2020-08-10T00:00:00.000Z" }
---
"Test week based date parsing with locale":
- skip:
reason: "Week based calculations require JDK9"
features: "spi_on_classpath_jdk9"
#locale is used when parsing as well on a pipeline. As per US locale, start of the 33rd week 2020 is on 09August2020 (sunday)
- do:
indices.create:
index: test
body:
mappings:
properties:
date_source_field:
type: date
format: YYYY-ww
locale: en-US
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"date" : {
"field" : "date_source_field",
"target_field" : "date_target_field",
"formats" : ["YYYY-ww"],
"locale" : "en-US"
}
}
]
}
- match: { acknowledged: true }
- do:
ingest.simulate:
id: "my_pipeline"
body: >
{
"docs": [
{
"_source": {
"date_source_field": "2020-33"
}
}
]
}
- length: { docs: 1 }
- match: { docs.0.doc._source.date_source_field: "2020-33" }
- match: { docs.0.doc._source.date_target_field: "2020-08-09T00:00:00.000Z" }
- length: { docs.0.doc._ingest: 1 }
- is_true: docs.0.doc._ingest.timestamp
- do:
index:
index: test
id: 1
pipeline: "my_pipeline"
body: {date_source_field: "2020-33"}
- do:
get:
index: test
id: 1
- match: { _source.date_source_field: "2020-33" }
- match: { _source.date_target_field: "2020-08-09T00:00:00.000Z" }

View File

@ -37,6 +37,7 @@ import java.time.format.SignStyle;
import java.time.temporal.ChronoField;
import java.time.temporal.IsoFields;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalAdjusters;
import java.time.temporal.TemporalQueries;
import java.time.temporal.TemporalQuery;
import java.time.temporal.WeekFields;
@ -52,7 +53,8 @@ import static java.time.temporal.ChronoField.NANO_OF_SECOND;
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
public class DateFormatters {
public static final WeekFields WEEK_FIELDS = WeekFields.of(DayOfWeek.MONDAY,4);
// when run with JDK8, WeekFields for Locale.ROOT would return WeekFields.of(DayOfWeek.SUNDAY,1)
public static final WeekFields WEEK_FIELDS_ROOT = WeekFields.of(DayOfWeek.MONDAY,4);
private static final DateTimeFormatter TIME_ZONE_FORMATTER_NO_COLON = new DateTimeFormatterBuilder()
.appendOffset("+HHmm", "Z")
@ -946,14 +948,14 @@ public class DateFormatters {
* Returns a formatter for a four digit weekyear
*/
private static final DateFormatter STRICT_WEEKYEAR = new JavaDateFormatter("strict_weekyear", new DateTimeFormatterBuilder()
.appendValue(WEEK_FIELDS.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT));
private static final DateTimeFormatter STRICT_WEEKYEAR_WEEK_FORMATTER = new DateTimeFormatterBuilder()
.appendValue(WEEK_FIELDS.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
.appendLiteral("-W")
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear(), 2, 2, SignStyle.NOT_NEGATIVE)
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear(), 2, 2, SignStyle.NOT_NEGATIVE)
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT);
@ -972,7 +974,7 @@ public class DateFormatters {
new DateTimeFormatterBuilder()
.append(STRICT_WEEKYEAR_WEEK_FORMATTER)
.appendLiteral("-")
.appendValue(WEEK_FIELDS.dayOfWeek())
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT));
@ -1162,7 +1164,7 @@ public class DateFormatters {
* Returns a formatter for a four digit weekyear. (YYYY)
*/
private static final DateFormatter WEEK_YEAR = new JavaDateFormatter("week_year",
new DateTimeFormatterBuilder().appendValue(WEEK_FIELDS.weekBasedYear()).toFormatter(Locale.ROOT)
new DateTimeFormatterBuilder().appendValue(WEEK_FIELDS_ROOT.weekBasedYear()).toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT));
/*
@ -1591,9 +1593,9 @@ public class DateFormatters {
*/
private static final DateFormatter WEEKYEAR_WEEK = new JavaDateFormatter("weekyear_week", STRICT_WEEKYEAR_WEEK_FORMATTER,
new DateTimeFormatterBuilder()
.appendValue(WEEK_FIELDS.weekBasedYear())
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear())
.appendLiteral("-W")
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear())
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear())
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT)
);
@ -1606,15 +1608,15 @@ public class DateFormatters {
new DateTimeFormatterBuilder()
.append(STRICT_WEEKYEAR_WEEK_FORMATTER)
.appendLiteral("-")
.appendValue(WEEK_FIELDS.dayOfWeek())
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT),
new DateTimeFormatterBuilder()
.appendValue(WEEK_FIELDS.weekBasedYear())
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear())
.appendLiteral("-W")
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear())
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear())
.appendLiteral("-")
.appendValue(WEEK_FIELDS.dayOfWeek())
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
.toFormatter(Locale.ROOT)
.withResolverStyle(ResolverStyle.STRICT)
);
@ -1836,10 +1838,14 @@ public class DateFormatters {
* @return The converted zoned date time
*/
public static ZonedDateTime from(TemporalAccessor accessor) {
return from(accessor, ZoneOffset.UTC);
return from(accessor, Locale.ROOT, ZoneOffset.UTC);
}
public static ZonedDateTime from(TemporalAccessor accessor, ZoneId defaultZone) {
/**
 * Converts a temporal accessor to a zoned date time, using the given locale and
 * {@link ZoneOffset#UTC} as the default zone.
 * Delegates to the overload that takes a temporal accessor, a locale and a default zone.
 *
 * @param accessor The accessor returned from a parser
 * @param locale   The locale forwarded to the three-argument overload
 * @return The converted zoned date time
 */
public static ZonedDateTime from(TemporalAccessor accessor, Locale locale) {
return from(accessor, locale, ZoneOffset.UTC);
}
public static ZonedDateTime from(TemporalAccessor accessor, Locale locale, ZoneId defaultZone) {
if (accessor instanceof ZonedDateTime) {
return (ZonedDateTime) accessor;
}
@ -1862,7 +1868,7 @@ public class DateFormatters {
} else if (isLocalDateSet) {
return localDate.atStartOfDay(zoneId);
} else if (isLocalTimeSet) {
return of(getLocalDate(accessor), localTime, zoneId);
return of(getLocalDate(accessor, locale), localTime, zoneId);
} else if (accessor.isSupported(ChronoField.YEAR) || accessor.isSupported(ChronoField.YEAR_OF_ERA) ) {
if (accessor.isSupported(MONTH_OF_YEAR)) {
return getFirstOfMonth(accessor).atStartOfDay(zoneId);
@ -1872,9 +1878,9 @@ public class DateFormatters {
}
} else if (accessor.isSupported(MONTH_OF_YEAR)) {
// missing year, falling back to the epoch and then filling
return getLocalDate(accessor).atStartOfDay(zoneId);
} else if (accessor.isSupported(WEEK_FIELDS.weekBasedYear())) {
return localDateFromWeekBasedDate(accessor).atStartOfDay(zoneId);
return getLocalDate(accessor, locale).atStartOfDay(zoneId);
} else if (accessor.isSupported(WeekFields.of(locale).weekBasedYear())) {
return localDateFromWeekBasedDate(accessor, locale).atStartOfDay(zoneId);
}
// we should not reach this piece of code, everything being parsed we should be able to
@ -1882,16 +1888,18 @@ public class DateFormatters {
throw new IllegalArgumentException("temporal accessor [" + accessor + "] cannot be converted to zoned date time");
}
private static LocalDate localDateFromWeekBasedDate(TemporalAccessor accessor) {
if (accessor.isSupported(WEEK_FIELDS.weekOfWeekBasedYear())) {
private static LocalDate localDateFromWeekBasedDate(TemporalAccessor accessor, Locale locale) {
WeekFields weekFields = WeekFields.of(locale);
if (accessor.isSupported(weekFields.weekOfWeekBasedYear())) {
return LocalDate.ofEpochDay(0)
.with(WEEK_FIELDS.weekBasedYear(), accessor.get(WEEK_FIELDS.weekBasedYear()))
.with(WEEK_FIELDS.weekOfWeekBasedYear(), accessor.get(WEEK_FIELDS.weekOfWeekBasedYear()))
.with(ChronoField.DAY_OF_WEEK, WEEK_FIELDS.getFirstDayOfWeek().getValue());
.with(weekFields.weekBasedYear(), accessor.get(weekFields.weekBasedYear()))
.with(weekFields.weekOfWeekBasedYear(), accessor.get(weekFields.weekOfWeekBasedYear()))
.with(TemporalAdjusters.previousOrSame(weekFields.getFirstDayOfWeek()));
} else {
return LocalDate.ofEpochDay(0)
.with(WEEK_FIELDS.weekBasedYear(), accessor.get(WEEK_FIELDS.weekBasedYear()))
.with(ChronoField.DAY_OF_WEEK, WEEK_FIELDS.getFirstDayOfWeek().getValue());
.with(weekFields.weekBasedYear(), accessor.get(weekFields.weekBasedYear()))
.with(TemporalAdjusters.previousOrSame(weekFields.getFirstDayOfWeek()));
}
}
@ -1922,9 +1930,9 @@ public class DateFormatters {
}
};
private static LocalDate getLocalDate(TemporalAccessor accessor) {
if (accessor.isSupported(WEEK_FIELDS.weekBasedYear())) {
return localDateFromWeekBasedDate(accessor);
private static LocalDate getLocalDate(TemporalAccessor accessor, Locale locale) {
if (accessor.isSupported(WeekFields.of(locale).weekBasedYear())) {
return localDateFromWeekBasedDate(accessor, locale);
} else if (accessor.isSupported(MONTH_OF_YEAR)) {
int year = getYear(accessor);
if (accessor.isSupported(DAY_OF_MONTH)) {

View File

@ -363,7 +363,7 @@ public final class DateFieldMapper extends FieldMapper {
}
public long parse(String value) {
return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value)).toInstant());
return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value), dateTimeFormatter().locale()).toInstant());
}
@Override

View File

@ -474,14 +474,14 @@ public class JodaCompatibleZonedDateTime
@Deprecated
public int getWeekOfWeekyear() {
logDeprecatedMethod("getWeekOfWeekyear()", "get(DateFormatters.WEEK_FIELDS.weekOfWeekBasedYear())");
return dt.get(DateFormatters.WEEK_FIELDS.weekOfWeekBasedYear());
logDeprecatedMethod("getWeekOfWeekyear()", "get(DateFormatters.WEEK_FIELDS_ROOT.weekOfWeekBasedYear())");
return dt.get(DateFormatters.WEEK_FIELDS_ROOT.weekOfWeekBasedYear());
}
@Deprecated
public int getWeekyear() {
logDeprecatedMethod("getWeekyear()", "get(DateFormatters.WEEK_FIELDS.weekBasedYear())");
return dt.get(DateFormatters.WEEK_FIELDS.weekBasedYear());
logDeprecatedMethod("getWeekyear()", "get(DateFormatters.WEEK_FIELDS_ROOT.weekBasedYear())");
return dt.get(DateFormatters.WEEK_FIELDS_ROOT.weekBasedYear());
}
@Deprecated

View File

@ -54,6 +54,9 @@ public class JavaJodaTimeDuellingTests extends ESTestCase {
assert (runtimeJdk8 && ("SPI,JRE".equals(System.getProperty("java.locale.providers"))))
|| (false == runtimeJdk8 && ("SPI,COMPAT".equals(System.getProperty("java.locale.providers"))))
: "`-Djava.locale.providers` needs to be set";
assumeFalse("won't work in jdk8 " +
"because SPI mechanism is not looking at classpath - needs ISOCalendarDataProvider in jre's ext/libs",
runtimeJdk8);
}
public void testTimezoneParsing() {

View File

@ -213,12 +213,12 @@ public class JodaCompatibleZonedDateTimeTests extends ESTestCase {
public void testWeekOfWeekyear() {
assertMethodDeprecation(() -> assertThat(javaTime.getWeekOfWeekyear(), equalTo(jodaTime.getWeekOfWeekyear())),
"getWeekOfWeekyear()", "get(DateFormatters.WEEK_FIELDS.weekOfWeekBasedYear())");
"getWeekOfWeekyear()", "get(DateFormatters.WEEK_FIELDS_ROOT.weekOfWeekBasedYear())");
}
public void testWeekyear() {
assertMethodDeprecation(() -> assertThat(javaTime.getWeekyear(), equalTo(jodaTime.getWeekyear())),
"getWeekyear()", "get(DateFormatters.WEEK_FIELDS.weekBasedYear())");
"getWeekyear()", "get(DateFormatters.WEEK_FIELDS_ROOT.weekBasedYear())");
}
public void testYearOfCentury() {