Fix timezone fallback in ingest processor (#38407) (#38664)

If no timezone was specified in the date processor, the conversion
produced the wrong time: UTC was assumed by default and overrode the
time zone given in the date itself, leading to incorrectly parsed dates.

This commit no longer assumes a default timezone, so dates are no
longer formatted incorrectly.
This commit is contained in:
Alexander Reelsen 2019-02-09 20:28:59 +01:00 committed by GitHub
parent 5ab5a0a529
commit 56edc8e37f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 132 additions and 5 deletions

View File

@ -87,10 +87,16 @@ enum DateFormat {
format = format.substring(1); format = format.substring(1);
} }
boolean isUtc = ZoneOffset.UTC.equals(zoneId);
int year = LocalDate.now(ZoneOffset.UTC).getYear(); int year = LocalDate.now(ZoneOffset.UTC).getYear();
DateFormatter formatter = DateFormatter.forPattern(format) DateFormatter dateFormatter = DateFormatter.forPattern(format)
.withLocale(locale) .withLocale(locale);
.withZone(zoneId); // if UTC zone is set here, the time zone specified in the format will be ignored, leading to wrong dates
if (isUtc == false) {
dateFormatter = dateFormatter.withZone(zoneId);
}
final DateFormatter formatter = dateFormatter;
return text -> { return text -> {
TemporalAccessor accessor = formatter.parse(text); TemporalAccessor accessor = formatter.parse(text);
// if there is no year, we fall back to the current one and // if there is no year, we fall back to the current one and
@ -106,7 +112,11 @@ enum DateFormat {
accessor = newTime.withZoneSameLocal(zoneId); accessor = newTime.withZoneSameLocal(zoneId);
} }
return DateFormatters.from(accessor); if (isUtc) {
return DateFormatters.from(accessor).withZoneSameInstant(ZoneOffset.UTC);
} else {
return DateFormatters.from(accessor);
}
}; };
} }
}; };

View File

@ -43,7 +43,7 @@ public final class DateProcessor extends AbstractProcessor {
public static final String TYPE = "date"; public static final String TYPE = "date";
static final String DEFAULT_TARGET_FIELD = "@timestamp"; static final String DEFAULT_TARGET_FIELD = "@timestamp";
public static final DateFormatter FORMATTER = DateFormatter.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); private static final DateFormatter FORMATTER = DateFormatter.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
private final TemplateScript.Factory timezone; private final TemplateScript.Factory timezone;
private final TemplateScript.Factory locale; private final TemplateScript.Factory locale;

View File

@ -19,6 +19,7 @@
package org.elasticsearch.ingest.common; package org.elasticsearch.ingest.common;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTestCase;
@ -43,6 +44,14 @@ public class DateFormatTests extends ESTestCase {
equalTo("11 24 01:29:01")); equalTo("11 24 01:29:01"));
} }
/**
 * Parses a timestamp that carries its own {@code +0100} offset with the Java date format while
 * UTC is passed as the zone, then renders the result in UTC. The offset in the text must be
 * applied, so 13:44:56.657+0100 is expected to come out as 12:44:56.657Z.
 */
public void testParseJavaWithTimeZone() {
    final String inputPattern = "yyyy-MM-dd'T'HH:mm:ss.SSSZZ";
    final Function<String, ZonedDateTime> parser = DateFormat.Java.getFunction(inputPattern, ZoneOffset.UTC, Locale.ROOT);
    final ZonedDateTime parsed = parser.apply("2018-02-05T13:44:56.657+0100");

    // Render through a UTC-zoned formatter so the assertion compares a zone-normalised string.
    final DateFormatter utcFormatter = DateFormatter.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX").withZone(ZoneOffset.UTC);
    final String rendered = utcFormatter.format(parsed);
    assertThat(rendered, is("2018-02-05T12:44:56.657Z"));
}
public void testParseJavaDefaultYear() { public void testParseJavaDefaultYear() {
String format = randomFrom("8dd/MM", "dd/MM"); String format = randomFrom("8dd/MM", "dd/MM");
ZoneId timezone = DateUtils.of("Europe/Amsterdam"); ZoneId timezone = DateUtils.of("Europe/Amsterdam");
@ -70,6 +79,10 @@ public class DateFormatTests extends ESTestCase {
public void testParseISO8601() { public void testParseISO8601() {
assertThat(DateFormat.Iso8601.getFunction(null, ZoneOffset.UTC, null).apply("2001-01-01T00:00:00-0800").toInstant().toEpochMilli(), assertThat(DateFormat.Iso8601.getFunction(null, ZoneOffset.UTC, null).apply("2001-01-01T00:00:00-0800").toInstant().toEpochMilli(),
equalTo(978336000000L)); equalTo(978336000000L));
assertThat(DateFormat.Iso8601.getFunction(null, ZoneOffset.UTC, null).apply("2001-01-01T00:00:00-0800").toString(),
equalTo("2001-01-01T08:00Z"));
assertThat(DateFormat.Iso8601.getFunction(null, ZoneOffset.UTC, null).apply("2001-01-01T00:00:00-0800").toString(),
equalTo("2001-01-01T08:00Z"));
} }
public void testParseISO8601Failure() { public void testParseISO8601Failure() {

View File

@ -29,6 +29,7 @@ import java.time.ZoneId;
import java.time.ZoneOffset; import java.time.ZoneOffset;
import java.time.ZonedDateTime; import java.time.ZonedDateTime;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -97,6 +98,18 @@ public class DateProcessorTests extends ESTestCase {
} }
} }
/**
 * Runs a date processor constructed with {@code null} as its second and third arguments
 * (presumably the timezone and locale templates; the constructor is not visible here, so confirm)
 * against a value that carries its own {@code -02:00} offset. The target field is expected to
 * hold the same instant expressed in UTC.
 */
public void testJavaPatternNoTimezone() {
    final String sourceField = "date_as_string";
    final String targetField = "date_as_date";

    // Build the processor before the document, keeping the order of the random-consuming calls.
    final DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), null, null,
        sourceField, Arrays.asList("yyyy dd MM HH:mm:ss XXX"), targetField);

    final Map<String, Object> source = new HashMap<>();
    source.put(sourceField, "2010 12 06 00:00:00 -02:00");
    final IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), source);

    processor.execute(doc);

    // Midnight at -02:00 on 12 June 2010 is 02:00 UTC on the same day.
    assertThat(doc.getFieldValue(targetField, String.class), equalTo("2010-06-12T02:00:00.000Z"));
}
public void testInvalidJavaPattern() { public void testInvalidJavaPattern() {
try { try {
DateProcessor processor = new DateProcessor(randomAlphaOfLength(10), DateProcessor processor = new DateProcessor(randomAlphaOfLength(10),

View File

@ -39,3 +39,94 @@ teardown:
id: 1 id: 1
- match: { _source.date_source_field: "12/06/2010" } - match: { _source.date_source_field: "12/06/2010" }
- match: { _source.date_target_field: "2010-06-12T00:00:00.000+02:00" } - match: { _source.date_target_field: "2010-06-12T00:00:00.000+02:00" }
---
"Test date processor with no timezone configured":
# Step 1: register pipeline "my_pipeline" with seven date processors.
# None of them sets a "timezone" option.
- do:
ingest.put_pipeline:
id: "my_pipeline"
# sample formats from beats, featuring mongodb, icinga, apache
# Processors 1-3 use explicit patterns that include an offset (ZZ / Z);
# processors 4-7 use the named formats UNIX, UNIX_MS, TAI64N and ISO8601.
body: >
{
"description": "_description",
"processors": [
{
"date" : {
"field" : "date_source_1",
"target_field" : "date_target_1",
"formats" : ["yyyy-MM-dd'T'HH:mm:ss.SSSZZ" ]
}
},
{
"date" : {
"field" : "date_source_2",
"target_field" : "date_target_2",
"formats" : ["yyyy-MM-dd HH:mm:ss Z" ]
}
},
{
"date" : {
"field" : "date_source_3",
"target_field" : "date_target_3",
"formats" : [ "dd/MMM/yyyy:H:m:s Z" ]
}
},
{
"date" : {
"field" : "date_source_4",
"target_field" : "date_target_4",
"formats" : [ "UNIX" ]
}
},
{
"date" : {
"field" : "date_source_5",
"target_field" : "date_target_5",
"formats" : [ "UNIX_MS" ]
}
},
{
"date" : {
"field" : "date_source_6",
"target_field" : "date_target_6",
"formats" : [ "TAI64N" ]
}
},
{
"date" : {
"field" : "date_source_7",
"target_field" : "date_target_7",
"formats" : [ "ISO8601" ]
}
}
]
}
- match: { acknowledged: true }
# Step 2: index one document through the pipeline.
# Sources 1, 2, 3 and 7 carry an explicit +0100 / +0200 offset.
- do:
index:
index: test
id: 1
pipeline: "my_pipeline"
body: { date_source_1: "2018-02-05T13:44:56.657+0100", date_source_2: "2017-04-04 13:43:09 +0200", date_source_3: "10/Aug/2018:09:45:56 +0200", date_source_4: "1", date_source_5: "1", date_source_6: "4000000050d506482dbdf024", date_source_7: "2018-02-05T13:44:56.657+0100" }
# Step 3: fetch the document back and verify it.
- do:
get:
index: test
id: 1
# Each source field must be unchanged, and each target must be rendered in UTC
# ("Z" suffix) with the source offset applied rather than ignored.
- match: { _source.date_source_1: "2018-02-05T13:44:56.657+0100" }
- match: { _source.date_target_1: "2018-02-05T12:44:56.657Z" }
- match: { _source.date_source_2: "2017-04-04 13:43:09 +0200" }
- match: { _source.date_target_2: "2017-04-04T11:43:09.000Z" }
- match: { _source.date_source_3: "10/Aug/2018:09:45:56 +0200" }
- match: { _source.date_target_3: "2018-08-10T07:45:56.000Z" }
- match: { _source.date_source_4: "1" }
- match: { _source.date_target_4: "1970-01-01T00:00:01.000Z" }
- match: { _source.date_source_5: "1" }
- match: { _source.date_target_5: "1970-01-01T00:00:00.001Z" }
- match: { _source.date_source_6: "4000000050d506482dbdf024" }
- match: { _source.date_target_6: "2012-12-22T01:00:46.767Z" }
- match: { _source.date_source_7: "2018-02-05T13:44:56.657+0100" }
- match: { _source.date_target_7: "2018-02-05T12:44:56.657Z" }