SQL: Make CASTing string to DATETIME more lenient (#57451) (#57509)

Some BI tools (e.g. Tableau) would try to cast strings where the time
part is separated from the date part with a whitespace instead of `T`.
Adjust type conversion used by CAST to support this.

(cherry picked from commit 0e18321e7ad9f779c42855efbf93f171b9128a5e)
This commit is contained in:
Marios Trivyzas 2020-06-02 10:54:03 +02:00 committed by GitHub
parent b8a13de20f
commit 52c555e286
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 99 additions and 15 deletions

View File

@ -6,19 +6,40 @@
package org.elasticsearch.xpack.ql.type; package org.elasticsearch.xpack.ql.type;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.common.time.DateFormatters;
import java.time.Instant; import java.time.Instant;
import java.time.ZoneId; import java.time.ZoneId;
import java.time.ZonedDateTime; import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_TIME;
//NB: Taken from sql-proto. //NB: Taken from sql-proto.
public final class DateUtils { public final class DateUtils {
public static final ZoneId UTC = ZoneId.of("Z"); public static final ZoneId UTC = ZoneId.of("Z");
private static final DateFormatter UTC_DATE_TIME_FORMATTER = DateFormatter.forPattern("date_optional_time").withZone(UTC); private static final DateTimeFormatter DATE_OPTIONAL_TIME_FORMATTER_WHITESPACE = new DateTimeFormatterBuilder()
.append(ISO_LOCAL_DATE)
.optionalStart()
.appendLiteral(' ')
.append(ISO_LOCAL_TIME)
.optionalStart()
.appendZoneOrOffsetId()
.optionalEnd()
.toFormatter().withZone(UTC);
private static final DateTimeFormatter DATE_OPTIONAL_TIME_FORMATTER_T_LITERAL = new DateTimeFormatterBuilder()
.append(ISO_LOCAL_DATE)
.optionalStart()
.appendLiteral('T')
.append(ISO_LOCAL_TIME)
.optionalStart()
.appendZoneOrOffsetId()
.optionalEnd()
.toFormatter().withZone(UTC);
private DateUtils() {} private DateUtils() {}
@ -33,10 +54,25 @@ public final class DateUtils {
* Parses the given string into a DateTime using UTC as a default timezone. * Parses the given string into a DateTime using UTC as a default timezone.
*/ */
public static ZonedDateTime asDateTime(String dateFormat) { public static ZonedDateTime asDateTime(String dateFormat) {
return DateFormatters.from(UTC_DATE_TIME_FORMATTER.parse(dateFormat)).withZoneSameInstant(UTC); int separatorIdx = dateFormat.indexOf('-'); // Find the first `-` date separator
if (separatorIdx == 0) { // first char = `-` denotes a negative year
separatorIdx = dateFormat.indexOf('-', 1); // Find the first `-` date separator past the negative year
}
// Find the second `-` date separator and move 3 places past it (skipping the 2-digit dayOfMonth) to find the time separator
// e.g. 2020-06-01T10:20:30....
//             ^
// +3 =           ^
separatorIdx = dateFormat.indexOf('-', separatorIdx + 1) + 3;
// Avoid index out of bounds - it will lead to DateTimeParseException anyways
if (separatorIdx >= dateFormat.length() || dateFormat.charAt(separatorIdx) == 'T') {
return DateFormatters.from(DATE_OPTIONAL_TIME_FORMATTER_T_LITERAL.parse(dateFormat)).withZoneSameInstant(UTC);
} else {
return DateFormatters.from(DATE_OPTIONAL_TIME_FORMATTER_WHITESPACE.parse(dateFormat)).withZoneSameInstant(UTC);
}
} }
public static String toString(ZonedDateTime dateTime) { public static String toString(ZonedDateTime dateTime) {
return StringUtils.toString(dateTime); return StringUtils.toString(dateTime);
} }
} }

View File

@ -119,9 +119,18 @@ public class DataTypeConversionTests extends ESTestCase {
assertEquals(asDateTime(0L), conversion.convert("1970-01-01")); assertEquals(asDateTime(0L), conversion.convert("1970-01-01"));
assertEquals(asDateTime(1000L), conversion.convert("1970-01-01T00:00:01Z")); assertEquals(asDateTime(1000L), conversion.convert("1970-01-01T00:00:01Z"));
assertEquals(asDateTime(1483228800000L), conversion.convert("2017-01-01T00:00:00Z"));
assertEquals(asDateTime(1483228800000L), conversion.convert("2017-01-01T00:00:00Z")); assertEquals(asDateTime(1483228800123L), conversion.convert("2017-01-01T00:00:00.123Z"));
assertEquals(asDateTime(18000000L), conversion.convert("1970-01-01T00:00:00-05:00")); assertEquals(asDateTime(1483228800123L), conversion.convert("2017-01-01 00:00:00.123Z"));
assertEquals(asDateTime(18000321L), conversion.convert("1970-01-01T00:00:00.321-05:00"));
assertEquals(asDateTime(18000321L), conversion.convert("1970-01-01 00:00:00.321-05:00"));
assertEquals(asDateTime(3849948162000321L), conversion.convert("+123970-01-01T00:00:00.321-05:00"));
assertEquals(asDateTime(3849948162000321L), conversion.convert("+123970-01-01 00:00:00.321-05:00"));
assertEquals(asDateTime(-818587277999679L), conversion.convert("-23970-01-01T00:00:00.321-05:00"));
assertEquals(asDateTime(-818587277999679L), conversion.convert("-23970-01-01 00:00:00.321-05:00"));
// double check back and forth conversion // double check back and forth conversion
ZonedDateTime dt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution(); ZonedDateTime dt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution();
@ -129,7 +138,7 @@ public class DataTypeConversionTests extends ESTestCase {
Converter back = converterFor(KEYWORD, DATETIME); Converter back = converterFor(KEYWORD, DATETIME);
assertEquals(dt, back.convert(forward.convert(dt))); assertEquals(dt, back.convert(forward.convert(dt)));
Exception e = expectThrows(QlIllegalArgumentException.class, () -> conversion.convert("0xff")); Exception e = expectThrows(QlIllegalArgumentException.class, () -> conversion.convert("0xff"));
assertEquals("cannot cast [0xff] to [datetime]: failed to parse date field [0xff] with format [date_optional_time]", assertEquals("cannot cast [0xff] to [datetime]: Text '0xff' could not be parsed at index 0",
e.getMessage()); e.getMessage());
} }
} }
@ -405,4 +414,4 @@ public class DataTypeConversionTests extends ESTestCase {
Converter stringToIp = converterFor(KEYWORD, IP); Converter stringToIp = converterFor(KEYWORD, IP);
assertEquals("10.0.0.1", ipToString.convert(stringToIp.convert(new Literal(s, "10.0.0.1", KEYWORD)))); assertEquals("10.0.0.1", ipToString.convert(stringToIp.convert(new Literal(s, "10.0.0.1", KEYWORD))));
} }
} }

View File

@ -9,6 +9,27 @@
// //
// //
castStringToDateTime
SELECT CAST('2020-06-01T10:20:30Z' AS DATETIME) AS cast1, CAST('2020-06-01 10:20:30.000Z' AS DATETIME) AS cast2;
cast1 | cast2
--------------------------+-------------------------
2020-06-01T10:20:30.000Z | 2020-06-01T10:20:30.000Z
;
castStringToDateTimeWithField
SELECT CAST(CAST(birth_date AS STRING) AS DATETIME) AS cast1, CAST(REPLACE(CAST(birth_date AS STRING), 'T', ' ') AS DATETIME) AS cast2
FROM test_emp ORDER BY emp_no LIMIT 5;
cast1 | cast2
--------------------------+-------------------------
1953-09-02T00:00:00.000Z | 1953-09-02T00:00:00.000Z
1964-06-02T00:00:00.000Z | 1964-06-02T00:00:00.000Z
1959-12-03T00:00:00.000Z | 1959-12-03T00:00:00.000Z
1954-05-01T00:00:00.000Z | 1954-05-01T00:00:00.000Z
1955-01-21T00:00:00.000Z | 1955-01-21T00:00:00.000Z
;
dateTimeSecond dateTimeSecond
SELECT SECOND(birth_date) d, last_name l FROM "test_emp" WHERE emp_no < 10010 ORDER BY emp_no; SELECT SECOND(birth_date) d, last_name l FROM "test_emp" WHERE emp_no < 10010 ORDER BY emp_no;

View File

@ -120,10 +120,14 @@ public final class DateUtils {
* Parses the given string into a Date (SQL DATE type) using UTC as a default timezone. * Parses the given string into a Date (SQL DATE type) using UTC as a default timezone.
*/ */
public static ZonedDateTime asDateOnly(String dateFormat) { public static ZonedDateTime asDateOnly(String dateFormat) {
int separatorIdx = dateFormat.indexOf('-'); int separatorIdx = dateFormat.indexOf('-'); // Find the first `-` date separator
if (separatorIdx == 0) { // negative year if (separatorIdx == 0) { // first char = `-` denotes a negative year
separatorIdx = dateFormat.indexOf('-', 1); separatorIdx = dateFormat.indexOf('-', 1); // Find the first `-` date separator past the negative year
} }
// Find the second `-` date separator and move 3 places past it (skipping the 2-digit dayOfMonth) to find the time separator
// e.g. 2020-06-01T10:20:30....
//             ^
// +3 =           ^
separatorIdx = dateFormat.indexOf('-', separatorIdx + 1) + 3; separatorIdx = dateFormat.indexOf('-', separatorIdx + 1) + 3;
// Avoid index out of bounds - it will lead to DateTimeParseException anyways // Avoid index out of bounds - it will lead to DateTimeParseException anyways
if (separatorIdx >= dateFormat.length() || dateFormat.charAt(separatorIdx) == 'T') { if (separatorIdx >= dateFormat.length() || dateFormat.charAt(separatorIdx) == 'T') {

View File

@ -189,6 +189,8 @@ public class SqlDataTypeConverterTests extends ESTestCase {
assertEquals(date(-125908819200000L), conversion.convert("-2020-02-10 10:20:30.123-06:00")); assertEquals(date(-125908819200000L), conversion.convert("-2020-02-10 10:20:30.123-06:00"));
assertEquals(date(1581292800000L), conversion.convert("2020-02-10 10:20:30.123456789+03:00")); assertEquals(date(1581292800000L), conversion.convert("2020-02-10 10:20:30.123456789+03:00"));
assertEquals(date(11046514492800000L), conversion.convert("+352020-02-10 10:20:30.123456789+03:00"));
// double check back and forth conversion // double check back and forth conversion
ZonedDateTime zdt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution(); ZonedDateTime zdt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution();
Converter forward = converterFor(DATE, KEYWORD); Converter forward = converterFor(DATE, KEYWORD);
@ -299,9 +301,21 @@ public class SqlDataTypeConverterTests extends ESTestCase {
assertEquals(dateTime(0L), conversion.convert("1970-01-01")); assertEquals(dateTime(0L), conversion.convert("1970-01-01"));
assertEquals(dateTime(1000L), conversion.convert("1970-01-01T00:00:01Z")); assertEquals(dateTime(1000L), conversion.convert("1970-01-01T00:00:01Z"));
assertEquals(dateTime(1483228800000L), conversion.convert("2017-01-01T00:00:00Z")); assertEquals(dateTime(1483228800000L), conversion.convert("2017-01-01T00:00:00Z"));
assertEquals(dateTime(1483228800000L), conversion.convert("2017-01-01T00:00:00Z")); assertEquals(dateTime(1483228800000L), conversion.convert("2017-01-01 00:00:00Z"));
assertEquals(dateTime(18000000L), conversion.convert("1970-01-01T00:00:00-05:00"));
assertEquals(dateTime(1483228800123L), conversion.convert("2017-01-01T00:00:00.123Z"));
assertEquals(dateTime(1483228800123L), conversion.convert("2017-01-01 00:00:00.123Z"));
assertEquals(dateTime(18000321L), conversion.convert("1970-01-01T00:00:00.321-05:00"));
assertEquals(dateTime(18000321L), conversion.convert("1970-01-01 00:00:00.321-05:00"));
assertEquals(dateTime(3849948162000321L), conversion.convert("+123970-01-01T00:00:00.321-05:00"));
assertEquals(dateTime(3849948162000321L), conversion.convert("+123970-01-01 00:00:00.321-05:00"));
assertEquals(dateTime(-818587277999679L), conversion.convert("-23970-01-01T00:00:00.321-05:00"));
assertEquals(dateTime(-818587277999679L), conversion.convert("-23970-01-01 00:00:00.321-05:00"));
// double check back and forth conversion // double check back and forth conversion
ZonedDateTime dt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution(); ZonedDateTime dt = org.elasticsearch.common.time.DateUtils.nowWithMillisResolution();
@ -309,7 +323,7 @@ public class SqlDataTypeConverterTests extends ESTestCase {
Converter back = converterFor(KEYWORD, DATETIME); Converter back = converterFor(KEYWORD, DATETIME);
assertEquals(dt, back.convert(forward.convert(dt))); assertEquals(dt, back.convert(forward.convert(dt)));
Exception e = expectThrows(QlIllegalArgumentException.class, () -> conversion.convert("0xff")); Exception e = expectThrows(QlIllegalArgumentException.class, () -> conversion.convert("0xff"));
assertEquals("cannot cast [0xff] to [datetime]: failed to parse date field [0xff] with format [date_optional_time]", assertEquals("cannot cast [0xff] to [datetime]: Text '0xff' could not be parsed at index 0",
e.getMessage()); e.getMessage());
} }
} }