From 9444da5038067eedbabe936c95d95080659240db Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 10 Nov 2017 00:43:22 -0800 Subject: [PATCH] SQL: Improved behavior when implicitly casting strings to date/time literals. (#5023) * SQL: Improved behavior when implicitly casting strings to date/time literals. - Handle all flavors of ISO8601 and SQL literals. - Throw errors on other literals instead of silently transforming them to 0. * Respect timeZone when format is null. --- docs/content/misc/math-expr.md | 2 +- .../expression/TimestampParseExprMacro.java | 37 +++++++++++++++- .../druid/query/expression/ExprMacroTest.java | 11 ++++- .../builtin/CastOperatorConversion.java | 4 +- .../sql/calcite/planner/DruidRexExecutor.java | 9 ++++ .../druid/sql/calcite/CalciteQueryTest.java | 42 +++++++++++++++++-- .../calcite/expression/ExpressionsTest.java | 4 +- 7 files changed, 98 insertions(+), 11 deletions(-) diff --git a/docs/content/misc/math-expr.md b/docs/content/misc/math-expr.md index f4d0db7f9e3..07dcc501808 100644 --- a/docs/content/misc/math-expr.md +++ b/docs/content/misc/math-expr.md @@ -63,7 +63,7 @@ The following built-in functions are available. |timestamp_floor|timestamp_floor(expr, period, \[origin, [timezone\]\]) rounds down a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| |timestamp_shift|timestamp_shift(expr, period, step, \[timezone\]) shifts a timestamp by a period (step times), returning it as a new timestamp. Period can be any ISO8601 period. Step may be negative. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| |timestamp_extract|timestamp_extract(expr, unit, \[timezone\]) extracts a time part from expr, returning it as a number. Unit can be EPOCH, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), DOY (day of year), WEEK (week of [week year](https://en.wikipedia.org/wiki/ISO_week_date)), MONTH (1 through 12), QUARTER (1 through 4), or YEAR. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00"| -|timestamp_parse|timestamp_parse(string expr, \[pattern, [timezone\]\]) parses a string into a timestamp using a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or ISO8601 if the pattern is not provided. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00", and will be used as the time zone for strings that do not include a time zone offset. Pattern and time zone must be literals. Strings that cannot be parsed as timestamps will be returned as nulls.| +|timestamp_parse|timestamp_parse(string expr, \[pattern, [timezone\]\]) parses a string into a timestamp using a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). If the pattern is not provided, this parses time strings in either ISO8601 or SQL format. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00", and will be used as the time zone for strings that do not include a time zone offset. Pattern and time zone must be literals. Strings that cannot be parsed as timestamps will be returned as nulls.| |timestamp_format|timestamp_format(expr, \[pattern, \[timezone\]\]) formats a timestamp as a string with a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or ISO8601 if the pattern is not provided. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". Pattern and time zone must be literals.| ## Math functions diff --git a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java index d885d8709a9..b163aac140c 100644 --- a/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java +++ b/processing/src/main/java/io/druid/query/expression/TimestampParseExprMacro.java @@ -26,6 +26,10 @@ import io.druid.math.expr.ExprEval; import io.druid.math.expr.ExprMacroTable; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.DateTimeFormatterBuilder; +import org.joda.time.format.DateTimeParser; +import org.joda.time.format.ISODateTimeFormat; import javax.annotation.Nonnull; import java.util.List; @@ -57,7 +61,7 @@ public class TimestampParseExprMacro implements ExprMacroTable.ExprMacro final DateTimes.UtcFormatter formatter = formatString == null - ? DateTimes.ISO_DATE_OR_TIME + ? createDefaultParser(timeZone) : DateTimes.wrapFormatter(DateTimeFormat.forPattern(formatString).withZone(timeZone)); class TimestampParseExpr implements Expr @@ -91,4 +95,35 @@ public class TimestampParseExprMacro implements ExprMacroTable.ExprMacro return new TimestampParseExpr(); } + + /** + * Default formatter that parses according to the docs for this method: "If the pattern is not provided, this parses + * time strings in either ISO8601 or SQL format." + */ + private static DateTimes.UtcFormatter createDefaultParser(final DateTimeZone timeZone) + { + final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder() + .appendTimeZoneOffset("Z", true, 2, 4) + .toFormatter(); + + DateTimeParser timeOrOffset = new DateTimeFormatterBuilder() + .append( + null, + new DateTimeParser[]{ + new DateTimeFormatterBuilder().appendLiteral('T').toParser(), + new DateTimeFormatterBuilder().appendLiteral(' ').toParser() + } + ) + .appendOptional(ISODateTimeFormat.timeElementParser().getParser()) + .appendOptional(offsetElement.getParser()) + .toParser(); + + return DateTimes.wrapFormatter( + new DateTimeFormatterBuilder() + .append(ISODateTimeFormat.dateElementParser()) + .appendOptional(timeOrOffset) + .toFormatter() + .withZone(timeZone) + ); + } } diff --git a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java index e7bb2397fe2..5dc3135ed96 100644 --- a/server/src/test/java/io/druid/query/expression/ExprMacroTest.java +++ b/server/src/test/java/io/druid/query/expression/ExprMacroTest.java @@ -120,8 +120,17 @@ public class ExprMacroTest public void testTimestampParse() { assertExpr("timestamp_parse(tstr)", DateTimes.of("2000-02-03T04:05:06").getMillis()); - assertExpr("timestamp_parse(tstr_sql)", null); + assertExpr("timestamp_parse(tstr_sql)", DateTimes.of("2000-02-03T04:05:06").getMillis()); + assertExpr( + "timestamp_parse(tstr_sql,'','America/Los_Angeles')", + DateTimes.of("2000-02-03T04:05:06-08:00").getMillis() + ); + assertExpr("timestamp_parse('2000-02-03')", DateTimes.of("2000-02-03").getMillis()); + assertExpr("timestamp_parse('2000-02')", DateTimes.of("2000-02-01").getMillis()); + assertExpr("timestamp_parse('')", null); + assertExpr("timestamp_parse('z2000')", null); assertExpr("timestamp_parse(tstr_sql,'yyyy-MM-dd HH:mm:ss')", DateTimes.of("2000-02-03T04:05:06").getMillis()); + assertExpr("timestamp_parse('02/03/2000','MM/dd/yyyy')", DateTimes.of("2000-02-03").getMillis()); assertExpr( "timestamp_parse(tstr_sql,'yyyy-MM-dd HH:mm:ss','America/Los_Angeles')", DateTimes.of("2000-02-03T04:05:06-08:00").getMillis() diff --git a/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java b/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java index 1f34bde5082..7cc7acaf437 100644 --- a/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java +++ b/sql/src/main/java/io/druid/sql/calcite/expression/builtin/CastOperatorConversion.java @@ -140,12 +140,12 @@ public class CastOperatorConversion implements SqlOperatorConversion final SqlTypeName toType ) { - // Cast strings to datetimes by parsin them from SQL format. + // Cast strings to datetimes by parsing them from SQL format. final DruidExpression timestampExpression = DruidExpression.fromFunctionCall( "timestamp_parse", ImmutableList.of( operand, - DruidExpression.fromExpression(DruidExpression.stringLiteral(dateTimeFormatString(toType))), + DruidExpression.fromExpression(DruidExpression.nullLiteral()), DruidExpression.fromExpression(DruidExpression.stringLiteral(plannerContext.getTimeZone().getID())) ) ); diff --git a/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java b/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java index 2eba93b58de..1e187f78742 100644 --- a/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java +++ b/sql/src/main/java/io/druid/sql/calcite/planner/DruidRexExecutor.java @@ -20,6 +20,7 @@ package io.druid.sql.calcite.planner; import io.druid.java.util.common.DateTimes; +import io.druid.java.util.common.IAE; import io.druid.math.expr.Expr; import io.druid.math.expr.ExprEval; import io.druid.math.expr.ExprType; @@ -83,6 +84,10 @@ public class DruidRexExecutor implements RexExecutor if (sqlTypeName == SqlTypeName.BOOLEAN) { literal = rexBuilder.makeLiteral(exprResult.asBoolean(), constExp.getType(), true); } else if (sqlTypeName == SqlTypeName.DATE) { + if (!constExp.getType().isNullable() && exprResult.isNull()) { + throw new IAE("Illegal DATE constant: %s", constExp); + } + literal = rexBuilder.makeDateLiteral( Calcites.jodaToCalciteDateString( DateTimes.utc(exprResult.asLong()), @@ -90,6 +95,10 @@ public class DruidRexExecutor implements RexExecutor ) ); } else if (sqlTypeName == SqlTypeName.TIMESTAMP) { + if (!constExp.getType().isNullable() && exprResult.isNull()) { + throw new IAE("Illegal TIMESTAMP constant: %s", constExp); + } + literal = rexBuilder.makeTimestampLiteral( Calcites.jodaToCalciteTimestampString( DateTimes.utc(exprResult.asLong()), diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java index bd3eba52300..35ccbbaa914 100644 --- a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java @@ -104,6 +104,7 @@ import io.druid.sql.calcite.util.QueryLogHook; import io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker; import io.druid.sql.calcite.view.InProcessViewManager; import org.apache.calcite.plan.RelOptPlanner; +import org.hamcrest.CoreMatchers; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.Interval; @@ -115,6 +116,8 @@ import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; +import org.junit.internal.matchers.ThrowableMessageMatcher; +import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import java.util.ArrayList; @@ -218,6 +221,9 @@ public class CalciteQueryTest private static final PagingSpec FIRST_PAGING_SPEC = new PagingSpec(null, 1000, true); + @Rule + public ExpectedException expectedException = ExpectedException.none(); + @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @@ -2808,15 +2814,23 @@ public class CalciteQueryTest @Test public void testCountStarWithTimeFilterUsingStringLiterals() throws Exception { - // Strings are implicitly cast to timestamps. + // Strings are implicitly cast to timestamps. Test a few different forms. testQuery( - "SELECT COUNT(*) FROM druid.foo " - + "WHERE __time >= '2000-01-01 00:00:00' AND __time < '2001-01-01 00:00:00'", + "SELECT COUNT(*) FROM druid.foo\n" + + "WHERE __time >= '2000-01-01 00:00:00' AND __time < '2001-01-01T00:00:00'\n" + + "OR __time >= '2001-02-01' AND __time < '2001-02-02'\n" + + "OR __time BETWEEN '2001-03-01' AND '2001-03-02'", ImmutableList.of( Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) - .intervals(QSS(Intervals.of("2000-01-01/2001-01-01"))) + .intervals( + QSS( + Intervals.of("2000-01-01/2001-01-01"), + Intervals.of("2001-02-01/2001-02-02"), + Intervals.of("2001-03-01/2001-03-02T00:00:00.001") + ) + ) .granularity(Granularities.ALL) .aggregators(AGGS(new CountAggregatorFactory("a0"))) .context(TIMESERIES_CONTEXT_DEFAULT) @@ -2828,6 +2842,26 @@ public class CalciteQueryTest ); } + @Test + public void testCountStarWithTimeFilterUsingStringLiteralsInvalid() throws Exception + { + // Strings are implicitly cast to timestamps. Test an invalid string. + + // This error message isn't ideal but it is at least better than silently ignoring the problem. + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Error while applying rule ReduceExpressionsRule"); + expectedException.expectCause( + ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Illegal TIMESTAMP constant")) + ); + + testQuery( + "SELECT COUNT(*) FROM druid.foo\n" + + "WHERE __time >= 'z2000-01-01 00:00:00' AND __time < '2001-01-01 00:00:00'\n", + ImmutableList.of(), + ImmutableList.of() + ); + } + @Test public void testCountStarWithSinglePointInTime() throws Exception { diff --git a/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java b/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java index 1ef16e824af..fe0d5496212 100644 --- a/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/expression/ExpressionsTest.java @@ -724,7 +724,7 @@ public class ExpressionsTest ), DruidExpression.of( null, - "timestamp_parse(\"tstr\",'yyyy-MM-dd HH:mm:ss','UTC')" + "timestamp_parse(\"tstr\",'','UTC')" ), DateTimes.of("2000-02-03T04:05:06Z").getMillis() ); @@ -784,7 +784,7 @@ public class ExpressionsTest inputRef("dstr") ), DruidExpression.fromExpression( - "timestamp_floor(timestamp_parse(\"dstr\",'yyyy-MM-dd','UTC'),'P1D','','UTC')" + "timestamp_floor(timestamp_parse(\"dstr\",'','UTC'),'P1D','','UTC')" ), DateTimes.of("2000-02-03").getMillis() );