Druid SQL EXTRACT time function - adding support for additional Time Units (#8068)

* 1. Added TimestampExtractExprMacro.Unit for MILLISECOND. 2. Added expr eval for MILLISECOND. 3. Added a test case for extracting milliseconds from an expression. (#7935)

* 1. Adding DATASOURCE4 to the tests. 2. Adding test TimeExtractWithMilliseconds

* Fixing testInformationSchemaTables test

* Fixing failing tests in DruidAvaticaHandlerTest

* Adding cannotVectorize() call before the test

* EXTRACT time function - adding support for MICROSECOND, ISODOW, ISOYEAR and CENTURY time units; documentation changes.

* Adding MILLISECOND to the test case

* Adding support for DECADE and MILLENNIUM, updating test case and documentation

* Fixing expression eval for DECADE and MILLENNIUM
Sashidhar Thallam 2019-07-20 09:08:32 +05:30 committed by Clint Wylie
parent ceb3a891bb
commit ea4bad7836
6 changed files with 162 additions and 17 deletions

View File

@ -290,7 +290,7 @@ simplest way to write literal timestamps in other time zones is to use TIME_PARSE
|`TIME_FORMAT(<timestamp_expr>, [<pattern>, [<timezone>]])`|Formats a timestamp as a string with a given [Joda DateTimeFormat pattern](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or ISO8601 (e.g. `2000-01-02T03:04:05Z`) if the pattern is not provided. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". Pattern and time zone must be literals.|
|`MILLIS_TO_TIMESTAMP(millis_expr)`|Converts a number of milliseconds since the epoch into a timestamp.|
|`TIMESTAMP_TO_MILLIS(timestamp_expr)`|Converts a timestamp into a number of milliseconds since the epoch.|
|`EXTRACT(<unit> FROM timestamp_expr)`|Extracts a time part from expr, returning it as a number. Unit can be EPOCH, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), DOY (day of year), WEEK (week of year), MONTH, QUARTER, or YEAR. Units must be provided unquoted, like `EXTRACT(HOUR FROM __time)`.|
|`EXTRACT(<unit> FROM timestamp_expr)`|Extracts a time part from expr, returning it as a number. Unit can be EPOCH, MICROSECOND, MILLISECOND, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), ISODOW (ISO day of week), DOY (day of year), WEEK (week of year), MONTH, QUARTER, YEAR, ISOYEAR, DECADE, CENTURY, or MILLENNIUM. Units must be provided unquoted, like `EXTRACT(HOUR FROM __time)`.|
|`FLOOR(timestamp_expr TO <unit>)`|Rounds down a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|`CEIL(timestamp_expr TO <unit>)`|Rounds up a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|`TIMESTAMPADD(<unit>, <count>, <timestamp>)`|Equivalent to `timestamp + count * INTERVAL '1' UNIT`.|
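The new units work anywhere Druid SQL runs, including the Avatica JDBC endpoint. A minimal sketch of querying them over JDBC, assuming a Broker at localhost:8082 with the default Avatica path and an illustrative datasource named foo:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ExtractUnitsExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumed connection URL; adjust the host/port for your Broker.
    final String url = "jdbc:avatica:remote:url=http://localhost:8082/druid/v2/sql/avatica/";
    try (Connection connection = DriverManager.getConnection(url);
         Statement statement = connection.createStatement();
         ResultSet resultSet = statement.executeQuery(
             "SELECT EXTRACT(DECADE FROM __time) AS decade, COUNT(*) AS cnt\n"
             + "FROM druid.foo\n"
             + "GROUP BY 1")) {
      while (resultSet.next()) {
        // Rows timestamped in the year 2000 report decade 200 (year / 10).
        System.out.println(resultSet.getLong("decade") + " -> " + resultSet.getLong("cnt"));
      }
    }
  }
}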

View File

@ -37,16 +37,23 @@ public class TimestampExtractExprMacro implements ExprMacroTable.ExprMacro
public enum Unit
{
EPOCH,
MICROSECOND,
MILLISECOND,
SECOND,
MINUTE,
HOUR,
DAY,
DOW,
ISODOW,
DOY,
WEEK,
MONTH,
QUARTER,
YEAR
YEAR,
ISOYEAR,
DECADE,
CENTURY,
MILLENNIUM
}
@Override
@ -99,9 +106,15 @@ public class TimestampExtractExprMacro implements ExprMacroTable.ExprMacro
return ExprEval.of(null);
}
final DateTime dateTime = new DateTime(val, chronology);
long epoch = dateTime.getMillis() / 1000;
switch (unit) {
case EPOCH:
return ExprEval.of(dateTime.getMillis() / 1000);
return ExprEval.of(epoch);
case MICROSECOND:
return ExprEval.of(epoch / 1000);
case MILLISECOND:
return ExprEval.of(dateTime.millisOfSecond().get());
case SECOND:
return ExprEval.of(dateTime.secondOfMinute().get());
case MINUTE:
@ -112,6 +125,8 @@ public class TimestampExtractExprMacro implements ExprMacroTable.ExprMacro
return ExprEval.of(dateTime.dayOfMonth().get());
case DOW:
return ExprEval.of(dateTime.dayOfWeek().get());
case ISODOW:
return ExprEval.of(dateTime.dayOfWeek().get());
case DOY:
return ExprEval.of(dateTime.dayOfYear().get());
case WEEK:
@ -122,6 +137,17 @@ public class TimestampExtractExprMacro implements ExprMacroTable.ExprMacro
return ExprEval.of((dateTime.monthOfYear().get() - 1) / 3 + 1);
case YEAR:
return ExprEval.of(dateTime.year().get());
case ISOYEAR:
return ExprEval.of(dateTime.year().get());
case DECADE:
// The year field divided by 10. See https://www.postgresql.org/docs/10/functions-datetime.html
return ExprEval.of(Math.floor(dateTime.year().get() / 10));
case CENTURY:
return ExprEval.of(dateTime.centuryOfEra().get() + 1);
case MILLENNIUM:
// Years in the 1900s are in the second millennium. The third millennium started January 1, 2001.
// See https://www.postgresql.org/docs/10/functions-datetime.html
return ExprEval.of(Math.round(Math.ceil(dateTime.year().get() / 1000)));
default:
throw new ISE("Unhandled unit[%s]", unit);
}
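
The DECADE, CENTURY and MILLENNIUM arithmetic follows the PostgreSQL conventions cited in the comments, while ISODOW and ISOYEAR are mapped to Joda-Time's dayOfWeek and year fields. A small standalone sketch, not part of the patch, showing what each new unit evaluates to for the timestamp the tests use:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class ExtractArithmeticSketch
{
  public static void main(String[] args)
  {
    final DateTime dateTime = new DateTime("2000-01-01T10:51:45.695Z", DateTimeZone.UTC);
    final long epoch = dateTime.getMillis() / 1000;

    System.out.println(epoch / 1000);                        // MICROSECOND: 946723
    System.out.println(dateTime.millisOfSecond().get());     // MILLISECOND: 695
    System.out.println(dateTime.dayOfWeek().get());          // ISODOW: 6 (Saturday)
    System.out.println(dateTime.year().get());               // ISOYEAR: 2000
    System.out.println(dateTime.year().get() / 10);          // DECADE: 200
    System.out.println(dateTime.centuryOfEra().get() + 1);   // CENTURY: 21
    System.out.println(Math.round(Math.ceil(dateTime.year().get() / 1000))); // MILLENNIUM: 2
  }
}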

View File

@ -40,16 +40,23 @@ public class ExtractOperatorConversion implements SqlOperatorConversion
private static final Map<TimeUnitRange, TimestampExtractExprMacro.Unit> EXTRACT_UNIT_MAP =
ImmutableMap.<TimeUnitRange, TimestampExtractExprMacro.Unit>builder()
.put(TimeUnitRange.EPOCH, TimestampExtractExprMacro.Unit.EPOCH)
.put(TimeUnitRange.MICROSECOND, TimestampExtractExprMacro.Unit.MICROSECOND)
.put(TimeUnitRange.MILLISECOND, TimestampExtractExprMacro.Unit.MILLISECOND)
.put(TimeUnitRange.SECOND, TimestampExtractExprMacro.Unit.SECOND)
.put(TimeUnitRange.MINUTE, TimestampExtractExprMacro.Unit.MINUTE)
.put(TimeUnitRange.HOUR, TimestampExtractExprMacro.Unit.HOUR)
.put(TimeUnitRange.DAY, TimestampExtractExprMacro.Unit.DAY)
.put(TimeUnitRange.DOW, TimestampExtractExprMacro.Unit.DOW)
.put(TimeUnitRange.ISODOW, TimestampExtractExprMacro.Unit.ISODOW)
.put(TimeUnitRange.DOY, TimestampExtractExprMacro.Unit.DOY)
.put(TimeUnitRange.WEEK, TimestampExtractExprMacro.Unit.WEEK)
.put(TimeUnitRange.MONTH, TimestampExtractExprMacro.Unit.MONTH)
.put(TimeUnitRange.QUARTER, TimestampExtractExprMacro.Unit.QUARTER)
.put(TimeUnitRange.YEAR, TimestampExtractExprMacro.Unit.YEAR)
.put(TimeUnitRange.ISOYEAR, TimestampExtractExprMacro.Unit.ISOYEAR)
.put(TimeUnitRange.DECADE, TimestampExtractExprMacro.Unit.DECADE)
.put(TimeUnitRange.CENTURY, TimestampExtractExprMacro.Unit.CENTURY)
.put(TimeUnitRange.MILLENNIUM, TimestampExtractExprMacro.Unit.MILLENNIUM)
.build();
@Override
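
At planning time, the TimeUnitRange that Calcite parses out of an EXTRACT call is resolved through this map into the unit argument of the native timestamp_extract expression. A minimal standalone sketch of that resolution; the local Unit enum is a hypothetical stand-in for TimestampExtractExprMacro.Unit, and valueOf stands in for the explicit ImmutableMap lookup:

import org.apache.calcite.avatica.util.TimeUnitRange;

public class ExtractUnitMappingSketch
{
  // Hypothetical stand-in for TimestampExtractExprMacro.Unit so the sketch
  // compiles on its own.
  enum Unit
  {
    EPOCH, MICROSECOND, MILLISECOND, SECOND, MINUTE, HOUR, DAY, DOW, ISODOW,
    DOY, WEEK, MONTH, QUARTER, YEAR, ISOYEAR, DECADE, CENTURY, MILLENNIUM
  }

  public static void main(String[] args)
  {
    // EXTRACT(ISODOW FROM __time) arrives as TimeUnitRange.ISODOW and becomes
    // timestamp_extract("__time",'ISODOW','UTC').
    final TimeUnitRange calciteUnit = TimeUnitRange.ISODOW;
    final Unit druidUnit = Unit.valueOf(calciteUnit.name());
    System.out.println("timestamp_extract(\"__time\",'" + druidUnit + "','UTC')");
  }
}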

View File

@ -397,12 +397,19 @@ public class DruidAvaticaHandlerTest extends CalciteTestBase
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.DATASOURCE4),
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.DATASOURCE3),
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
)
),
getRows(
metaData.getTables(null, "druid", "%", null),
@ -429,6 +436,12 @@ public class DruidAvaticaHandlerTest extends CalciteTestBase
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.DATASOURCE4),
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.FORBIDDEN_DATASOURCE),
@ -441,6 +454,7 @@ public class DruidAvaticaHandlerTest extends CalciteTestBase
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
)
),
getRows(
metaData.getTables(null, "druid", "%", null),

View File

@ -313,20 +313,21 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
+ "FROM INFORMATION_SCHEMA.TABLES\n"
+ "WHERE TABLE_TYPE IN ('SYSTEM_TABLE', 'TABLE', 'VIEW')",
ImmutableList.of(),
ImmutableList.of(
new Object[]{"druid", CalciteTests.DATASOURCE1, "TABLE"},
new Object[]{"druid", CalciteTests.DATASOURCE2, "TABLE"},
new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE"},
new Object[]{"druid", "aview", "VIEW"},
new Object[]{"druid", "bview", "VIEW"},
new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE"},
new Object[]{"INFORMATION_SCHEMA", "SCHEMATA", "SYSTEM_TABLE"},
new Object[]{"INFORMATION_SCHEMA", "TABLES", "SYSTEM_TABLE"},
new Object[]{"sys", "segments", "SYSTEM_TABLE"},
new Object[]{"sys", "server_segments", "SYSTEM_TABLE"},
new Object[]{"sys", "servers", "SYSTEM_TABLE"},
new Object[]{"sys", "tasks", "SYSTEM_TABLE"}
)
ImmutableList.<Object[]>builder()
.add(new Object[]{"druid", CalciteTests.DATASOURCE1, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE2, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE4, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE"})
.add(new Object[]{"druid", "aview", "VIEW"})
.add(new Object[]{"druid", "bview", "VIEW"})
.add(new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE"})
.add(new Object[]{"INFORMATION_SCHEMA", "SCHEMATA", "SYSTEM_TABLE"})
.add(new Object[]{"INFORMATION_SCHEMA", "TABLES", "SYSTEM_TABLE"})
.add(new Object[]{"sys", "segments", "SYSTEM_TABLE"})
.add(new Object[]{"sys", "server_segments", "SYSTEM_TABLE"})
.add(new Object[]{"sys", "servers", "SYSTEM_TABLE"})
.add(new Object[]{"sys", "tasks", "SYSTEM_TABLE"})
.build()
);
testQuery(
@ -339,6 +340,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
ImmutableList.<Object[]>builder()
.add(new Object[]{"druid", CalciteTests.DATASOURCE1, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE2, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE4, "TABLE"})
.add(new Object[]{"druid", CalciteTests.FORBIDDEN_DATASOURCE, "TABLE"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE"})
.add(new Object[]{"druid", "aview", "VIEW"})
@ -5793,6 +5795,61 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testFilterOnTimeExtractWithVariousTimeUnits() throws Exception
{
// Cannot vectorize due to virtual columns.
cannotVectorize();
testQuery(
"SELECT COUNT(*) FROM druid.foo4\n"
+ "WHERE EXTRACT(YEAR FROM __time) = 2000\n"
+ "AND EXTRACT(MICROSECOND FROM __time) = 946723\n"
+ "AND EXTRACT(MILLISECOND FROM __time) = 695\n"
+ "AND EXTRACT(ISODOW FROM __time) = 6\n"
+ "AND EXTRACT(ISOYEAR FROM __time) = 2000\n"
+ "AND EXTRACT(DECADE FROM __time) = 200\n"
+ "AND EXTRACT(CENTURY FROM __time) = 21\n"
+ "AND EXTRACT(MILLENNIUM FROM __time) = 2\n",
TIMESERIES_CONTEXT_DEFAULT,
ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE4)
.intervals(querySegmentSpec(Filtration.eternity()))
.granularity(Granularities.ALL)
.virtualColumns(
expressionVirtualColumn("v0", "timestamp_extract(\"__time\",'YEAR','UTC')", ValueType.LONG),
expressionVirtualColumn("v1", "timestamp_extract(\"__time\",'MICROSECOND','UTC')", ValueType.LONG),
expressionVirtualColumn("v2", "timestamp_extract(\"__time\",'MILLISECOND','UTC')", ValueType.LONG),
expressionVirtualColumn("v3", "timestamp_extract(\"__time\",'ISODOW','UTC')", ValueType.LONG),
expressionVirtualColumn("v4", "timestamp_extract(\"__time\",'ISOYEAR','UTC')", ValueType.LONG),
expressionVirtualColumn("v5", "timestamp_extract(\"__time\",'DECADE','UTC')", ValueType.LONG),
expressionVirtualColumn("v6", "timestamp_extract(\"__time\",'CENTURY','UTC')", ValueType.LONG),
expressionVirtualColumn("v7", "timestamp_extract(\"__time\",'MILLENNIUM','UTC')", ValueType.LONG)
)
.aggregators(aggregators(new CountAggregatorFactory("a0")))
.filters(
and(
selector("v0", "2000", null),
selector("v1", "946723", null),
selector("v2", "695", null),
selector("v3", "6", null),
selector("v4", "2000", null),
selector("v5", "200", null),
selector("v6", "21", null),
selector("v7", "2", null)
)
)
.context(TIMESERIES_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{1L}
)
);
}
@Test
public void testFilterOnTimeFloorMisaligned() throws Exception
{

View File

@ -149,6 +149,7 @@ public class CalciteTests
public static final String DATASOURCE1 = "foo";
public static final String DATASOURCE2 = "foo2";
public static final String DATASOURCE3 = "numfoo";
public static final String DATASOURCE4 = "foo4";
public static final String FORBIDDEN_DATASOURCE = "forbiddenDatasource";
public static final String TEST_SUPERUSER_NAME = "testSuperuser";
@ -425,6 +426,30 @@ public class CalciteTests
createRow("2000-01-01", "друид", "ru", 1.0)
);
public static final List<InputRow> ROWS1_WITH_FULL_TIMESTAMP = ImmutableList.of(
createRow(
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-01T10:51:45.695Z")
.put("m1", "1.0")
.put("m2", "1.0")
.put("dim1", "")
.put("dim2", ImmutableList.of("a"))
.put("dim3", ImmutableList.of("a", "b"))
.build()
),
createRow(
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-18T10:51:45.695Z")
.put("m1", "2.0")
.put("m2", "2.0")
.put("dim1", "10.1")
.put("dim2", ImmutableList.of())
.put("dim3", ImmutableList.of("b", "c"))
.build()
)
);
public static final List<InputRow> FORBIDDEN_ROWS = ImmutableList.of(
createRow("2000-01-01", "forbidden", "abcd", 9999.0)
);
@ -615,6 +640,15 @@ public class CalciteTests
.rows(ROWS1_WITH_NUMERIC_DIMS)
.buildMMappedIndex();
final QueryableIndex index4 = IndexBuilder
.create()
.tmpDir(new File(tmpDir, "4"))
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(INDEX_SCHEMA)
.rows(ROWS1_WITH_FULL_TIMESTAMP)
.buildMMappedIndex();
return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
DataSegment.builder()
.dataSource(DATASOURCE1)
@ -646,6 +680,13 @@ public class CalciteTests
.shardSpec(new LinearShardSpec(0))
.build(),
indexNumericDims
).add(DataSegment.builder()
.dataSource(DATASOURCE4)
.interval(index4.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.build(),
index4
);
}