SQL: Add TIME_CEIL function. (#8027)

Also simplify conversions for CEIL, FLOOR, and TIME_FLOOR by allowing them to
share more code.
This commit is contained in:
Gian Merlino 2019-07-04 15:40:03 -07:00 committed by Fangjin Yang
parent 6760505a7e
commit 613f09b45a
8 changed files with 219 additions and 121 deletions

View File

@ -282,6 +282,7 @@ simplest way to write literal timestamps in other time zones is to use TIME_PARS
|`CURRENT_TIMESTAMP`|Current timestamp in the connection's time zone.|
|`CURRENT_DATE`|Current date in the connection's time zone.|
|`DATE_TRUNC(<unit>, <timestamp_expr>)`|Rounds down a timestamp, returning it as a new timestamp. Unit can be 'milliseconds', 'second', 'minute', 'hour', 'day', 'week', 'month', 'quarter', 'year', 'decade', 'century', or 'millennium'.|
|`TIME_CEIL(<timestamp_expr>, <period>, [<origin>, [<timezone>]])`|Rounds up a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". This function is similar to `CEIL` but is more flexible.|
|`TIME_FLOOR(<timestamp_expr>, <period>, [<origin>, [<timezone>]])`|Rounds down a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". This function is similar to `FLOOR` but is more flexible.|
|`TIME_SHIFT(<timestamp_expr>, <period>, <step>, [<timezone>])`|Shifts a timestamp by a period (step times), returning it as a new timestamp. Period can be any ISO8601 period. Step may be negative. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".|
|`TIME_EXTRACT(<timestamp_expr>, [<unit>, [<timezone>]])`|Extracts a time part from expr, returning it as a number. Unit can be EPOCH, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), DOY (day of year), WEEK (week of [week year](https://en.wikipedia.org/wiki/ISO_week_date)), MONTH (1 through 12), QUARTER (1 through 4), or YEAR. The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00". This function is similar to `EXTRACT` but is more flexible. Unit and time zone must be literals, and must be provided quoted, like `TIME_EXTRACT(__time, 'HOUR')` or `TIME_EXTRACT(__time, 'HOUR', 'America/Los_Angeles')`.|

View File

@ -21,10 +21,9 @@ package org.apache.druid.sql.calcite.expression;
import com.google.common.collect.ImmutableMap;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import javax.annotation.Nullable;
import java.util.Map;
public class TimeUnits
@ -44,17 +43,12 @@ public class TimeUnits
* Returns the Druid QueryGranularity corresponding to a Calcite TimeUnitRange, or null if there is none.
*
* @param timeUnitRange time unit
* @param timeZone session time zone
*
* @return queryGranularity, or null
*/
public static PeriodGranularity toQueryGranularity(final TimeUnitRange timeUnitRange, final DateTimeZone timeZone)
@Nullable
public static Period toPeriod(final TimeUnitRange timeUnitRange)
{
final Period period = PERIOD_MAP.get(timeUnitRange);
if (period == null) {
return null;
}
return new PeriodGranularity(period, null, timeZone);
return PERIOD_MAP.get(timeUnitRange);
}
}

View File

@ -19,23 +19,17 @@
package org.apache.druid.sql.calcite.expression.builtin;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.TimeUnits;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignature;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
public class CeilOperatorConversion implements SqlOperatorConversion
{
@ -46,6 +40,7 @@ public class CeilOperatorConversion implements SqlOperatorConversion
}
@Override
@Nullable
public DruidExpression toDruidExpression(
final PlannerContext plannerContext,
final RowSignature rowSignature,
@ -53,47 +48,18 @@ public class CeilOperatorConversion implements SqlOperatorConversion
)
{
final RexCall call = (RexCall) rexNode;
final RexNode arg = call.getOperands().get(0);
final DruidExpression druidExpression = Expressions.toDruidExpression(
plannerContext,
rowSignature,
arg
);
if (druidExpression == null) {
return null;
} else if (call.getOperands().size() == 1) {
// CEIL(expr)
return druidExpression.map(
simpleExtraction -> null,
expression -> StringUtils.format("ceil(%s)", expression)
);
} else if (call.getOperands().size() == 2) {
// CEIL(expr TO timeUnit)
final RexLiteral flag = (RexLiteral) call.getOperands().get(1);
final TimeUnitRange timeUnit = (TimeUnitRange) flag.getValue();
final PeriodGranularity granularity = TimeUnits.toQueryGranularity(timeUnit, plannerContext.getTimeZone());
if (granularity == null) {
return null;
}
// Unlike FLOOR(expr TO timeUnit) there is no built-in extractionFn that can behave like timestamp_ceil.
// So there is no simple extraction for this operator.
if (call.getOperands().size() == 1) {
// CEIL(expr) -- numeric CEIL
return OperatorConversions.convertCall(plannerContext, rowSignature, call, "ceil");
} else if (call.getOperands().size() == 2) {
// CEIL(expr TO timeUnit) -- time CEIL
return DruidExpression.fromFunctionCall(
"timestamp_ceil",
Stream
.of(
druidExpression.getExpression(),
DruidExpression.stringLiteral(granularity.getPeriod().toString()),
DruidExpression.numberLiteral(
granularity.getOrigin() == null ? null : granularity.getOrigin().getMillis()
),
DruidExpression.stringLiteral(granularity.getTimeZone().toString())
)
.map(DruidExpression::fromExpression)
.collect(Collectors.toList())
TimeFloorOperatorConversion.toTimestampFloorOrCeilArgs(plannerContext, rowSignature, call.getOperands())
);
} else {
// WTF? CEIL with 3 arguments?
// WTF? CEIL with the wrong number of arguments?
return null;
}
}

View File

@ -19,21 +19,18 @@
package org.apache.druid.sql.calcite.expression.builtin;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.TimeUnits;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignature;
import javax.annotation.Nullable;
public class FloorOperatorConversion implements SqlOperatorConversion
{
@Override
@ -43,6 +40,7 @@ public class FloorOperatorConversion implements SqlOperatorConversion
}
@Override
@Nullable
public DruidExpression toDruidExpression(
final PlannerContext plannerContext,
final RowSignature rowSignature,
@ -50,36 +48,18 @@ public class FloorOperatorConversion implements SqlOperatorConversion
)
{
final RexCall call = (RexCall) rexNode;
final RexNode arg = call.getOperands().get(0);
final DruidExpression druidExpression = Expressions.toDruidExpression(
plannerContext,
rowSignature,
arg
);
if (druidExpression == null) {
return null;
} else if (call.getOperands().size() == 1) {
// FLOOR(expr)
return druidExpression.map(
simpleExtraction -> null, // BucketExtractionFn could do this, but it's lame since it returns strings.
expression -> StringUtils.format("floor(%s)", expression)
);
} else if (call.getOperands().size() == 2) {
// FLOOR(expr TO timeUnit)
final RexLiteral flag = (RexLiteral) call.getOperands().get(1);
final TimeUnitRange timeUnit = (TimeUnitRange) flag.getValue();
final PeriodGranularity granularity = TimeUnits.toQueryGranularity(timeUnit, plannerContext.getTimeZone());
if (granularity == null) {
return null;
}
return TimeFloorOperatorConversion.applyTimestampFloor(
druidExpression,
granularity,
plannerContext.getExprMacroTable()
if (call.getOperands().size() == 1) {
// FLOOR(expr) -- numeric FLOOR
return OperatorConversions.convertCall(plannerContext, rowSignature, call, "floor");
} else if (call.getOperands().size() == 2) {
// FLOOR(expr TO timeUnit) -- time FLOOR
return DruidExpression.fromFunctionCall(
"timestamp_floor",
TimeFloorOperatorConversion.toTimestampFloorOrCeilArgs(plannerContext, rowSignature, call.getOperands())
);
} else {
// WTF? FLOOR with 3 arguments?
// WTF? FLOOR with the wrong number of arguments?
return null;
}
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.sql.calcite.expression.builtin;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignature;
import javax.annotation.Nullable;
import java.util.List;
/**
 * SQL operator conversion for {@code TIME_CEIL(<timestamp>, <period>, [<origin>, [<timezone>]])}. Calls to this
 * operator are converted into calls to the Druid expression function "timestamp_ceil".
 */
public class TimeCeilOperatorConversion implements SqlOperatorConversion
{
  // Operand order is (timestamp, period, origin, time zone); only the first two are required.
  private static final SqlFunction SQL_FUNCTION = OperatorConversions
      .operatorBuilder("TIME_CEIL")
      .operandTypes(SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER, SqlTypeFamily.TIMESTAMP, SqlTypeFamily.CHARACTER)
      .requiredOperands(2)
      .returnType(SqlTypeName.TIMESTAMP)
      .functionCategory(SqlFunctionCategory.TIMEDATE)
      .build();

  @Override
  public SqlOperator calciteOperator()
  {
    return SQL_FUNCTION;
  }

  /**
   * Converts a TIME_CEIL call into a "timestamp_ceil" function-call expression.
   *
   * @param plannerContext planner context, forwarded to the shared floor/ceil argument conversion
   * @param rowSignature   row signature, forwarded to the shared floor/ceil argument conversion
   * @param rexNode        the TIME_CEIL call; it is cast to {@link RexCall} without a type check
   *
   * @return the converted expression, or null if the shared argument conversion returned null
   */
  @Override
  @Nullable
  public DruidExpression toDruidExpression(
      final PlannerContext plannerContext,
      final RowSignature rowSignature,
      final RexNode rexNode
  )
  {
    final List<RexNode> operands = ((RexCall) rexNode).getOperands();
    final List<DruidExpression> ceilArgs =
        TimeFloorOperatorConversion.toTimestampFloorOrCeilArgs(plannerContext, rowSignature, operands);

    // A null argument list means the call cannot be translated; pass that on to the caller.
    return ceilArgs == null ? null : DruidExpression.fromFunctionCall("timestamp_ceil", ceilArgs);
  }
}

View File

@ -21,6 +21,7 @@ package org.apache.druid.sql.calcite.expression.builtin;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
@ -30,7 +31,6 @@ import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.expression.TimestampFloorExprMacro;
@ -38,13 +38,14 @@ import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
import org.apache.druid.sql.calcite.expression.SqlOperatorConversion;
import org.apache.druid.sql.calcite.expression.TimeUnits;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.table.RowSignature;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
@ -59,6 +60,11 @@ public class TimeFloorOperatorConversion implements SqlOperatorConversion
.functionCategory(SqlFunctionCategory.TIMEDATE)
.build();
/**
* Function that floors a DruidExpression to a particular granularity. Not actually used by the
* TimeFloorOperatorConversion, but I'm not sure where else to put this. It makes some sense in this file, since
* it's responsible for generating "timestamp_floor" calls.
*/
public static DruidExpression applyTimestampFloor(
final DruidExpression input,
final PeriodGranularity granularity,
@ -98,6 +104,74 @@ public class TimeFloorOperatorConversion implements SqlOperatorConversion
);
}
/**
 * Builds the argument list for the Druid expressions "timestamp_floor" and "timestamp_ceil" from the operands of
 * SQL TIME_FLOOR / TIME_CEIL, or of SQL FLOOR / CEIL when the second operand is a TimeUnitRange literal.
 *
 * The returned list always has four entries, in this order: timestamp, period, origin, time zone. Origin and time
 * zone need special care because they depend on the SQL context time zone: a literal origin is converted to millis
 * using the planner context's time zone, and the fallback time zone is the ID of the planner context's time zone.
 * The fallback origin is a null literal.
 *
 * @param plannerContext planner context supplying the SQL context time zone
 * @param rowSignature   row signature used when converting operands to expressions
 * @param operands       operands of the SQL call; the first two (timestamp and period) are read unconditionally
 *
 * @return the four function arguments, or null if the time unit has no corresponding period or if any argument
 * converted to null
 */
@Nullable
public static List<DruidExpression> toTimestampFloorOrCeilArgs(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final List<RexNode> operands
)
{
  final List<DruidExpression> args = new ArrayList<>();

  // Argument 1: the timestamp being rounded.
  final DruidExpression timestampArg = Expressions.toDruidExpression(plannerContext, rowSignature, operands.get(0));
  args.add(timestampArg);

  // Argument 2: the period. FLOOR(t TO unit) and CEIL(t TO unit) carry it as a TimeUnitRange literal, while
  // TIME_FLOOR and TIME_CEIL carry it as some other kind of operand.
  final RexNode periodNode = operands.get(1);
  final boolean periodIsTimeUnit =
      periodNode.isA(SqlKind.LITERAL) && RexLiteral.value(periodNode) instanceof TimeUnitRange;
  final DruidExpression periodArg;

  if (!periodIsTimeUnit) {
    periodArg = Expressions.toDruidExpression(plannerContext, rowSignature, periodNode);
  } else {
    final Period unitPeriod = TimeUnits.toPeriod((TimeUnitRange) RexLiteral.value(periodNode));
    if (unitPeriod == null) {
      // No period is known for this time unit, so the call cannot be translated.
      return null;
    }
    periodArg = DruidExpression.fromExpression(DruidExpression.stringLiteral(unitPeriod.toString()));
  }
  args.add(periodArg);

  // Argument 3: the origin. Literal origins become a millis number literal, interpreted in the SQL context time
  // zone; any other operand is converted like a regular expression.
  final DruidExpression originArg = OperatorConversions.getOperandWithDefault(
      operands,
      2,
      originNode -> originNode.isA(SqlKind.LITERAL)
                    ? DruidExpression.fromExpression(
                        DruidExpression.numberLiteral(
                            Calcites.calciteDateTimeLiteralToJoda(originNode, plannerContext.getTimeZone()).getMillis()
                        )
                    )
                    : Expressions.toDruidExpression(plannerContext, rowSignature, originNode),
      DruidExpression.fromExpression(DruidExpression.nullLiteral())
  );
  args.add(originArg);

  // Argument 4: the time zone, falling back to the SQL context time zone.
  final DruidExpression timeZoneArg = OperatorConversions.getOperandWithDefault(
      operands,
      3,
      timeZoneNode -> Expressions.toDruidExpression(plannerContext, rowSignature, timeZoneNode),
      DruidExpression.fromExpression(DruidExpression.stringLiteral(plannerContext.getTimeZone().getID()))
  );
  args.add(timeZoneArg);

  // Any argument that converted to null makes the whole call untranslatable.
  return args.stream().anyMatch(Objects::isNull) ? null : args;
}
private static boolean periodIsDayMultiple(final Period period)
{
return period.getMillis() == 0
@ -114,6 +188,7 @@ public class TimeFloorOperatorConversion implements SqlOperatorConversion
}
@Override
@Nullable
public DruidExpression toDruidExpression(
final PlannerContext plannerContext,
final RowSignature rowSignature,
@ -121,40 +196,16 @@ public class TimeFloorOperatorConversion implements SqlOperatorConversion
)
{
final RexCall call = (RexCall) rexNode;
final List<RexNode> operands = call.getOperands();
final List<DruidExpression> druidExpressions = Expressions.toDruidExpressions(
final List<DruidExpression> functionArgs = toTimestampFloorOrCeilArgs(
plannerContext,
rowSignature,
operands
call.getOperands()
);
if (druidExpressions == null) {
if (functionArgs == null) {
return null;
} else if (operands.get(1).isA(SqlKind.LITERAL)
&& (operands.size() <= 2 || operands.get(2).isA(SqlKind.LITERAL))
&& (operands.size() <= 3 || operands.get(3).isA(SqlKind.LITERAL))) {
// Granularity is a literal. Special case since we can use an extractionFn here.
final Period period = new Period(RexLiteral.stringValue(operands.get(1)));
final DateTime origin = OperatorConversions.getOperandWithDefault(
call.getOperands(),
2,
operand -> Calcites.calciteDateTimeLiteralToJoda(operands.get(2), plannerContext.getTimeZone()),
null
);
final DateTimeZone timeZone = OperatorConversions.getOperandWithDefault(
call.getOperands(),
3,
operand -> DateTimes.inferTzFromString(RexLiteral.stringValue(operand)),
plannerContext.getTimeZone()
);
final PeriodGranularity granularity = new PeriodGranularity(period, origin, timeZone);
return applyTimestampFloor(druidExpressions.get(0), granularity, plannerContext.getExprMacroTable());
} else {
// Granularity is dynamic
return DruidExpression.fromFunctionCall("timestamp_floor", druidExpressions);
}
return DruidExpression.fromFunctionCall("timestamp_floor", functionArgs);
}
}

View File

@ -87,6 +87,7 @@ import org.apache.druid.sql.calcite.expression.builtin.StrposOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.SubstringOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TextcatOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeArithmeticOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeCeilOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeExtractOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeFloorOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeFormatOperatorConversion;
@ -186,6 +187,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new TimeArithmeticOperatorConversion.TimeMinusIntervalOperatorConversion())
.add(new TimeArithmeticOperatorConversion.TimePlusIntervalOperatorConversion())
.add(new TimeExtractOperatorConversion())
.add(new TimeCeilOperatorConversion())
.add(new TimeFloorOperatorConversion())
.add(new TimeFormatOperatorConversion())
.add(new TimeParseOperatorConversion())

View File

@ -53,6 +53,7 @@ import org.apache.druid.sql.calcite.expression.builtin.RightOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.RoundOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.StringFormatOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.StrposOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeCeilOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeExtractOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeFloorOperatorConversion;
import org.apache.druid.sql.calcite.expression.builtin.TimeFormatOperatorConversion;
@ -520,7 +521,8 @@ public class ExpressionsTest extends CalciteTestBase
final SqlFunction roundFunction = new RoundOperatorConversion().calciteOperator();
expectedException.expect(IAE.class);
expectedException.expectMessage("The first argument to the function[round] should be integer or double type but get the STRING type");
expectedException.expectMessage(
"The first argument to the function[round] should be integer or double type but get the STRING type");
testExpression(
rexBuilder.makeCall(roundFunction, inputRef("s")),
DruidExpression.fromExpression("round(\"s\")"),
@ -534,7 +536,8 @@ public class ExpressionsTest extends CalciteTestBase
final SqlFunction roundFunction = new RoundOperatorConversion().calciteOperator();
expectedException.expect(IAE.class);
expectedException.expectMessage("The second argument to the function[round] should be integer type but get the STRING type");
expectedException.expectMessage(
"The second argument to the function[round] should be integer type but get the STRING type");
testExpression(
rexBuilder.makeCall(roundFunction, inputRef("x"), rexBuilder.makeLiteral("foo")),
DruidExpression.fromExpression("round(\"x\",'foo')"),
@ -672,6 +675,32 @@ public class ExpressionsTest extends CalciteTestBase
);
}
/**
 * Checks TIME_CEIL conversion for a call with only the required operands and for a call with all four operands.
 */
@Test
public void testTimeCeil()
{
  // Timestamp literal and period only: the expected expression carries a null origin and the 'UTC' time zone.
  final DruidExpression hourlyCeilExpression =
      DruidExpression.fromExpression("timestamp_ceil(949550706000,'PT1H',null,'UTC')");
  final long hourlyCeilMillis = DateTimes.of("2000-02-03T05:00:00").getMillis();

  testExpression(
      rexBuilder.makeCall(
          new TimeCeilOperatorConversion().calciteOperator(),
          timestampLiteral(DateTimes.of("2000-02-03T04:05:06Z")),
          rexBuilder.makeLiteral("PT1H")
      ),
      hourlyCeilExpression,
      hourlyCeilMillis
  );

  // Column input with a null-literal origin and an explicit time zone: the expected expression keeps the origin
  // as null and passes the given time zone through.
  final DruidExpression dailyCeilExpression =
      DruidExpression.fromExpression("timestamp_ceil(\"t\",'P1D',null,'America/Los_Angeles')");
  final long dailyCeilMillis = DateTimes.of("2000-02-03T08:00:00").getMillis();

  testExpression(
      rexBuilder.makeCall(
          new TimeCeilOperatorConversion().calciteOperator(),
          inputRef("t"),
          rexBuilder.makeLiteral("P1D"),
          rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.TIMESTAMP)),
          rexBuilder.makeLiteral("America/Los_Angeles")
      ),
      dailyCeilExpression,
      dailyCeilMillis
  );
}
@Test
public void testOtherTimeCeil()
{