SQL: Allow NULLs in place of optional arguments in many functions. (#7709)

* SQL: Allow NULLs in place of optional arguments in many functions.

Also adjust SQL docs to describe how to make time literals using
TIME_PARSE (which is now possible in a nicer way).

* Be less forbidden.
Gian Merlino 2019-05-21 11:54:34 -07:00 committed by Fangjin Yang
parent 43c54385f6
commit cbbce955de
3 changed files with 232 additions and 12 deletions

View File

@@ -214,6 +214,10 @@ context parameter "sqlTimeZone" to the name of another time zone, like "America/
the connection time zone, some functions also accept time zones as parameters. These parameters always take precedence
over the connection time zone.
Literal timestamps in the connection time zone can be written using `TIMESTAMP '2000-01-01 00:00:00'` syntax. The
simplest way to write literal timestamps in other time zones is to use TIME_PARSE, like
`TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')`.
|Function|Notes|
|--------|-----|
|`CURRENT_TIMESTAMP`|Current timestamp in the connection's time zone.|
@@ -291,11 +295,12 @@ Additionally, some Druid features are not supported by the SQL language. Some un
Druid natively supports five basic column types: "long" (64 bit signed int), "float" (32 bit float), "double" (64 bit
float), "string" (UTF-8 encoded strings), and "complex" (catch-all for more exotic data types like hyperUnique and
approxHistogram columns).

Timestamps (including the `__time` column) are treated by Druid as longs, with the value being the number of
milliseconds since 1970-01-01 00:00:00 UTC, not counting leap seconds. Therefore, timestamps in Druid do not carry any
timezone information, but only carry information about the exact moment in time they represent. See the
[Time functions](#time-functions) section for more information about timestamp handling.
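
To make the zone-independence concrete, here is a minimal Joda-Time sketch (plain Joda-Time, not Druid code; Druid uses Joda-Time internally, as other hunks in this change show):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

class EpochMillisDemo
{
  public static void main(String[] args)
  {
    // One instant rendered in two zones: the underlying epoch-millis long is identical.
    DateTime la = new DateTime("2000-02-01T00:00:00", DateTimeZone.forID("America/Los_Angeles"));
    DateTime utc = la.withZone(DateTimeZone.UTC);
    System.out.println(la.getMillis() == utc.getMillis()); // true
    System.out.println(utc); // 2000-02-01T08:00:00.000Z
  }
}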
Druid generally treats NULLs and empty strings interchangeably, rather than according to the SQL standard. As such,
Druid SQL only has partial support for NULLs. For example, the expressions `col IS NULL` and `col = ''` are equivalent,
@@ -307,7 +312,7 @@ datasource, then it will be treated as zero for rows from those segments.
For mathematical operations, Druid SQL will use integer math if all operands involved in an expression are integers.
Otherwise, Druid will switch to floating point math. You can force this to happen by casting one of your operands
to FLOAT. At runtime, Druid may widen 32-bit floats to 64-bit for certain operators, like SUM aggregators.
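
As an analogy in plain Java (not Druid internals), the same rule applies to primitive arithmetic:

// All-integer operands stay in integer math; widening one operand switches the
// whole expression to floating point, much as CAST(... AS FLOAT) does in Druid SQL.
long integerMath = 7 / 2;        // 3 (integer division)
double floatingMath = 7 / 2.0;   // 3.5 (one floating-point operand is enough)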
The following table describes how SQL types map onto Druid types during query runtime. Casts between two SQL types
that have the same Druid runtime type will have no effect, other than exceptions noted in the table. Casts between two

View File

@@ -20,18 +20,29 @@
package org.apache.druid.sql.calcite.expression;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import it.unimi.dsi.fastutil.ints.IntArraySet;
import it.unimi.dsi.fastutil.ints.IntSet;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlOperandTypeChecker;
import org.apache.calcite.sql.type.SqlOperandTypeInference;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Static;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
@@ -41,6 +52,7 @@ import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;
/**
* Utilities for assisting in writing {@link SqlOperatorConversion} implementations.
@@ -131,7 +143,7 @@ public class OperatorConversions
public static class OperatorBuilder
{
private final String name;
private SqlKind kind = SqlKind.OTHER_FUNCTION;
private SqlReturnTypeInference returnTypeInference;
private SqlFunctionCategory functionCategory = SqlFunctionCategory.USER_DEFINED_FUNCTION;
@@ -200,27 +212,195 @@ public class OperatorConversions
public SqlFunction build()
{
// Create "nullableOperands" set including all optional arguments.
final IntSet nullableOperands = new IntArraySet();
if (requiredOperands != null) {
IntStream.range(requiredOperands, operandTypes.size()).forEach(nullableOperands::add);
}
final SqlOperandTypeChecker theOperandTypeChecker;
if (operandTypeChecker == null) {
theOperandTypeChecker = new DefaultOperandTypeChecker(
operandTypes,
requiredOperands == null ? operandTypes.size() : requiredOperands,
nullableOperands
);
} else if (operandTypes == null && requiredOperands == null) {
theOperandTypeChecker = operandTypeChecker;
} else {
throw new ISE("Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'");
throw new ISE(
"Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'"
);
}
return new SqlFunction(
name,
kind,
Preconditions.checkNotNull(returnTypeInference, "returnTypeInference"),
new DefaultOperandTypeInference(operandTypes, nullableOperands),
theOperandTypeChecker,
functionCategory
);
}
}
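
For orientation, a sketch of how a conversion typically uses this builder to declare a function whose trailing operands are optional and NULL-able. This is a hypothetical usage: the operatorBuilder() factory and the operandTypes/requiredOperands/returnType/functionCategory setters are assumed from the rest of this class, which this hunk elides.

// Hypothetical usage sketch. A TIME_FLOOR-style signature: four operands, only
// the first two required, so operands at index 2 and 3 may be omitted or NULL.
final SqlFunction timeFloor = OperatorConversions
    .operatorBuilder("TIME_FLOOR")
    .operandTypes(
        SqlTypeFamily.TIMESTAMP,  // timestamp to floor
        SqlTypeFamily.CHARACTER,  // period, e.g. 'P1M'
        SqlTypeFamily.TIMESTAMP,  // origin (optional)
        SqlTypeFamily.CHARACTER   // time zone (optional)
    )
    .requiredOperands(2)
    .returnType(SqlTypeName.TIMESTAMP)
    .functionCategory(SqlFunctionCategory.TIMEDATE)
    .build();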
/**
* Return the default, inferred specific type for a parameter that has a particular type family. Used to infer
* the type of NULL literals.
*/
private static SqlTypeName defaultTypeForFamily(final SqlTypeFamily family)
{
switch (family) {
case NUMERIC:
case APPROXIMATE_NUMERIC:
return SqlTypeName.DOUBLE;
case INTEGER:
case EXACT_NUMERIC:
return SqlTypeName.BIGINT;
case CHARACTER:
return SqlTypeName.VARCHAR;
case TIMESTAMP:
return SqlTypeName.TIMESTAMP;
default:
// No good default type for this family; just return the first one (or NULL, if empty).
return Iterables.getFirst(family.getTypeNames(), SqlTypeName.NULL);
}
}
/**
* Operand type inference that simply reports the types derived by the validator.
*
* We do this so that Calcite will allow NULL literals for type-checked operands. Otherwise, it will not be able to
* infer their types, and it will report them as NULL types, which will make operand type checking fail.
*/
private static class DefaultOperandTypeInference implements SqlOperandTypeInference
{
private final List<SqlTypeFamily> operandTypes;
private final IntSet nullableOperands;
DefaultOperandTypeInference(final List<SqlTypeFamily> operandTypes, final IntSet nullableOperands)
{
this.operandTypes = operandTypes;
this.nullableOperands = nullableOperands;
}
@Override
public void inferOperandTypes(
final SqlCallBinding callBinding,
final RelDataType returnType,
final RelDataType[] operandTypesOut
)
{
for (int i = 0; i < operandTypesOut.length; i++) {
final RelDataType derivedType = callBinding.getValidator()
.deriveType(callBinding.getScope(), callBinding.operand(i));
final RelDataType inferredType;
if (derivedType.getSqlTypeName() != SqlTypeName.NULL) {
// We could derive a non-NULL type; retain it.
inferredType = derivedType;
} else {
// We couldn't derive a non-NULL type; infer the default for the operand type family.
if (nullableOperands.contains(i)) {
inferredType = Calcites.createSqlTypeWithNullability(
callBinding.getTypeFactory(),
defaultTypeForFamily(operandTypes.get(i)),
true
);
} else {
inferredType = callBinding.getValidator().getUnknownType();
}
}
operandTypesOut[i] = inferredType;
}
}
}
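
A worked illustration of the rule above (values hypothetical):

// For TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles'), the
// validator derives type NULL for operand 1 (the format string). That operand
// is in nullableOperands and its declared family is CHARACTER, so the inference
// above substitutes nullable VARCHAR (via defaultTypeForFamily) and validation
// proceeds instead of failing on an unknown type.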
/**
* Operand type checker that is used in 'simple' situations: there are a particular number of operands, with
* particular types, some of which may be optional or nullable.
*/
private static class DefaultOperandTypeChecker implements SqlOperandTypeChecker
{
private final List<SqlTypeFamily> operandTypes;
private final int requiredOperands;
private final IntSet nullableOperands;
DefaultOperandTypeChecker(
final List<SqlTypeFamily> operandTypes,
final int requiredOperands,
final IntSet nullableOperands
)
{
Preconditions.checkArgument(requiredOperands <= operandTypes.size() && requiredOperands >= 0);
this.operandTypes = Preconditions.checkNotNull(operandTypes, "operandTypes");
this.requiredOperands = requiredOperands;
this.nullableOperands = Preconditions.checkNotNull(nullableOperands, "nullableOperands");
}
@Override
public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure)
{
if (operandTypes.size() != callBinding.getOperandCount()) {
// Just like FamilyOperandTypeChecker: assume this is an inapplicable sub-rule of a composite rule; don't throw
return false;
}
for (int i = 0; i < callBinding.operands().size(); i++) {
final SqlNode operand = callBinding.operands().get(i);
final RelDataType operandType = callBinding.getValidator().deriveType(callBinding.getScope(), operand);
final SqlTypeFamily expectedFamily = operandTypes.get(i);
if (expectedFamily == SqlTypeFamily.ANY) {
// ANY matches anything. This operand is all good; do nothing.
} else if (expectedFamily.getTypeNames().contains(operandType.getSqlTypeName())) {
// Operand came in with one of the expected types.
} else if (operandType.getSqlTypeName() == SqlTypeName.NULL) {
// Null came in, check if operand is a nullable type.
if (!nullableOperands.contains(i)) {
if (throwOnFailure) {
throw callBinding.getValidator().newValidationError(operand, Static.RESOURCE.nullIllegal());
} else {
return false;
}
}
} else {
if (throwOnFailure) {
throw callBinding.newValidationSignatureError();
} else {
return false;
}
}
}
return true;
}
@Override
public SqlOperandCountRange getOperandCountRange()
{
return SqlOperandCountRanges.between(requiredOperands, operandTypes.size());
}
@Override
public String getAllowedSignatures(SqlOperator op, String opName)
{
return SqlUtil.getAliasedSignature(op, opName, operandTypes);
}
@Override
public Consistency getConsistency()
{
return Consistency.NONE;
}
@Override
public boolean isOptional(int i)
{
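// Operands at 0-based index >= requiredOperands are the optional ones.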
return i + 1 > requiredOperands;
}
}
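
A minimal sketch of the checker's behavior for a four-operand signature with two required operands (hypothetical values, written as if inside this class, since DefaultOperandTypeChecker is private):

// Hypothetical sketch: a TIME_FLOOR-style signature, 2 of 4 operands required.
final IntSet nullableOperands = new IntArraySet();
nullableOperands.add(2);
nullableOperands.add(3);
final SqlOperandTypeChecker checker = new DefaultOperandTypeChecker(
    Arrays.asList(
        SqlTypeFamily.TIMESTAMP,
        SqlTypeFamily.CHARACTER,
        SqlTypeFamily.TIMESTAMP,
        SqlTypeFamily.CHARACTER
    ),
    2,
    nullableOperands
);
// checker.getOperandCountRange() accepts between 2 and 4 operands.
// checker.isOptional(2) is true; checker.isOptional(0) is false.
// A NULL literal passes type checking at positions 2 and 3 but is rejected at 0 and 1.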
}

View File

@@ -70,9 +70,11 @@ import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.rel.CannotBuildQueryException;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.hamcrest.CoreMatchers;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.Period;
@@ -5295,6 +5297,39 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testGroupAndFilterOnTimeFloorWithTimeZone() throws Exception
{
testQuery(
"SELECT TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles'), COUNT(*)\n"
+ "FROM druid.foo\n"
+ "WHERE\n"
+ "TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+ " TIME_PARSE('2000-01-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+ "OR TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+ " TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+ "GROUP BY 1",
ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Intervals.of("2000-01-01T00-08:00/2000-03-01T00-08:00")))
.granularity(new PeriodGranularity(Period.months(1), null, DateTimes.inferTzFromString(LOS_ANGELES)))
.aggregators(aggregators(new CountAggregatorFactory("a0")))
.context(TIMESERIES_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{
Calcites.jodaToCalciteTimestamp(
new DateTime("2000-01-01", DateTimes.inferTzFromString(LOS_ANGELES)),
DateTimeZone.UTC
),
2L
}
)
);
}
@Test
public void testFilterOnCurrentTimestampWithIntervalArithmetic() throws Exception
{