SQL: Allow NULLs in place of optional arguments in many functions. (#7709)

* SQL: Allow NULLs in place of optional arguments in many functions.

Also adjust SQL docs to describe how to make time literals using
TIME_PARSE (which is now possible in a nicer way).

* Be less forbidden.
Gian Merlino 2019-05-21 11:54:34 -07:00 committed by Fangjin Yang
parent 43c54385f6
commit cbbce955de
3 changed files with 232 additions and 12 deletions

View File

@@ -214,6 +214,10 @@ context parameter "sqlTimeZone" to the name of another time zone, like "America/
the connection time zone, some functions also accept time zones as parameters. These parameters always take precedence
over the connection time zone.
Literal timestamps in the connection time zone can be written using `TIMESTAMP '2000-01-01 00:00:00'` syntax. The
simplest way to write literal timestamps in other time zones is to use TIME_PARSE, like
`TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')`.
|Function|Notes|
|--------|-----|
|`CURRENT_TIMESTAMP`|Current timestamp in the connection's time zone.|
@@ -291,11 +295,12 @@ Additionally, some Druid features are not supported by the SQL language. Some un
Druid natively supports five basic column types: "long" (64 bit signed int), "float" (32 bit float), "double" (64 bit
float), "string" (UTF-8 encoded strings), and "complex" (catch-all for more exotic data types like hyperUnique and
approxHistogram columns).

Timestamps (including the `__time` column) are treated by Druid as longs, with the value being the number of
milliseconds since 1970-01-01 00:00:00 UTC, not counting leap seconds. Therefore, timestamps in Druid do not carry any
timezone information, but only carry information about the exact moment in time they represent. See the
[Time functions](#time-functions) section for more information about timestamp handling.
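
To make the zone-independence concrete, here is a minimal Joda-Time sketch (plain Joda-Time, not Druid code; Druid uses Joda-Time internally, as other hunks in this change show):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

class EpochMillisDemo
{
  public static void main(String[] args)
  {
    // One instant rendered in two zones: the underlying epoch-millis long is identical.
    DateTime la = new DateTime("2000-02-01T00:00:00", DateTimeZone.forID("America/Los_Angeles"));
    DateTime utc = la.withZone(DateTimeZone.UTC);
    System.out.println(la.getMillis() == utc.getMillis()); // true
    System.out.println(utc); // 2000-02-01T08:00:00.000Z
  }
}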
Druid generally treats NULLs and empty strings interchangeably, rather than according to the SQL standard. As such,
Druid SQL only has partial support for NULLs. For example, the expressions `col IS NULL` and `col = ''` are equivalent,
@@ -307,7 +312,7 @@ datasource, then it will be treated as zero for rows from those segments.
For mathematical operations, Druid SQL will use integer math if all operands involved in an expression are integers.
Otherwise, Druid will switch to floating point math. You can force this to happen by casting one of your operands
to FLOAT. At runtime, Druid may widen 32-bit floats to 64-bit for certain operators, like SUM aggregators.
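
As an analogy in plain Java (not Druid internals), the same rule applies to primitive arithmetic:

// All-integer operands stay in integer math; widening one operand switches the
// whole expression to floating point, much as CAST(... AS FLOAT) does in Druid SQL.
long integerMath = 7 / 2;        // 3 (integer division)
double floatingMath = 7 / 2.0;   // 3.5 (one floating-point operand is enough)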
The following table describes how SQL types map onto Druid types during query runtime. Casts between two SQL types
that have the same Druid runtime type will have no effect, other than exceptions noted in the table. Casts between two

View File

@@ -20,18 +20,29 @@
package org.apache.druid.sql.calcite.expression;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import it.unimi.dsi.fastutil.ints.IntArraySet;
import it.unimi.dsi.fastutil.ints.IntSet;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlOperandTypeChecker;
import org.apache.calcite.sql.type.SqlOperandTypeInference;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.util.Static;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
@@ -41,6 +52,7 @@ import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;
/**
* Utilities for assisting in writing {@link SqlOperatorConversion} implementations.
@@ -131,7 +143,7 @@ public class OperatorConversions
public static class OperatorBuilder
{
private final String name;
private SqlKind kind = SqlKind.OTHER_FUNCTION;
private SqlReturnTypeInference returnTypeInference;
private SqlFunctionCategory functionCategory = SqlFunctionCategory.USER_DEFINED_FUNCTION;
@@ -200,27 +212,195 @@ public class OperatorConversions
public SqlFunction build()
{
// Create "nullableOperands" set including all optional arguments.
final IntSet nullableOperands = new IntArraySet();
if (requiredOperands != null) {
IntStream.range(requiredOperands, operandTypes.size()).forEach(nullableOperands::add);
}
final SqlOperandTypeChecker theOperandTypeChecker;
if (operandTypeChecker == null) {
theOperandTypeChecker = new DefaultOperandTypeChecker(
operandTypes,
requiredOperands == null ? operandTypes.size() : requiredOperands,
nullableOperands
);
} else if (operandTypes == null && requiredOperands == null) {
theOperandTypeChecker = operandTypeChecker;
} else {
throw new ISE("Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'");
throw new ISE(
"Cannot have both 'operandTypeChecker' and 'operandTypes' / 'requiredOperands'"
);
}
return new SqlFunction(
name,
kind,
Preconditions.checkNotNull(returnTypeInference, "returnTypeInference"),
new DefaultOperandTypeInference(operandTypes, nullableOperands),
theOperandTypeChecker,
functionCategory
);
}
}
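
For orientation, a sketch of how a conversion typically uses this builder to declare a function whose trailing operands are optional and NULL-able. This is a hypothetical usage: the operatorBuilder() factory and the operandTypes/requiredOperands/returnType/functionCategory setters are assumed from the rest of this class, which this hunk elides.

// Hypothetical usage sketch. A TIME_FLOOR-style signature: four operands, only
// the first two required, so operands at index 2 and 3 may be omitted or NULL.
final SqlFunction timeFloor = OperatorConversions
    .operatorBuilder("TIME_FLOOR")
    .operandTypes(
        SqlTypeFamily.TIMESTAMP,  // timestamp to floor
        SqlTypeFamily.CHARACTER,  // period, e.g. 'P1M'
        SqlTypeFamily.TIMESTAMP,  // origin (optional)
        SqlTypeFamily.CHARACTER   // time zone (optional)
    )
    .requiredOperands(2)
    .returnType(SqlTypeName.TIMESTAMP)
    .functionCategory(SqlFunctionCategory.TIMEDATE)
    .build();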
/**
* Return the default, inferred specific type for a parameter that has a particular type family. Used to infer
* the type of NULL literals.
*/
private static SqlTypeName defaultTypeForFamily(final SqlTypeFamily family)
{
switch (family) {
case NUMERIC:
case APPROXIMATE_NUMERIC:
return SqlTypeName.DOUBLE;
case INTEGER:
case EXACT_NUMERIC:
return SqlTypeName.BIGINT;
case CHARACTER:
return SqlTypeName.VARCHAR;
case TIMESTAMP:
return SqlTypeName.TIMESTAMP;
default:
// No good default type for this family; just return the first one (or NULL, if empty).
return Iterables.getFirst(family.getTypeNames(), SqlTypeName.NULL);
}
}
/**
* Operand type inference that simply reports the types derived by the validator.
*
* We do this so that Calcite will allow NULL literals for type-checked operands. Otherwise, it will not be able to
* infer their types, and it will report them as NULL types, which will make operand type checking fail.
*/
private static class DefaultOperandTypeInference implements SqlOperandTypeInference
{
private final List<SqlTypeFamily> operandTypes;
private final IntSet nullableOperands;
DefaultOperandTypeInference(final List<SqlTypeFamily> operandTypes, final IntSet nullableOperands)
{
this.operandTypes = operandTypes;
this.nullableOperands = nullableOperands;
}
@Override
public void inferOperandTypes(
final SqlCallBinding callBinding,
final RelDataType returnType,
final RelDataType[] operandTypesOut
)
{
for (int i = 0; i < operandTypesOut.length; i++) {
final RelDataType derivedType = callBinding.getValidator()
.deriveType(callBinding.getScope(), callBinding.operand(i));
final RelDataType inferredType;
if (derivedType.getSqlTypeName() != SqlTypeName.NULL) {
// We could derive a non-NULL type; retain it.
inferredType = derivedType;
} else {
// We couldn't derive a non-NULL type; infer the default for the operand type family.
if (nullableOperands.contains(i)) {
inferredType = Calcites.createSqlTypeWithNullability(
callBinding.getTypeFactory(),
defaultTypeForFamily(operandTypes.get(i)),
true
);
} else {
inferredType = callBinding.getValidator().getUnknownType();
}
}
operandTypesOut[i] = inferredType;
}
}
}
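
A worked illustration of the rule above (values hypothetical):

// For TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles'), the
// validator derives type NULL for operand 1 (the format string). That operand
// is in nullableOperands and its declared family is CHARACTER, so the inference
// above substitutes nullable VARCHAR (via defaultTypeForFamily) and validation
// proceeds instead of failing on an unknown type.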
/**
* Operand type checker that is used in 'simple' situations: there are a particular number of operands, with
* particular types, some of which may be optional or nullable.
*/
private static class DefaultOperandTypeChecker implements SqlOperandTypeChecker
{
private final List<SqlTypeFamily> operandTypes;
private final int requiredOperands;
private final IntSet nullableOperands;
DefaultOperandTypeChecker(
final List<SqlTypeFamily> operandTypes,
final int requiredOperands,
final IntSet nullableOperands
)
{
Preconditions.checkArgument(requiredOperands <= operandTypes.size() && requiredOperands >= 0);
this.operandTypes = Preconditions.checkNotNull(operandTypes, "operandTypes");
this.requiredOperands = requiredOperands;
this.nullableOperands = Preconditions.checkNotNull(nullableOperands, "nullableOperands");
}
@Override
public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure)
{
if (operandTypes.size() != callBinding.getOperandCount()) {
// Just like FamilyOperandTypeChecker: assume this is an inapplicable sub-rule of a composite rule; don't throw
return false;
}
for (int i = 0; i < callBinding.operands().size(); i++) {
final SqlNode operand = callBinding.operands().get(i);
final RelDataType operandType = callBinding.getValidator().deriveType(callBinding.getScope(), operand);
final SqlTypeFamily expectedFamily = operandTypes.get(i);
if (expectedFamily == SqlTypeFamily.ANY) {
// ANY matches anything. This operand is all good; do nothing.
} else if (expectedFamily.getTypeNames().contains(operandType.getSqlTypeName())) {
// Operand came in with one of the expected types.
} else if (operandType.getSqlTypeName() == SqlTypeName.NULL) {
// Null came in, check if operand is a nullable type.
if (!nullableOperands.contains(i)) {
if (throwOnFailure) {
throw callBinding.getValidator().newValidationError(operand, Static.RESOURCE.nullIllegal());
} else {
return false;
}
}
} else {
if (throwOnFailure) {
throw callBinding.newValidationSignatureError();
} else {
return false;
}
}
}
return true;
}
@Override
public SqlOperandCountRange getOperandCountRange()
{
return SqlOperandCountRanges.between(requiredOperands, operandTypes.size());
}
@Override
public String getAllowedSignatures(SqlOperator op, String opName)
{
return SqlUtil.getAliasedSignature(op, opName, operandTypes);
}
@Override
public Consistency getConsistency()
{
return Consistency.NONE;
}
@Override
public boolean isOptional(int i)
{
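// Operands at 0-based index >= requiredOperands are the optional ones.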
return i + 1 > requiredOperands;
}
}
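
A minimal sketch of the checker's behavior for a four-operand signature with two required operands (hypothetical values, written as if inside this class, since DefaultOperandTypeChecker is private):

// Hypothetical sketch: a TIME_FLOOR-style signature, 2 of 4 operands required.
final IntSet nullableOperands = new IntArraySet();
nullableOperands.add(2);
nullableOperands.add(3);
final SqlOperandTypeChecker checker = new DefaultOperandTypeChecker(
    Arrays.asList(
        SqlTypeFamily.TIMESTAMP,
        SqlTypeFamily.CHARACTER,
        SqlTypeFamily.TIMESTAMP,
        SqlTypeFamily.CHARACTER
    ),
    2,
    nullableOperands
);
// checker.getOperandCountRange() accepts between 2 and 4 operands.
// checker.isOptional(2) is true; checker.isOptional(0) is false.
// A NULL literal passes type checking at positions 2 and 3 but is rejected at 0 and 1.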
}

View File

@@ -70,9 +70,11 @@ import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.rel.CannotBuildQueryException;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.hamcrest.CoreMatchers;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.Period;
@@ -5295,6 +5297,39 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testGroupAndFilterOnTimeFloorWithTimeZone() throws Exception
{
testQuery(
"SELECT TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles'), COUNT(*)\n"
+ "FROM druid.foo\n"
+ "WHERE\n"
+ "TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+ " TIME_PARSE('2000-01-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+ "OR TIME_FLOOR(__time, 'P1M', NULL, 'America/Los_Angeles') = "
+ " TIME_PARSE('2000-02-01 00:00:00', NULL, 'America/Los_Angeles')\n"
+ "GROUP BY 1",
ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Intervals.of("2000-01-01T00-08:00/2000-03-01T00-08:00")))
.granularity(new PeriodGranularity(Period.months(1), null, DateTimes.inferTzFromString(LOS_ANGELES)))
.aggregators(aggregators(new CountAggregatorFactory("a0")))
.context(TIMESERIES_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{
Calcites.jodaToCalciteTimestamp(
new DateTime("2000-01-01", DateTimes.inferTzFromString(LOS_ANGELES)),
DateTimeZone.UTC
),
2L
}
)
);
}
@Test
public void testFilterOnCurrentTimestampWithIntervalArithmetic() throws Exception
{