diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 64db457f208..5fc88491b2d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -28,6 +28,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -519,18 +520,18 @@ public class Expressions // column instead for filtering to ensure that results are correct if (druidExpression.isSimpleExtraction() && !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) { - if (!plannerContext.isUseBoundsAndSelectors()) { - if (druidExpression.getSimpleExtraction().getExtractionFn() != null) { - // return null to fallback to using an expression filter - return null; - } - equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn()); - } else { + if (plannerContext.isUseBoundsAndSelectors()) { equalFilter = new SelectorDimFilter( druidExpression.getSimpleExtraction().getColumn(), NullHandling.defaultStringValue(), druidExpression.getSimpleExtraction().getExtractionFn() ); + } else { + if (druidExpression.getSimpleExtraction().getExtractionFn() != null) { + // return null to fallback to using an expression filter + return null; + } + equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn()); } } else if (virtualColumnRegistry != null) { final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( @@ -538,14 +539,10 @@ public class Expressions operand.getType() ); - if (!plannerContext.isUseBoundsAndSelectors()) { - equalFilter = NullFilter.forColumn(virtualColumn); + if (plannerContext.isUseBoundsAndSelectors()) { + equalFilter = new SelectorDimFilter(virtualColumn, NullHandling.defaultStringValue(), null); } else { - equalFilter = new SelectorDimFilter( - virtualColumn, - NullHandling.defaultStringValue(), - null - ); + equalFilter = NullFilter.forColumn(virtualColumn); } } else { return null; @@ -601,11 +598,11 @@ public class Expressions } final DruidExpression rhsExpression = toDruidExpression(plannerContext, rowSignature, rhs); - final Expr parsedRhsExpression = rhsExpression != null + final Expr rhsParsed = rhsExpression != null ? plannerContext.parseExpression(rhsExpression.getExpression()) : null; // rhs must be a literal - if (rhs.getKind() != SqlKind.LITERAL && (parsedRhsExpression == null || !parsedRhsExpression.isLiteral())) { + if (rhsParsed == null || !rhsParsed.isLiteral()) { return null; } @@ -651,43 +648,37 @@ public class Expressions final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn); if (granularity != null) { - // lhs is FLOOR(__time TO granularity); rhs must be a timestamp + // lhs is FLOOR(__time TO granularity); rhs must be a timestamp. final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis(); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - - if (plannerContext.isUseBoundsAndSelectors()) { - // Create a BoundRefKey that strips the extractionFn and compares __time as a number. - final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - - return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned); - } else { - final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG); - return getRangeTimeDimFilter(flippedKind, rangeRefKey, rhsInterval, rhsAligned); - } + return buildTimeFloorFilter(column, granularity, flippedKind, rhsMillis, plannerContext); } } - if (plannerContext.isUseBoundsAndSelectors() && rhs instanceof RexLiteral) { - final String val; - final RexLiteral rhsLiteral = (RexLiteral) rhs; - if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.value(rhsLiteral)); - } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.stringValue(rhsLiteral)); - } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) { - val = String.valueOf( - Calcites.calciteDateTimeLiteralToJoda( - rhsLiteral, - plannerContext.getTimeZone() - ).getMillis() - ); - } else { - // Don't know how to filter on this kind of literal. + final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType()); + + if (plannerContext.isUseBoundsAndSelectors()) { + if (matchValueType == null || !matchValueType.isPrimitive()) { + // Fall back to expression filter. return null; } + + final String stringVal; + + if (rhsParsed.getLiteralValue() == null) { + stringVal = NullHandling.defaultStringValue(); + } else if (RexUtil.isLiteral(rhs, true) && SqlTypeName.NUMERIC_TYPES.contains(rhs.getType().getSqlTypeName())) { + // Peek inside the original rhs for numerics, rather than using the parsed version, for highest fidelity + // to what the query originally contained. (It may be a BigDecimal.) + stringVal = String.valueOf(RexLiteral.value(rhs)); + } else { + stringVal = String.valueOf(rhsParsed.getLiteralValue()); + } + + if (stringVal == null) { + // Fall back to expression filter. + return null; + } + // Numeric lhs needs a numeric comparison. final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType()); final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator); @@ -696,22 +687,22 @@ public class Expressions // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator). switch (flippedKind) { case EQUALS: - filter = Bounds.equalTo(boundRefKey, val); + filter = Bounds.equalTo(boundRefKey, stringVal); break; case NOT_EQUALS: - filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val)); + filter = new NotDimFilter(Bounds.equalTo(boundRefKey, stringVal)); break; case GREATER_THAN: - filter = Bounds.greaterThan(boundRefKey, val); + filter = Bounds.greaterThan(boundRefKey, stringVal); break; case GREATER_THAN_OR_EQUAL: - filter = Bounds.greaterThanOrEqualTo(boundRefKey, val); + filter = Bounds.greaterThanOrEqualTo(boundRefKey, stringVal); break; case LESS_THAN: - filter = Bounds.lessThan(boundRefKey, val); + filter = Bounds.lessThan(boundRefKey, stringVal); break; case LESS_THAN_OR_EQUAL: - filter = Bounds.lessThanOrEqualTo(boundRefKey, val); + filter = Bounds.lessThanOrEqualTo(boundRefKey, stringVal); break; default: throw new IllegalStateException("Shouldn't have got here"); @@ -719,20 +710,13 @@ public class Expressions return filter; } else { - //noinspection VariableNotUsedInsideIf - if (extractionFn != null) { + final Object val = rhsParsed.getLiteralValue(); + + if (extractionFn != null || val == null) { // fall back to expression filter return null; } - final Object val; - if (parsedRhsExpression != null && parsedRhsExpression.isLiteral()) { - val = parsedRhsExpression.getLiteralValue(); - } else { - // Don't know how to filter on this kind of literal. - return null; - } - final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType()); final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType); final DimFilter filter; @@ -850,26 +834,20 @@ public class Expressions final PlannerContext plannerContext ) { + final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); + + // Is rhs aligned on granularity boundaries? + final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; + if (plannerContext.isUseBoundsAndSelectors()) { final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned); } else { final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - return getRangeTimeDimFilter(operatorKind, rangeRefKey, rhsInterval, rhsAligned); } } - private static DimFilter getBoundTimeDimFilter( SqlKind operatorKind, BoundRefKey boundRefKey, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index 15819d1e155..cffee832535 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -41,9 +41,8 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import javax.annotation.Nullable; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOperatorConversion { @@ -98,7 +97,9 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper // if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into // selector/equality filters on the individual array elements - if (leftExpr.isSimpleExtraction() && !(leftExpr.isDirectColumnAccess() && leftExpr.getDruidType() != null && leftExpr.getDruidType().isArray())) { + if (leftExpr.isSimpleExtraction() + && !isArray(leftExpr) + && (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) { Expr expr = plannerContext.parseExpression(rightExpr.getExpression()); // To convert this expression filter into an And of Selector filters, we need to extract all array elements. // For now, we can optimize only when rightExpr is a literal because there is no way to extract the array elements @@ -115,37 +116,34 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper // However, since both Calcite's SqlMultisetValueConstructor and Druid's ArrayConstructorFunction don't allow // to create an empty array with no argument, we just return null. return null; - } else if (arrayElements.length == 1) { - if (plannerContext.isUseBoundsAndSelectors()) { - return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); - } else { - return new EqualityFilter( - leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), - arrayElements[0], - null - ); - } } else { - final List selectFilters = Arrays - .stream(arrayElements) - .map(val -> { - if (plannerContext.isUseBoundsAndSelectors()) { - return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)); - } else { - return new EqualityFilter( + final List filters = new ArrayList<>(arrayElements.length); + + for (final Object val : arrayElements) { + if (plannerContext.isUseBoundsAndSelectors()) { + filters.add(newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val))); + } else { + // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above. + filters.add( + new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), val, null - ); - } - }) - .collect(Collectors.toList()); - return new AndDimFilter(selectFilters); + ) + ); + } + } + + return filters.size() == 1 ? filters.get(0) : new AndDimFilter(filters); } } } return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions); } + + private static boolean isArray(final DruidExpression expr) + { + return expr.getDruidType() != null && expr.getDruidType().isArray(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index ea2c590a2f9..8e09ea0c734 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -114,7 +114,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera } Expr expr = plannerContext.parseExpression(complexExpr.getExpression()); - if (expr.isLiteral()) { + if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) { // Evaluate the expression to take out the array elements. // We can safely pass null if the expression is literal. ExprEval exprEval = expr.eval(InputBindings.nilBindings()); @@ -129,6 +129,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera if (plannerContext.isUseBoundsAndSelectors()) { return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); } else { + // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above. return new EqualityFilter( simpleExtractionExpr.getSimpleExtraction().getColumn(), ExpressionType.toColumnType(exprEval.type()), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java index e980ce91c6e..6d5db69f9a9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java @@ -119,11 +119,7 @@ public class CaseOperatorConversion implements SqlOperatorConversion DimFilter thenFilter = null, elseFilter = null; final DimFilter isNull; if (plannerContext.isUseBoundsAndSelectors()) { - isNull = new SelectorDimFilter( - condition.getArguments().get(0).getDirectColumn(), - null, - null - ); + isNull = new SelectorDimFilter(condition.getArguments().get(0).getDirectColumn(), null, null); } else { isNull = NullFilter.forColumn(condition.getArguments().get(0).getDirectColumn()); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java index 7c066e663c4..2e634e69b89 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java @@ -44,6 +44,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.math.expr.ExpressionProcessing; +import org.apache.druid.math.expr.ExpressionProcessingConfig; import org.apache.druid.query.ordering.StringComparator; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.ColumnType; @@ -132,9 +133,15 @@ public class Calcites } /** - * Convert {@link RelDataType} to the most appropriate {@link ValueType} - * Caller who want to coerce all ARRAY types to STRING can set `druid.expressions.allowArrayToStringCast` - * runtime property in {@link org.apache.druid.math.expr.ExpressionProcessingConfig} + * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType} + * that is appropriate for this {@link RelDataType}. + * + * Equivalent to {@link #getValueTypeForRelDataTypeFull(RelDataType)}, except this method returns + * {@link ColumnType#STRING} when {@link ColumnType#isArray()} if + * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} is set via the server property + * {@code druid.expressions.allowArrayToStringCast}. + * + * @return type, or null if there is no matching type */ @Nullable public static ColumnType getColumnTypeForRelDataType(final RelDataType type) @@ -148,7 +155,13 @@ public class Calcites } /** - * Convert {@link RelDataType} to the most appropriate {@link ValueType} + * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType} + * that is appropriate for this {@link RelDataType}. + * + * Equivalent to {@link #getColumnTypeForRelDataType(RelDataType)}, but ignores + * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} (and acts as if it is false). + * + * @return type, or null if there is no matching type */ @Nullable public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java index d0230da06b1..ca6cb207ce6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java @@ -316,6 +316,14 @@ public class PlannerContext return stringifyArrays; } + /** + * Whether we should use {@link org.apache.druid.query.filter.BoundDimFilter} and + * {@link org.apache.druid.query.filter.SelectorDimFilter} (true) or {@link org.apache.druid.query.filter.RangeFilter}, + * {@link org.apache.druid.query.filter.EqualityFilter}, and {@link org.apache.druid.query.filter.NullFilter} (false). + * + * Typically true when {@link NullHandling#replaceWithDefault()} and false when {@link NullHandling#sqlCompatible()}. + * Can be overriden by the undocumented context parameter {@link #CTX_SQL_USE_BOUNDS_AND_SELECTORS}. + */ public boolean isUseBoundsAndSelectors() { return useBoundsAndSelectors; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 1a43a240f17..8bd82b7d255 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -41,6 +41,7 @@ import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; @@ -517,6 +518,33 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayOverlapFilterWithExtractionFn() + { + testQuery( + "SELECT dim3 FROM druid.numfoo WHERE ARRAY_OVERLAP(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + NullHandling.sqlCompatible() + ? expressionFilter("array_overlap(substring(\"dim3\", 0, 1),array('a','b'))") + : new InDimFilter("dim3", ImmutableList.of("a", "b"), new SubstringDimExtractionFn(0, 1)) + ) + .columns("dim3") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"b\",\"c\"]"} + ) + ); + } + @Test public void testArrayOverlapFilterNonLiteral() { @@ -566,6 +594,35 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayContainsFilterWithExtractionFn() + { + testQuery( + "SELECT dim3 FROM druid.numfoo WHERE ARRAY_CONTAINS(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + NullHandling.sqlCompatible() + ? expressionFilter("array_contains(substring(\"dim3\", 0, 1),array('a','b'))") + : and( + selector("dim3", "a", new SubstringDimExtractionFn(0, 1)), + selector("dim3", "b", new SubstringDimExtractionFn(0, 1)) + ) + ) + .columns("dim3") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"} + ) + ); + } + @Test public void testArrayContainsArrayOfOneElement() {