From c2e67585809d035a1973546ea081f46dc755b46f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 19 Jul 2023 22:40:47 -0700 Subject: [PATCH] Simplify bounds/range vs selectors/equality logic in SQL planning. (#14619) * Simplify bounds/range vs selectors/equality logic in SQL planning. 1) Consolidate duplicate code related to Expressions#buildTimeFloorFilter. 2) Cleaner logic in Expressions#toSimpleLeafFilter: choose bounds vs range filter based solely on plannerContext.isUseBoundsAndSelectors, not also considering rhs kind. Use parsed rhs in both paths (except for numerics in the bound path). 3) Fix ArrayContains, ArrayOverlap to avoid equality filters when there is an extractionFn present. Fixes a bug introduced in #14612. * Avoid sending nonprimitives down the bound path. --- .../sql/calcite/expression/Expressions.java | 126 ++++++++---------- .../ArrayContainsOperatorConversion.java | 50 ++++--- .../ArrayOverlapOperatorConversion.java | 3 +- .../builtin/CaseOperatorConversion.java | 6 +- .../druid/sql/calcite/planner/Calcites.java | 21 ++- .../sql/calcite/planner/PlannerContext.java | 8 ++ .../sql/calcite/CalciteArraysQueryTest.java | 57 ++++++++ 7 files changed, 161 insertions(+), 110 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 64db457f208..5fc88491b2d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -28,6 +28,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -519,18 +520,18 @@ public class Expressions // column instead for filtering to ensure that results are correct if (druidExpression.isSimpleExtraction() && !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) { - if (!plannerContext.isUseBoundsAndSelectors()) { - if (druidExpression.getSimpleExtraction().getExtractionFn() != null) { - // return null to fallback to using an expression filter - return null; - } - equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn()); - } else { + if (plannerContext.isUseBoundsAndSelectors()) { equalFilter = new SelectorDimFilter( druidExpression.getSimpleExtraction().getColumn(), NullHandling.defaultStringValue(), druidExpression.getSimpleExtraction().getExtractionFn() ); + } else { + if (druidExpression.getSimpleExtraction().getExtractionFn() != null) { + // return null to fallback to using an expression filter + return null; + } + equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn()); } } else if (virtualColumnRegistry != null) { final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( @@ -538,14 +539,10 @@ public class Expressions operand.getType() ); - if (!plannerContext.isUseBoundsAndSelectors()) { - equalFilter = NullFilter.forColumn(virtualColumn); + if (plannerContext.isUseBoundsAndSelectors()) { + equalFilter = new SelectorDimFilter(virtualColumn, NullHandling.defaultStringValue(), null); } else { - equalFilter = new SelectorDimFilter( - virtualColumn, - NullHandling.defaultStringValue(), - null - ); + equalFilter = NullFilter.forColumn(virtualColumn); } } else { return null; @@ -601,11 +598,11 @@ public class Expressions } final DruidExpression rhsExpression = toDruidExpression(plannerContext, rowSignature, rhs); - final Expr parsedRhsExpression = rhsExpression != null + final Expr rhsParsed = rhsExpression != null ? plannerContext.parseExpression(rhsExpression.getExpression()) : null; // rhs must be a literal - if (rhs.getKind() != SqlKind.LITERAL && (parsedRhsExpression == null || !parsedRhsExpression.isLiteral())) { + if (rhsParsed == null || !rhsParsed.isLiteral()) { return null; } @@ -651,43 +648,37 @@ public class Expressions final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn); if (granularity != null) { - // lhs is FLOOR(__time TO granularity); rhs must be a timestamp + // lhs is FLOOR(__time TO granularity); rhs must be a timestamp. final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis(); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - - if (plannerContext.isUseBoundsAndSelectors()) { - // Create a BoundRefKey that strips the extractionFn and compares __time as a number. - final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - - return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned); - } else { - final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG); - return getRangeTimeDimFilter(flippedKind, rangeRefKey, rhsInterval, rhsAligned); - } + return buildTimeFloorFilter(column, granularity, flippedKind, rhsMillis, plannerContext); } } - if (plannerContext.isUseBoundsAndSelectors() && rhs instanceof RexLiteral) { - final String val; - final RexLiteral rhsLiteral = (RexLiteral) rhs; - if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.value(rhsLiteral)); - } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) { - val = String.valueOf(RexLiteral.stringValue(rhsLiteral)); - } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) { - val = String.valueOf( - Calcites.calciteDateTimeLiteralToJoda( - rhsLiteral, - plannerContext.getTimeZone() - ).getMillis() - ); - } else { - // Don't know how to filter on this kind of literal. + final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType()); + + if (plannerContext.isUseBoundsAndSelectors()) { + if (matchValueType == null || !matchValueType.isPrimitive()) { + // Fall back to expression filter. return null; } + + final String stringVal; + + if (rhsParsed.getLiteralValue() == null) { + stringVal = NullHandling.defaultStringValue(); + } else if (RexUtil.isLiteral(rhs, true) && SqlTypeName.NUMERIC_TYPES.contains(rhs.getType().getSqlTypeName())) { + // Peek inside the original rhs for numerics, rather than using the parsed version, for highest fidelity + // to what the query originally contained. (It may be a BigDecimal.) + stringVal = String.valueOf(RexLiteral.value(rhs)); + } else { + stringVal = String.valueOf(rhsParsed.getLiteralValue()); + } + + if (stringVal == null) { + // Fall back to expression filter. + return null; + } + // Numeric lhs needs a numeric comparison. final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType()); final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator); @@ -696,22 +687,22 @@ public class Expressions // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator). switch (flippedKind) { case EQUALS: - filter = Bounds.equalTo(boundRefKey, val); + filter = Bounds.equalTo(boundRefKey, stringVal); break; case NOT_EQUALS: - filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val)); + filter = new NotDimFilter(Bounds.equalTo(boundRefKey, stringVal)); break; case GREATER_THAN: - filter = Bounds.greaterThan(boundRefKey, val); + filter = Bounds.greaterThan(boundRefKey, stringVal); break; case GREATER_THAN_OR_EQUAL: - filter = Bounds.greaterThanOrEqualTo(boundRefKey, val); + filter = Bounds.greaterThanOrEqualTo(boundRefKey, stringVal); break; case LESS_THAN: - filter = Bounds.lessThan(boundRefKey, val); + filter = Bounds.lessThan(boundRefKey, stringVal); break; case LESS_THAN_OR_EQUAL: - filter = Bounds.lessThanOrEqualTo(boundRefKey, val); + filter = Bounds.lessThanOrEqualTo(boundRefKey, stringVal); break; default: throw new IllegalStateException("Shouldn't have got here"); @@ -719,20 +710,13 @@ public class Expressions return filter; } else { - //noinspection VariableNotUsedInsideIf - if (extractionFn != null) { + final Object val = rhsParsed.getLiteralValue(); + + if (extractionFn != null || val == null) { // fall back to expression filter return null; } - final Object val; - if (parsedRhsExpression != null && parsedRhsExpression.isLiteral()) { - val = parsedRhsExpression.getLiteralValue(); - } else { - // Don't know how to filter on this kind of literal. - return null; - } - final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType()); final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType); final DimFilter filter; @@ -850,26 +834,20 @@ public class Expressions final PlannerContext plannerContext ) { + final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); + + // Is rhs aligned on granularity boundaries? + final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; + if (plannerContext.isUseBoundsAndSelectors()) { final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned); } else { final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG); - final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis)); - - // Is rhs aligned on granularity boundaries? - final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis; - return getRangeTimeDimFilter(operatorKind, rangeRefKey, rhsInterval, rhsAligned); } } - private static DimFilter getBoundTimeDimFilter( SqlKind operatorKind, BoundRefKey boundRefKey, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index 15819d1e155..cffee832535 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -41,9 +41,8 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import javax.annotation.Nullable; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOperatorConversion { @@ -98,7 +97,9 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper // if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into // selector/equality filters on the individual array elements - if (leftExpr.isSimpleExtraction() && !(leftExpr.isDirectColumnAccess() && leftExpr.getDruidType() != null && leftExpr.getDruidType().isArray())) { + if (leftExpr.isSimpleExtraction() + && !isArray(leftExpr) + && (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) { Expr expr = plannerContext.parseExpression(rightExpr.getExpression()); // To convert this expression filter into an And of Selector filters, we need to extract all array elements. // For now, we can optimize only when rightExpr is a literal because there is no way to extract the array elements @@ -115,37 +116,34 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper // However, since both Calcite's SqlMultisetValueConstructor and Druid's ArrayConstructorFunction don't allow // to create an empty array with no argument, we just return null. return null; - } else if (arrayElements.length == 1) { - if (plannerContext.isUseBoundsAndSelectors()) { - return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); - } else { - return new EqualityFilter( - leftExpr.getSimpleExtraction().getColumn(), - ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), - arrayElements[0], - null - ); - } } else { - final List selectFilters = Arrays - .stream(arrayElements) - .map(val -> { - if (plannerContext.isUseBoundsAndSelectors()) { - return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)); - } else { - return new EqualityFilter( + final List filters = new ArrayList<>(arrayElements.length); + + for (final Object val : arrayElements) { + if (plannerContext.isUseBoundsAndSelectors()) { + filters.add(newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val))); + } else { + // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above. + filters.add( + new EqualityFilter( leftExpr.getSimpleExtraction().getColumn(), ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())), val, null - ); - } - }) - .collect(Collectors.toList()); - return new AndDimFilter(selectFilters); + ) + ); + } + } + + return filters.size() == 1 ? filters.get(0) : new AndDimFilter(filters); } } } return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions); } + + private static boolean isArray(final DruidExpression expr) + { + return expr.getDruidType() != null && expr.getDruidType().isArray(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index ea2c590a2f9..8e09ea0c734 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -114,7 +114,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera } Expr expr = plannerContext.parseExpression(complexExpr.getExpression()); - if (expr.isLiteral()) { + if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) { // Evaluate the expression to take out the array elements. // We can safely pass null if the expression is literal. ExprEval exprEval = expr.eval(InputBindings.nilBindings()); @@ -129,6 +129,7 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera if (plannerContext.isUseBoundsAndSelectors()) { return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0])); } else { + // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above. return new EqualityFilter( simpleExtractionExpr.getSimpleExtraction().getColumn(), ExpressionType.toColumnType(exprEval.type()), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java index e980ce91c6e..6d5db69f9a9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java @@ -119,11 +119,7 @@ public class CaseOperatorConversion implements SqlOperatorConversion DimFilter thenFilter = null, elseFilter = null; final DimFilter isNull; if (plannerContext.isUseBoundsAndSelectors()) { - isNull = new SelectorDimFilter( - condition.getArguments().get(0).getDirectColumn(), - null, - null - ); + isNull = new SelectorDimFilter(condition.getArguments().get(0).getDirectColumn(), null, null); } else { isNull = NullFilter.forColumn(condition.getArguments().get(0).getDirectColumn()); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java index 7c066e663c4..2e634e69b89 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java @@ -44,6 +44,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.math.expr.ExpressionProcessing; +import org.apache.druid.math.expr.ExpressionProcessingConfig; import org.apache.druid.query.ordering.StringComparator; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.segment.column.ColumnType; @@ -132,9 +133,15 @@ public class Calcites } /** - * Convert {@link RelDataType} to the most appropriate {@link ValueType} - * Caller who want to coerce all ARRAY types to STRING can set `druid.expressions.allowArrayToStringCast` - * runtime property in {@link org.apache.druid.math.expr.ExpressionProcessingConfig} + * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType} + * that is appropriate for this {@link RelDataType}. + * + * Equivalent to {@link #getValueTypeForRelDataTypeFull(RelDataType)}, except this method returns + * {@link ColumnType#STRING} when {@link ColumnType#isArray()} if + * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} is set via the server property + * {@code druid.expressions.allowArrayToStringCast}. + * + * @return type, or null if there is no matching type */ @Nullable public static ColumnType getColumnTypeForRelDataType(final RelDataType type) @@ -148,7 +155,13 @@ public class Calcites } /** - * Convert {@link RelDataType} to the most appropriate {@link ValueType} + * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType} + * that is appropriate for this {@link RelDataType}. + * + * Equivalent to {@link #getColumnTypeForRelDataType(RelDataType)}, but ignores + * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} (and acts as if it is false). + * + * @return type, or null if there is no matching type */ @Nullable public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java index d0230da06b1..ca6cb207ce6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java @@ -316,6 +316,14 @@ public class PlannerContext return stringifyArrays; } + /** + * Whether we should use {@link org.apache.druid.query.filter.BoundDimFilter} and + * {@link org.apache.druid.query.filter.SelectorDimFilter} (true) or {@link org.apache.druid.query.filter.RangeFilter}, + * {@link org.apache.druid.query.filter.EqualityFilter}, and {@link org.apache.druid.query.filter.NullFilter} (false). + * + * Typically true when {@link NullHandling#replaceWithDefault()} and false when {@link NullHandling#sqlCompatible()}. + * Can be overriden by the undocumented context parameter {@link #CTX_SQL_USE_BOUNDS_AND_SELECTORS}. + */ public boolean isUseBoundsAndSelectors() { return useBoundsAndSelectors; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 1a43a240f17..8bd82b7d255 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -41,6 +41,7 @@ import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.extraction.SubstringDimExtractionFn; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.query.filter.LikeDimFilter; @@ -517,6 +518,33 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayOverlapFilterWithExtractionFn() + { + testQuery( + "SELECT dim3 FROM druid.numfoo WHERE ARRAY_OVERLAP(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + NullHandling.sqlCompatible() + ? expressionFilter("array_overlap(substring(\"dim3\", 0, 1),array('a','b'))") + : new InDimFilter("dim3", ImmutableList.of("a", "b"), new SubstringDimExtractionFn(0, 1)) + ) + .columns("dim3") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"}, + new Object[]{"[\"b\",\"c\"]"} + ) + ); + } + @Test public void testArrayOverlapFilterNonLiteral() { @@ -566,6 +594,35 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testArrayContainsFilterWithExtractionFn() + { + testQuery( + "SELECT dim3 FROM druid.numfoo WHERE ARRAY_CONTAINS(SUBSTRING(dim3, 1, 1), ARRAY['a','b']) LIMIT 5", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters( + NullHandling.sqlCompatible() + ? expressionFilter("array_contains(substring(\"dim3\", 0, 1),array('a','b'))") + : and( + selector("dim3", "a", new SubstringDimExtractionFn(0, 1)), + selector("dim3", "b", new SubstringDimExtractionFn(0, 1)) + ) + ) + .columns("dim3") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(5) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]"} + ) + ); + } + @Test public void testArrayContainsArrayOfOneElement() {