diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index 83b3ed531f0..291a0727a1d 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -200,8 +200,9 @@ public class SqlExpressionBenchmark "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", // 37: time shift + expr agg (group by), uniform distribution high cardinality "SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3", - // 38: array filtering - "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2" + // 38,39: array element filtering + "SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2", + "SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2" ); @Param({"5000000"}) @@ -260,7 +261,8 @@ public class SqlExpressionBenchmark "35", "36", "37", - "38" + "38", + "39" }) private String query; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java index b06f3e402f2..7faac91ab09 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java @@ -366,6 +366,11 @@ public class DruidExpression return Preconditions.checkNotNull(simpleExtraction); } + public boolean isArray() + { + return druidType != null && druidType.isArray(); + } + /** * Get sub {@link DruidExpression} arguments of this expression */ diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java index c961ed04016..9036d7e406d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java @@ -100,7 +100,7 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper // if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into // selector/equality filters on the individual array elements if (leftExpr.isSimpleExtraction() - && !isArray(leftExpr) + && !leftExpr.isArray() && (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) { Expr expr = plannerContext.parseExpression(rightExpr.getExpression()); // To convert this expression filter into an And of Selector filters, we need to extract all array elements. @@ -142,9 +142,9 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper } } // if the input is a direct array column, we can use sweet array filter - if (leftExpr.isDirectColumnAccess() && isArray(leftExpr)) { + if (leftExpr.isDirectColumnAccess() && leftExpr.isArray()) { Expr expr = plannerContext.parseExpression(rightExpr.getExpression()); - // To convert this expression filter into an And of ArrayContainsElement filters, we need to extract all array + // To convert this expression filter into an AND of ArrayContainsElement filters, we need to extract all array // elements. For now, we can optimize only when rightExpr is a literal because there is no way to extract the // array elements by traversing the Expr. Note that all implementations of Expr are defined as package-private // classes in a different package. @@ -154,6 +154,11 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper ExprEval exprEval = expr.eval(InputBindings.nilBindings()); if (exprEval.isArray()) { final Object[] arrayElements = exprEval.asArray(); + if (arrayElements.length == 0) { + // this isn't likely possible today because array constructor function does not accept empty argument list + // but just in case, return null + return null; + } final List filters = new ArrayList<>(arrayElements.length); final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())); for (final Object val : arrayElements) { @@ -180,9 +185,4 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper } return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions); } - - private static boolean isArray(final DruidExpression expr) - { - return expr.getDruidType() != null && expr.getDruidType().isArray(); - } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java index 24fac69d11d..af5c65d9f84 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java @@ -30,9 +30,12 @@ import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.apache.druid.query.filter.ArrayContainsElementFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.OrDimFilter; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -41,6 +44,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.List; public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOperatorConversion @@ -90,29 +94,29 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera } // Converts array_overlaps() function into an OR of Selector filters if possible. - final DruidExpression leftExpression = druidExpressions.get(0); - final DruidExpression rightExpression = druidExpressions.get(1); - final boolean leftSimpleExtractionExpr = leftExpression.isSimpleExtraction(); - final boolean leftArrayColumn = leftExpression.isDirectColumnAccess() && leftExpression.getDruidType() != null && leftExpression.getDruidType().isArray(); - final boolean rightSimpleExtractionExpr = rightExpression.isSimpleExtraction(); - final boolean rightArrayColumn = rightExpression.isDirectColumnAccess() && rightExpression.getDruidType() != null && rightExpression.getDruidType().isArray(); + final DruidExpression leftExpr = druidExpressions.get(0); + final DruidExpression rightExpr = druidExpressions.get(1); + final boolean leftSimpleExtractionExpr = leftExpr.isSimpleExtraction(); + final boolean rightSimpleExtractionExpr = rightExpr.isSimpleExtraction(); final DruidExpression simpleExtractionExpr; final DruidExpression complexExpr; - if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr && !(leftArrayColumn || rightArrayColumn)) { + if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr) { if (leftSimpleExtractionExpr) { - simpleExtractionExpr = leftExpression; - complexExpr = rightExpression; + simpleExtractionExpr = leftExpr; + complexExpr = rightExpr; } else { - simpleExtractionExpr = rightExpression; - complexExpr = leftExpression; + simpleExtractionExpr = rightExpr; + complexExpr = leftExpr; } } else { return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions); } - Expr expr = plannerContext.parseExpression(complexExpr.getExpression()); - if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) { + final Expr expr = plannerContext.parseExpression(complexExpr.getExpression()); + if (expr.isLiteral() + && !simpleExtractionExpr.isArray() + && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) { // Evaluate the expression to take out the array elements. // We can safely pass null if the expression is literal. ExprEval exprEval = expr.eval(InputBindings.nilBindings()); @@ -149,6 +153,47 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera ); } } + + // if the input is a direct array column, we can use sweet array filter + if (simpleExtractionExpr.isDirectColumnAccess() && simpleExtractionExpr.isArray()) { + // To convert this expression filter into an OR of ArrayContainsElement filters, we need to extract all array + // elements. + if (expr.isLiteral()) { + // Evaluate the expression to get out the array elements. + // We can safely pass a nil ObjectBinding if the expression is literal. + ExprEval exprEval = expr.eval(InputBindings.nilBindings()); + if (exprEval.isArray()) { + final Object[] arrayElements = exprEval.asArray(); + if (arrayElements.length == 0) { + // this isn't likely possible today because array constructor function does not accept empty argument list + // but just in case, return null + return null; + } + final List filters = new ArrayList<>(arrayElements.length); + final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())); + for (final Object val : arrayElements) { + filters.add( + new ArrayContainsElementFilter( + leftExpr.getSimpleExtraction().getColumn(), + elementType, + val, + null + ) + ); + } + + return filters.size() == 1 ? filters.get(0) : new OrDimFilter(filters); + } else { + return new ArrayContainsElementFilter( + leftExpr.getSimpleExtraction().getColumn(), + ExpressionType.toColumnType(exprEval.type()), + exprEval.valueOrDefault(), + null + ); + } + } + } + return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 0f6e86edb18..2f27011b056 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -860,7 +860,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest newScanQueryBuilder() .dataSource(DATA_SOURCE_ARRAYS) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))")) + .filters( + or( + new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "a", null), + new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "b", null) + ) + ) .columns("arrayStringNulls") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) @@ -886,7 +891,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest newScanQueryBuilder() .dataSource(DATA_SOURCE_ARRAYS) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))")) + .filters( + or( + new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null), + new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null) + ) + ) .columns("arrayLongNulls") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) @@ -912,7 +922,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest newScanQueryBuilder() .dataSource(DATA_SOURCE_ARRAYS) .intervals(querySegmentSpec(Filtration.eternity())) - .filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))")) + .filters( + or( + new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 1.1, null), + new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 2.2, null) + ) + ) .columns("arrayDoubleNulls") .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .limit(5) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 9c0677a0f15..22e16f6e7e1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -1563,7 +1563,8 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .setDimFilter( or( new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null), - expressionFilter("array_overlap(\"arrayLongNulls\",array(2,3))") + new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null), + new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 3L, null) ) ) .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))