mirror of https://github.com/apache/druid.git
update ARRAY_OVERLAP to plan with ArrayContainsElement for ARRAY columns (#15451)
Updates ARRAY_OVERLAP to use the same ArrayContainsElement filter added in #15366 when filtering ARRAY typed columns so that it can also use indexes like ARRAY_CONTAINS.
This commit is contained in:
parent
0516d0dae4
commit
5ce4aab3b8
|
@ -200,8 +200,9 @@ public class SqlExpressionBenchmark
|
|||
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long4), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
|
||||
// 37: time shift + expr agg (group by), uniform distribution high cardinality
|
||||
"SELECT TIME_SHIFT(MILLIS_TO_TIMESTAMP(long5), 'PT1H', 1), string2, SUM(long1 * double4) FROM foo GROUP BY 1,2 ORDER BY 3",
|
||||
// 38: array filtering
|
||||
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2"
|
||||
// 38,39: array element filtering
|
||||
"SELECT string1, long1 FROM foo WHERE ARRAY_CONTAINS(\"multi-string3\", 100) GROUP BY 1,2",
|
||||
"SELECT string1, long1 FROM foo WHERE ARRAY_OVERLAP(\"multi-string3\", ARRAY[100, 200]) GROUP BY 1,2"
|
||||
);
|
||||
|
||||
@Param({"5000000"})
|
||||
|
@ -260,7 +261,8 @@ public class SqlExpressionBenchmark
|
|||
"35",
|
||||
"36",
|
||||
"37",
|
||||
"38"
|
||||
"38",
|
||||
"39"
|
||||
})
|
||||
private String query;
|
||||
|
||||
|
|
|
@ -366,6 +366,11 @@ public class DruidExpression
|
|||
return Preconditions.checkNotNull(simpleExtraction);
|
||||
}
|
||||
|
||||
public boolean isArray()
|
||||
{
|
||||
return druidType != null && druidType.isArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get sub {@link DruidExpression} arguments of this expression
|
||||
*/
|
||||
|
|
|
@ -100,7 +100,7 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
|
|||
// if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into
|
||||
// selector/equality filters on the individual array elements
|
||||
if (leftExpr.isSimpleExtraction()
|
||||
&& !isArray(leftExpr)
|
||||
&& !leftExpr.isArray()
|
||||
&& (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) {
|
||||
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
|
||||
// To convert this expression filter into an And of Selector filters, we need to extract all array elements.
|
||||
|
@ -142,9 +142,9 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
|
|||
}
|
||||
}
|
||||
// if the input is a direct array column, we can use sweet array filter
|
||||
if (leftExpr.isDirectColumnAccess() && isArray(leftExpr)) {
|
||||
if (leftExpr.isDirectColumnAccess() && leftExpr.isArray()) {
|
||||
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
|
||||
// To convert this expression filter into an And of ArrayContainsElement filters, we need to extract all array
|
||||
// To convert this expression filter into an AND of ArrayContainsElement filters, we need to extract all array
|
||||
// elements. For now, we can optimize only when rightExpr is a literal because there is no way to extract the
|
||||
// array elements by traversing the Expr. Note that all implementations of Expr are defined as package-private
|
||||
// classes in a different package.
|
||||
|
@ -154,6 +154,11 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
|
|||
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
|
||||
if (exprEval.isArray()) {
|
||||
final Object[] arrayElements = exprEval.asArray();
|
||||
if (arrayElements.length == 0) {
|
||||
// this isn't likely possible today because array constructor function does not accept empty argument list
|
||||
// but just in case, return null
|
||||
return null;
|
||||
}
|
||||
final List<DimFilter> filters = new ArrayList<>(arrayElements.length);
|
||||
final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
|
||||
for (final Object val : arrayElements) {
|
||||
|
@ -180,9 +185,4 @@ public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOper
|
|||
}
|
||||
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
|
||||
}
|
||||
|
||||
private static boolean isArray(final DruidExpression expr)
|
||||
{
|
||||
return expr.getDruidType() != null && expr.getDruidType().isArray();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,9 +30,12 @@ import org.apache.druid.math.expr.Expr;
|
|||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExpressionType;
|
||||
import org.apache.druid.math.expr.InputBindings;
|
||||
import org.apache.druid.query.filter.ArrayContainsElementFilter;
|
||||
import org.apache.druid.query.filter.DimFilter;
|
||||
import org.apache.druid.query.filter.EqualityFilter;
|
||||
import org.apache.druid.query.filter.InDimFilter;
|
||||
import org.apache.druid.query.filter.OrDimFilter;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.column.RowSignature;
|
||||
import org.apache.druid.sql.calcite.expression.DruidExpression;
|
||||
import org.apache.druid.sql.calcite.expression.Expressions;
|
||||
|
@ -41,6 +44,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext;
|
|||
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOperatorConversion
|
||||
|
@ -90,29 +94,29 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
|
|||
}
|
||||
|
||||
// Converts array_overlaps() function into an OR of Selector filters if possible.
|
||||
final DruidExpression leftExpression = druidExpressions.get(0);
|
||||
final DruidExpression rightExpression = druidExpressions.get(1);
|
||||
final boolean leftSimpleExtractionExpr = leftExpression.isSimpleExtraction();
|
||||
final boolean leftArrayColumn = leftExpression.isDirectColumnAccess() && leftExpression.getDruidType() != null && leftExpression.getDruidType().isArray();
|
||||
final boolean rightSimpleExtractionExpr = rightExpression.isSimpleExtraction();
|
||||
final boolean rightArrayColumn = rightExpression.isDirectColumnAccess() && rightExpression.getDruidType() != null && rightExpression.getDruidType().isArray();
|
||||
final DruidExpression leftExpr = druidExpressions.get(0);
|
||||
final DruidExpression rightExpr = druidExpressions.get(1);
|
||||
final boolean leftSimpleExtractionExpr = leftExpr.isSimpleExtraction();
|
||||
final boolean rightSimpleExtractionExpr = rightExpr.isSimpleExtraction();
|
||||
final DruidExpression simpleExtractionExpr;
|
||||
final DruidExpression complexExpr;
|
||||
|
||||
if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr && !(leftArrayColumn || rightArrayColumn)) {
|
||||
if (leftSimpleExtractionExpr ^ rightSimpleExtractionExpr) {
|
||||
if (leftSimpleExtractionExpr) {
|
||||
simpleExtractionExpr = leftExpression;
|
||||
complexExpr = rightExpression;
|
||||
simpleExtractionExpr = leftExpr;
|
||||
complexExpr = rightExpr;
|
||||
} else {
|
||||
simpleExtractionExpr = rightExpression;
|
||||
complexExpr = leftExpression;
|
||||
simpleExtractionExpr = rightExpr;
|
||||
complexExpr = leftExpr;
|
||||
}
|
||||
} else {
|
||||
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
|
||||
}
|
||||
|
||||
Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
|
||||
if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
|
||||
final Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
|
||||
if (expr.isLiteral()
|
||||
&& !simpleExtractionExpr.isArray()
|
||||
&& (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
|
||||
// Evaluate the expression to take out the array elements.
|
||||
// We can safely pass null if the expression is literal.
|
||||
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
|
||||
|
@ -149,6 +153,47 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
// if the input is a direct array column, we can use sweet array filter
|
||||
if (simpleExtractionExpr.isDirectColumnAccess() && simpleExtractionExpr.isArray()) {
|
||||
// To convert this expression filter into an OR of ArrayContainsElement filters, we need to extract all array
|
||||
// elements.
|
||||
if (expr.isLiteral()) {
|
||||
// Evaluate the expression to get out the array elements.
|
||||
// We can safely pass a nil ObjectBinding if the expression is literal.
|
||||
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
|
||||
if (exprEval.isArray()) {
|
||||
final Object[] arrayElements = exprEval.asArray();
|
||||
if (arrayElements.length == 0) {
|
||||
// this isn't likely possible today because array constructor function does not accept empty argument list
|
||||
// but just in case, return null
|
||||
return null;
|
||||
}
|
||||
final List<DimFilter> filters = new ArrayList<>(arrayElements.length);
|
||||
final ColumnType elementType = ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type()));
|
||||
for (final Object val : arrayElements) {
|
||||
filters.add(
|
||||
new ArrayContainsElementFilter(
|
||||
leftExpr.getSimpleExtraction().getColumn(),
|
||||
elementType,
|
||||
val,
|
||||
null
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
return filters.size() == 1 ? filters.get(0) : new OrDimFilter(filters);
|
||||
} else {
|
||||
return new ArrayContainsElementFilter(
|
||||
leftExpr.getSimpleExtraction().getColumn(),
|
||||
ExpressionType.toColumnType(exprEval.type()),
|
||||
exprEval.valueOrDefault(),
|
||||
null
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -860,7 +860,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
|||
newScanQueryBuilder()
|
||||
.dataSource(DATA_SOURCE_ARRAYS)
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.filters(expressionFilter("array_overlap(\"arrayStringNulls\",array('a','b'))"))
|
||||
.filters(
|
||||
or(
|
||||
new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "a", null),
|
||||
new ArrayContainsElementFilter("arrayStringNulls", ColumnType.STRING, "b", null)
|
||||
)
|
||||
)
|
||||
.columns("arrayStringNulls")
|
||||
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
|
||||
.limit(5)
|
||||
|
@ -886,7 +891,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
|||
newScanQueryBuilder()
|
||||
.dataSource(DATA_SOURCE_ARRAYS)
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.filters(expressionFilter("array_overlap(\"arrayLongNulls\",array(1,2))"))
|
||||
.filters(
|
||||
or(
|
||||
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
|
||||
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null)
|
||||
)
|
||||
)
|
||||
.columns("arrayLongNulls")
|
||||
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
|
||||
.limit(5)
|
||||
|
@ -912,7 +922,12 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
|||
newScanQueryBuilder()
|
||||
.dataSource(DATA_SOURCE_ARRAYS)
|
||||
.intervals(querySegmentSpec(Filtration.eternity()))
|
||||
.filters(expressionFilter("array_overlap(\"arrayDoubleNulls\",array(1.1,2.2))"))
|
||||
.filters(
|
||||
or(
|
||||
new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 1.1, null),
|
||||
new ArrayContainsElementFilter("arrayDoubleNulls", ColumnType.DOUBLE, 2.2, null)
|
||||
)
|
||||
)
|
||||
.columns("arrayDoubleNulls")
|
||||
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
|
||||
.limit(5)
|
||||
|
|
|
@ -1563,7 +1563,8 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
|
|||
.setDimFilter(
|
||||
or(
|
||||
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 1L, null),
|
||||
expressionFilter("array_overlap(\"arrayLongNulls\",array(2,3))")
|
||||
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 2L, null),
|
||||
new ArrayContainsElementFilter("arrayLongNulls", ColumnType.LONG, 3L, null)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
|
||||
|
|
Loading…
Reference in New Issue