fix virtual column cycle bug, sql virtual column optimize bug (#12576)

* fix virtual column cycle bug, sql virtual column optimize bug

* more test
This commit is contained in:
Clint Wylie 2022-05-30 23:51:21 -07:00 committed by GitHub
parent 7291c92f4f
commit b746bf9129
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 229 additions and 7 deletions

View File

@ -464,12 +464,11 @@ public class VirtualColumns implements Cacheable
: Sets.newHashSet(columnNames);
for (String columnName : virtualColumn.requiredColumns()) {
final VirtualColumn dependency = getVirtualColumn(columnName);
if (dependency != null) {
if (!nextSet.add(columnName)) {
throw new IAE("Self-referential column[%s]", columnName);
}
final VirtualColumn dependency = getVirtualColumn(columnName);
if (dependency != null) {
detectCycles(dependency, nextSet);
}
}

View File

@ -392,6 +392,45 @@ public class VirtualColumnsTest extends InitializedNullHandlingTest
);
}
@Test
public void testCompositeVirtualColumnsCycles()
{
final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1 + x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1 + y", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0", "case_searched(notnull(1 + x), v1, v2)", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final VirtualColumns virtualColumns = VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
Assert.assertTrue(virtualColumns.exists("v0"));
Assert.assertTrue(virtualColumns.exists("v1"));
Assert.assertTrue(virtualColumns.exists("v2"));
}
@Test
public void testCompositeVirtualColumnsCyclesSiblings()
{
final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1 + x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1 + y", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0", "case_searched(notnull(v1), v1, v2)", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final VirtualColumns virtualColumns = VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
Assert.assertTrue(virtualColumns.exists("v0"));
Assert.assertTrue(virtualColumns.exists("v1"));
Assert.assertTrue(virtualColumns.exists("v2"));
}
@Test
public void testCompositeVirtualColumnsCyclesTree()
{
final ExpressionVirtualColumn expr1 = new ExpressionVirtualColumn("v1", "1 + x", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v2", "1 + v1", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final ExpressionVirtualColumn expr0 = new ExpressionVirtualColumn("v0", "v1 + v2", ColumnType.LONG, TestExprMacroTable.INSTANCE);
final VirtualColumns virtualColumns = VirtualColumns.create(ImmutableList.of(expr0, expr1, expr2));
Assert.assertTrue(virtualColumns.exists("v0"));
Assert.assertTrue(virtualColumns.exists("v1"));
Assert.assertTrue(virtualColumns.exists("v2"));
}
private VirtualColumns makeVirtualColumns()
{
final ExpressionVirtualColumn expr = new ExpressionVirtualColumn(

View File

@ -495,7 +495,7 @@ public class DruidExpression
{
List<DruidExpression> list = new ArrayList<>(expressions.size());
for (DruidExpression expr : expressions) {
list.add(visit(expr));
list.add(visit(expr.visit(this)));
}
return list;
}

View File

@ -19,7 +19,7 @@
package org.apache.druid.sql.calcite.expression.builtin;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlFunction;
@ -44,6 +44,8 @@ import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
/**
@ -340,6 +342,8 @@ public class MultiValueStringOperatorConversions
if (lit == null || lit.length == 0) {
return null;
}
HashSet<String> literals = Sets.newHashSetWithExpectedSize(lit.length);
literals.addAll(Arrays.asList(lit));
final DruidExpression.ExpressionGenerator builder = (args) -> {
final StringBuilder expressionBuilder;
@ -364,7 +368,7 @@ public class MultiValueStringOperatorConversions
(name, outputType, expression, macroTable) -> new ListFilteredVirtualColumn(
name,
druidExpressions.get(0).getSimpleExtraction().toDimensionSpec(druidExpressions.get(0).getDirectColumn(), outputType),
ImmutableSet.copyOf(lit),
literals,
isAllowList()
)
);

View File

@ -46,8 +46,10 @@ import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
{
@ -1195,6 +1197,184 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
);
}
@Test
public void testMultiValueListFilterComposedNested() throws Exception
{
// Cannot vectorize due to usage of expressions.
cannotVectorize();
testQuery(
"SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY['b']), 'no b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(
expressionVirtualColumn(
"v0",
"case_searched(notnull(\"v1\"),\"v1\",'no b')",
ColumnType.STRING
),
new ListFilteredVirtualColumn(
"v1",
DefaultDimensionSpec.of("dim3"),
ImmutableSet.of("b"),
true
)
)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)),
Integer.MAX_VALUE
))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
// this is a bug
// instead of default values it should actually be 'no b';
//
// it happens when using 'notnull' on the mv-filtered virtual column because deferred expression selector
// returns a 0 sized row, which is treated as a missing value by the grouping and the expression which would
// evaluate it and return 'no b' is never evaluated (because we don't add a null value in the forwarding
// dictionary even if it exists in the underlying column).
//
// if the 'notnull' was instead using the array filtering fallback expression
// case_searched(notnull(filter((x) -> array_contains(array('b'), x), \"dim3\")),\"v1\",'no b')
// where it doesn't use the deferred selector because it is no longer a single input expression, it would still
// evaluate to null because the filter expression never returns null, only an empty array, which is not null,
// so it evaluates 'v1' which of course is null because it is an empty row.
useDefault ? ImmutableList.of(
new Object[]{"", 4L},
new Object[]{"b", 2L}
) : ImmutableList.of(
new Object[]{null, 4L},
new Object[]{"b", 2L}
)
);
}
@Test
public void testMultiValueListFilterComposedNested2Input() throws Exception
{
// Cannot vectorize due to usage of expressions.
cannotVectorize();
testQuery(
"SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY['b']), dim1), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(
expressionVirtualColumn(
"v0",
"case_searched(notnull(\"v1\"),\"v1\",\"dim1\")",
ColumnType.STRING
),
new ListFilteredVirtualColumn(
"v1",
DefaultDimensionSpec.of("dim3"),
ImmutableSet.of("b"),
true
)
)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)),
Integer.MAX_VALUE
))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{"b", 2L},
new Object[]{"1", 1L},
new Object[]{"2", 1L},
new Object[]{"abc", 1L},
new Object[]{"def", 1L}
)
);
}
@Test
public void testMultiValueListFilterComposedNestedNullLiteral() throws Exception
{
// Cannot vectorize due to usage of expressions.
cannotVectorize();
Set<String> filter = new HashSet<>();
filter.add(null);
filter.add("b");
testQuery(
"SELECT COALESCE(MV_FILTER_ONLY(dim3, ARRAY[NULL, 'b']), 'no b'), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(
expressionVirtualColumn(
"v0",
"case_searched(notnull(\"v1\"),\"v1\",'no b')",
ColumnType.STRING
),
new ListFilteredVirtualColumn(
"v1",
DefaultDimensionSpec.of("dim3"),
filter,
true
)
)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)),
Integer.MAX_VALUE
))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
// unfortunately, unable to work around the bug by adding nulls to the allow list
useDefault ? ImmutableList.of(
new Object[]{"", 2L},
new Object[]{"b", 2L},
new Object[]{"no b", 2L}
) : ImmutableList.of(
new Object[]{null, 3L},
new Object[]{"b", 2L},
new Object[]{"no b", 1L}
)
);
}
@Test
public void testMultiValueListFilterComposedDeny() throws Exception
{