This commit is contained in:
Zoltan Haindrich 2024-11-13 18:00:52 +00:00
parent 96f572e679
commit 7d9693bd91
6 changed files with 208 additions and 31 deletions

View File

@ -37,6 +37,7 @@ import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rel.rules.DateRangeRules;
import org.apache.calcite.rel.rules.FilterCorrelateRule;
import org.apache.calcite.rel.rules.FilterJoinRule.FilterIntoJoinRule.FilterIntoJoinRuleConfig;
import org.apache.calcite.rel.rules.JoinExtractFilterRule;
import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
@ -280,6 +281,10 @@ public class CalciteRulesManager
builder.addMatchLimit(CalciteRulesManager.HEP_DEFAULT_MATCH_LIMIT);
builder.addRuleCollection(baseRuleSet(plannerContext));
builder.addRuleInstance(CoreRules.UNION_MERGE);
builder.addRuleInstance(FilterCorrelateRule.Config.DEFAULT.toRule());
builder.addRuleCollection(baseRuleSet(plannerContext));
builder.addRuleInstance(CoreRules.UNION_MERGE);
builder.addRuleInstance(FilterCorrelateRule.Config.DEFAULT.toRule());
builder.addRuleInstance(JoinExtractFilterRule.Config.DEFAULT.toRule());
builder.addRuleInstance(FilterIntoJoinRuleConfig.DEFAULT.withPredicate(DruidJoinRule::isSupportedPredicate).toRule());
builder.addRuleInstance(new LogicalUnnestRule());

View File

@ -1668,6 +1668,15 @@ public class BaseCalciteQueryTest extends CalciteTestBase
*/
public String ds(String colName)
{
return "_" + colName;
if (testBuilder().isDecoupledMode()) {
return colName;
} else {
return "_" + colName;
}
}
public String ds2(String colName)
{
return ds(ds(colName));
}
}

View File

@ -4598,7 +4598,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
}
// @DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@Test
public void testUnnestThriceWithFiltersOnDimAndUnnestCol()
{
@ -4685,7 +4685,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
// @DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@Test
public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumns()
{
@ -4889,6 +4889,8 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
// FIXME: this
// @DecoupledTestConfig(quidemReason = QuidemTestCaseReason.UNNEST_EXTRA_SCAN, separateDefaultModeTest = true)
@Test
public void testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumnsOrFilters()
{
@ -5338,7 +5340,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
// @DecoupledTestConfig(quidemReason = QuidemTestCaseReason.UNNEST_EXTRA_SCAN, separateDefaultModeTest = true)
@DecoupledTestConfig(quidemReason = QuidemTestCaseReason.UNNEST_EXTRA_SCAN, separateDefaultModeTest = true)
@Test
public void testUnnestWithFiltersWithExpressionInInnerQuery()
{
@ -5375,7 +5377,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
// @DecoupledTestConfig(quidemReason = QuidemTestCaseReason.UNNEST_EXTRA_SCAN, separateDefaultModeTest = true)
@DecoupledTestConfig(quidemReason = QuidemTestCaseReason.UNNEST_EXTRA_SCAN, separateDefaultModeTest = true)
@Test
public void testUnnestWithInFiltersWithExpressionInInnerQuery()
{
@ -5477,7 +5479,7 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
not(equality("dim1", "foo", ColumnType.STRING))
)
),
expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
nestedExpressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING),
not(equality("j0.unnest", "b", ColumnType.STRING))
))
.intervals(querySegmentSpec(Filtration.eternity()))
@ -5703,8 +5705,8 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
);
}
// @NotYetSupported(Modes.UNNEST_RESULT_MISMATCH)
// @DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@NotYetSupported(Modes.UNNEST_RESULT_MISMATCH)
@DecoupledTestConfig(ignoreExpectedQueriesReason = IgnoreQueriesReason.UNNEST_EXTRA_SCANQUERY)
@Test
public void testUnnestWithJoinOnTheLeft()
{
@ -6985,16 +6987,16 @@ public class CalciteArraysQueryTest extends BaseCalciteQueryTest
expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY),
null
),
expressionVirtualColumn("_j0.unnest", "array(\"dim1\",\"dim2\")", ColumnType.STRING_ARRAY),
expressionVirtualColumn(ds("j0.unnest"), "array(\"dim1\",\"dim2\")", ColumnType.STRING_ARRAY),
null
),
nestedExpressionVirtualColumn("__j0.unnest", "\"dim3\"", ColumnType.STRING),
nestedExpressionVirtualColumn(ds2("j0.unnest"), "\"dim3\"", ColumnType.STRING),
null
))
.intervals(querySegmentSpec(Intervals.of("2000-01-02T00:00:00.000Z/2000-01-03T00:10:00.001Z")))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.context(QUERY_CONTEXT_UNNEST)
.columns("__j0.unnest")
.columns(ds2("j0.unnest"))
.columnTypes(ColumnType.STRING)
.build()
),

View File

@ -0,0 +1,157 @@
# testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumnsOrFilters@NullHandling=sql case-crc:727a851b
# quidem testcase reason: UNNEST_EXTRA_SCAN
!set debug true
!set defaultTimeout 300000
!set maxScatterGatherBytes 9223372036854775807
!set plannerStrategy DECOUPLED
!set sqlCurrentTimestamp 2000-01-01T00:00:00Z
!set sqlQueryId dummy
!set sqlStringifyArrays false
!set outputformat mysql
!use druidtest:///
SELECT arrayString, uln, udn, usn FROM
( SELECT * FROM
( SELECT * FROM arrays, UNNEST(arrayLongNulls) as ut(uln)) ,UNNEST(arrayDoubleNulls) as ut(udn)
), UNNEST(arrayStringNulls) as ut(usn) WHERE arrayString = ARRAY['a','b'] AND (uln = 1 OR udn = 2.2) AND usn = 'a';
+-------------+-----+-----+-----+
| arrayString | uln | udn | usn |
+-------------+-----+-----+-----+
| [a, b] | 1 | 1.1 | a |
| [a, b] | 1 | 2.2 | a |
| [a, b] | 1 | | a |
| [a, b] | 3 | 2.2 | a |
| [a, b] | | 2.2 | a |
+-------------+-----+-----+-----+
(5 rows)
!ok
LogicalProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY], uln=[$2], udn=[$3], usn=[$4])
LogicalUnnest(unnestExpr=[$1], filter=[=($0, 'a')])
LogicalProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY], arrayStringNulls=[$1], uln=[$3], udn=[$4])
LogicalFilter(condition=[OR(=($3, 1), =($4, 2.2))])
LogicalUnnest(unnestExpr=[$2])
LogicalProject(arrayString=[$0], arrayStringNulls=[$1], arrayDoubleNulls=[$3], uln=[$4])
LogicalUnnest(unnestExpr=[$2])
LogicalFilter(condition=[=($0, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
LogicalProject(arrayString=[$1], arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6])
LogicalTableScan(table=[[druid, arrays]])
!logicalPlan
DruidProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY], uln=[$2], udn=[$3], usn=[$4], druid=[logical])
DruidUnnest(unnestExpr=[$1], filter=[=($0, 'a')])
DruidProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY], arrayStringNulls=[$1], uln=[$3], udn=[$4], druid=[logical])
DruidFilter(condition=[OR(=($3, 1), =($4, 2.2))])
DruidUnnest(unnestExpr=[$2])
DruidProject(arrayString=[$0], arrayStringNulls=[$1], arrayDoubleNulls=[$3], uln=[$4], druid=[logical])
DruidUnnest(unnestExpr=[$2])
DruidFilter(condition=[=($0, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
DruidProject(arrayString=[$1], arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6], druid=[logical])
DruidTableScan(table=[[druid, arrays]], druid=[logical])
!druidPlan
{
"queryType" : "scan",
"dataSource" : {
"type" : "unnest",
"base" : {
"type" : "query",
"query" : {
"queryType" : "scan",
"dataSource" : {
"type" : "unnest",
"base" : {
"type" : "unnest",
"base" : {
"type" : "filter",
"base" : {
"type" : "table",
"name" : "arrays"
},
"filter" : {
"type" : "equals",
"column" : "arrayString",
"matchValueType" : "ARRAY<STRING>",
"matchValue" : [ "a", "b" ]
}
},
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"arrayLongNulls\"",
"outputType" : "ARRAY<LONG>"
},
"unnestFilter" : null
},
"virtualColumn" : {
"type" : "expression",
"name" : "_j0.unnest",
"expression" : "\"arrayDoubleNulls\"",
"outputType" : "ARRAY<DOUBLE>"
},
"unnestFilter" : null
},
"intervals" : {
"type" : "intervals",
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "v0",
"expression" : "array('a','b')",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"filter" : {
"type" : "or",
"fields" : [ {
"type" : "equals",
"column" : "j0.unnest",
"matchValueType" : "LONG",
"matchValue" : 1
}, {
"type" : "equals",
"column" : "_j0.unnest",
"matchValueType" : "DOUBLE",
"matchValue" : 2.2
} ]
},
"columns" : [ "v0", "arrayStringNulls", "j0.unnest", "_j0.unnest" ],
"columnTypes" : [ "ARRAY<STRING>", "ARRAY<STRING>", "LONG", "DOUBLE" ],
"granularity" : {
"type" : "all"
},
"legacy" : false
}
},
"virtualColumn" : {
"type" : "expression",
"name" : "__j0.unnest",
"expression" : "\"arrayStringNulls\"",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : {
"type" : "equals",
"column" : "__j0.unnest",
"matchValueType" : "STRING",
"matchValue" : "a"
}
},
"intervals" : {
"type" : "intervals",
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "_v0",
"expression" : "array('a','b')",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"columns" : [ "_v0", "j0.unnest", "_j0.unnest", "__j0.unnest" ],
"columnTypes" : [ "ARRAY<STRING>", "LONG", "DOUBLE", "STRING" ],
"granularity" : {
"type" : "all"
},
"legacy" : false
}
!nativePlan

View File

@ -20,18 +20,20 @@ SELECT t,d3 FROM (select FLOOR(__time to hour) t, dim3 from druid.numfoo where d
(3 rows)
!ok
LogicalProject(t=[$0], d3=[$3])
LogicalProject(t=[$0], d3=[$2])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($1)])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$3], __time=[$0])
LogicalFilter(condition=[=($2, 'a')])
LogicalTableScan(table=[[druid, numfoo]])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$2])
LogicalFilter(condition=[=($1, 'a')])
LogicalProject(__time=[$0], dim2=[$2], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(t=[$0], d3=[$3], druid=[logical])
DruidProject(t=[$0], d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($1)])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$3], __time=[$0], druid=[logical])
DruidFilter(condition=[=($2, 'a')])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$2], druid=[logical])
DruidFilter(condition=[=($1, 'a')])
DruidProject(__time=[$0], dim2=[$2], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
{
@ -63,8 +65,8 @@ DruidProject(t=[$0], d3=[$3], druid=[logical])
"matchValueType" : "STRING",
"matchValue" : "a"
},
"columns" : [ "v0", "dim3", "__time" ],
"columnTypes" : [ "LONG", "STRING", "LONG" ],
"columns" : [ "v0", "dim3" ],
"columnTypes" : [ "LONG", "STRING" ],
"granularity" : {
"type" : "all"
},

View File

@ -20,18 +20,20 @@ SELECT t,d3 FROM (select FLOOR(__time to hour) t, dim3 from druid.numfoo where d
(3 rows)
!ok
LogicalProject(t=[$0], d3=[$3])
LogicalProject(t=[$0], d3=[$2])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($1)])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$3], __time=[$0])
LogicalFilter(condition=[SEARCH($2, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
LogicalTableScan(table=[[druid, numfoo]])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$2])
LogicalFilter(condition=[SEARCH($1, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
LogicalProject(__time=[$0], dim2=[$2], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(t=[$0], d3=[$3], druid=[logical])
DruidProject(t=[$0], d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($1)])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$3], __time=[$0], druid=[logical])
DruidFilter(condition=[SEARCH($2, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], dim3=[$2], druid=[logical])
DruidFilter(condition=[SEARCH($1, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
DruidProject(__time=[$0], dim2=[$2], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
{
@ -63,8 +65,8 @@ DruidProject(t=[$0], d3=[$3], druid=[logical])
"matchValueType" : "STRING",
"sortedValues" : [ "a", "b" ]
},
"columns" : [ "v0", "dim3", "__time" ],
"columnTypes" : [ "LONG", "STRING", "LONG" ],
"columns" : [ "v0", "dim3" ],
"columnTypes" : [ "LONG", "STRING" ],
"granularity" : {
"type" : "all"
},