Clean up and enhance rule order to simplify unnest plans

This commit is contained in:
Zoltan Haindrich 2024-11-14 09:07:44 +00:00
parent f4d7ec2695
commit fd564fda72
8 changed files with 75 additions and 153 deletions

View File

@ -292,9 +292,11 @@ public class CalciteRulesManager
final HepProgramBuilder builder2 = HepProgram.builder();
builder2.addRuleInstance(new LogicalUnnestRule());
builder2.addRuleInstance(new UnnestInputCleanupRule());
builder2.addRuleInstance(FilterProjectTransposeRule.Config.DEFAULT.toRule());
builder2.addRuleInstance(CoreRules.PROJECT_MERGE);
return Programs.sequence(
Programs.of(builder.build(), true, DefaultRelMetadataProvider.INSTANCE),
new DruidTrimFieldsProgram(true),
new DruidTrimFieldsProgram(false),
Programs.of(builder2.build(), true, DefaultRelMetadataProvider.INSTANCE)
);
}
@ -558,8 +560,11 @@ public class CalciteRulesManager
/** Program that trims fields. */
private static class DruidTrimFieldsProgram implements Program {
private boolean trim;
public DruidTrimFieldsProgram(boolean trim)
{
this.trim = trim;
}
@Override public RelNode run(RelOptPlanner planner, RelNode rel,
@ -568,7 +573,8 @@ public class CalciteRulesManager
List<RelOptLattice> lattices) {
final RelBuilder relBuilder =
RelFactories.LOGICAL_BUILDER.create(rel.getCluster(), null);
return runFieldTrimmer(relBuilder, rel);
RelNode ret = new DruidRelFieldTrimmer(null, relBuilder, trim).trim(rel);
return ret;
}
}
@ -583,7 +589,7 @@ public class CalciteRulesManager
private static RelNode runFieldTrimmer(final RelBuilder relBuilder, final RelNode decorrelatedRel)
{
if(true) {
return new DruidRelFieldTrimmer(null, relBuilder).trim(decorrelatedRel);
return new DruidRelFieldTrimmer(null, relBuilder,true).trim(decorrelatedRel);
} else {
return new RelFieldTrimmer(null, relBuilder).trim(decorrelatedRel);
}

View File

@ -48,12 +48,13 @@ import java.util.Set;
public class DruidRelFieldTrimmer extends RelFieldTrimmer
{
private RelBuilder relBuilder;
private boolean trimTableScan = true;
private boolean trimTableScan;
public DruidRelFieldTrimmer(@Nullable SqlValidator validator, RelBuilder relBuilder)
public DruidRelFieldTrimmer(@Nullable SqlValidator validator, RelBuilder relBuilder, boolean trimTableScan)
{
super(validator, relBuilder);
this.relBuilder = relBuilder;
this.trimTableScan= trimTableScan;
}
@Override

View File

@ -20,15 +20,15 @@ SELECT substring(d3,1) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested
(3 rows)
!ok
LogicalProject(EXPR$0=[SUBSTRING($1, 1)])
LogicalUnnest(unnestExpr=[$0], filter=[<>(SUBSTRING($0, 1), 'b')])
LogicalProject($f17=[MV_TO_ARRAY($3)])
LogicalProject(EXPR$0=[SUBSTRING($2, 1)])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[<>(SUBSTRING($0, 1), 'b')])
LogicalProject(__time=[$0], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(EXPR$0=[SUBSTRING($1, 1)], druid=[logical])
DruidUnnest(unnestExpr=[$0], filter=[<>(SUBSTRING($0, 1), 'b')])
DruidProject($f17=[MV_TO_ARRAY($3)], druid=[logical])
DruidProject(EXPR$0=[SUBSTRING($2, 1)], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[<>(SUBSTRING($0, 1), 'b')])
DruidProject(__time=[$0], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
@ -37,36 +37,13 @@ DruidProject(EXPR$0=[SUBSTRING($1, 1)], druid=[logical])
"dataSource" : {
"type" : "unnest",
"base" : {
"type" : "query",
"query" : {
"queryType" : "scan",
"dataSource" : {
"type" : "table",
"name" : "numfoo"
},
"intervals" : {
"type" : "intervals",
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "v0",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
"columnTypes" : [ "ARRAY<STRING>" ],
"granularity" : {
"type" : "all"
},
"legacy" : false
}
"type" : "table",
"name" : "numfoo"
},
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"v0\"",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : {
@ -80,12 +57,12 @@ DruidProject(EXPR$0=[SUBSTRING($1, 1)], druid=[logical])
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "_v0",
"name" : "v0",
"expression" : "substring(\"j0.unnest\", 0, -1)",
"outputType" : "STRING"
} ],
"resultFormat" : "compactedList",
"columns" : [ "_v0" ],
"columns" : [ "v0" ],
"columnTypes" : [ "STRING" ],
"granularity" : {
"type" : "all"

View File

@ -1,5 +1,5 @@
# testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumnsOrFilters@NullHandling=sql case-crc:727a851b
# quidem testcase reason: UNNEST_EXTRA_SCAN
# testUnnestThriceWithFiltersOnDimAndAllUnnestColumnsArrayColumnsOrFilters@NullHandling=sql case-crc:8bc6fb6b
# quidem testcase reason: IMPROVED_PLAN
!set debug true
!set defaultTimeout 300000
!set maxScatterGatherBytes 9223372036854775807
@ -32,10 +32,9 @@ LogicalProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY]
LogicalUnnest(unnestExpr=[$1])
LogicalProject(arrayStringNulls=[$0], arrayDoubleNulls=[$2], uln=[$3])
LogicalUnnest(unnestExpr=[$1])
LogicalProject(arrayStringNulls=[$1], arrayLongNulls=[$2], arrayDoubleNulls=[$3])
LogicalFilter(condition=[=($0, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
LogicalProject(arrayString=[$1], arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6])
LogicalTableScan(table=[[druid, arrays]])
LogicalProject(arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6])
LogicalFilter(condition=[=($1, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
LogicalTableScan(table=[[druid, arrays]])
!logicalPlan
DruidProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY], uln=[$1], udn=[$2], usn=[$3], druid=[logical])
@ -45,10 +44,9 @@ DruidProject(arrayString=[CAST(ARRAY('a':VARCHAR, 'b':VARCHAR)):VARCHAR ARRAY],
DruidUnnest(unnestExpr=[$1])
DruidProject(arrayStringNulls=[$0], arrayDoubleNulls=[$2], uln=[$3], druid=[logical])
DruidUnnest(unnestExpr=[$1])
DruidProject(arrayStringNulls=[$1], arrayLongNulls=[$2], arrayDoubleNulls=[$3], druid=[logical])
DruidFilter(condition=[=($0, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
DruidProject(arrayString=[$1], arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6], druid=[logical])
DruidTableScan(table=[[druid, arrays]], druid=[logical])
DruidProject(arrayStringNulls=[$2], arrayLongNulls=[$4], arrayDoubleNulls=[$6], druid=[logical])
DruidFilter(condition=[=($1, CAST(ARRAY('a', 'b')):VARCHAR ARRAY NOT NULL)])
DruidTableScan(table=[[druid, arrays]], druid=[logical])
!druidPlan
{

View File

@ -20,20 +20,18 @@ SELECT t,d3 FROM (select FLOOR(__time to hour) t, dim3 from druid.numfoo where d
(3 rows)
!ok
LogicalProject(t=[$0], d3=[$2])
LogicalUnnest(unnestExpr=[$1])
LogicalProject(t=[FLOOR($0, FLAG(HOUR))], $f2=[MV_TO_ARRAY($2)])
LogicalFilter(condition=[=($1, 'a')])
LogicalProject(__time=[$0], dim2=[$2], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
LogicalProject(t=[$0], d3=[$3])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($2)])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], __time=[$0], dim3=[$3])
LogicalFilter(condition=[=($2, 'a')])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(t=[$0], d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[$1])
DruidProject(t=[FLOOR($0, FLAG(HOUR))], $f2=[MV_TO_ARRAY($2)], druid=[logical])
DruidFilter(condition=[=($1, 'a')])
DruidProject(__time=[$0], dim2=[$2], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
DruidProject(t=[$0], d3=[$3], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($2)])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], __time=[$0], dim3=[$3], druid=[logical])
DruidFilter(condition=[=($2, 'a')])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
{
@ -57,11 +55,6 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"name" : "v0",
"expression" : "timestamp_floor(\"__time\",'PT1H',null,'UTC')",
"outputType" : "LONG"
}, {
"type" : "expression",
"name" : "v1",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"filter" : {
@ -70,8 +63,8 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"matchValueType" : "STRING",
"matchValue" : "a"
},
"columns" : [ "v0", "v1" ],
"columnTypes" : [ "LONG", "ARRAY<STRING>" ],
"columns" : [ "v0", "__time", "dim3" ],
"columnTypes" : [ "LONG", "LONG", "STRING" ],
"granularity" : {
"type" : "all"
},
@ -81,7 +74,7 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"v1\"",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : null

View File

@ -20,20 +20,18 @@ SELECT t,d3 FROM (select FLOOR(__time to hour) t, dim3 from druid.numfoo where d
(3 rows)
!ok
LogicalProject(t=[$0], d3=[$2])
LogicalUnnest(unnestExpr=[$1])
LogicalProject(t=[FLOOR($0, FLAG(HOUR))], $f2=[MV_TO_ARRAY($2)])
LogicalFilter(condition=[SEARCH($1, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
LogicalProject(__time=[$0], dim2=[$2], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
LogicalProject(t=[$0], d3=[$3])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($2)])
LogicalProject($f0=[FLOOR($0, FLAG(HOUR))], __time=[$0], dim3=[$3])
LogicalFilter(condition=[SEARCH($2, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(t=[$0], d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[$1])
DruidProject(t=[FLOOR($0, FLAG(HOUR))], $f2=[MV_TO_ARRAY($2)], druid=[logical])
DruidFilter(condition=[SEARCH($1, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
DruidProject(__time=[$0], dim2=[$2], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
DruidProject(t=[$0], d3=[$3], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($2)])
DruidProject($f0=[FLOOR($0, FLAG(HOUR))], __time=[$0], dim3=[$3], druid=[logical])
DruidFilter(condition=[SEARCH($2, Sarg['a':VARCHAR, 'b':VARCHAR]:VARCHAR)])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
{
@ -57,11 +55,6 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"name" : "v0",
"expression" : "timestamp_floor(\"__time\",'PT1H',null,'UTC')",
"outputType" : "LONG"
}, {
"type" : "expression",
"name" : "v1",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"filter" : {
@ -70,8 +63,8 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"matchValueType" : "STRING",
"sortedValues" : [ "a", "b" ]
},
"columns" : [ "v0", "v1" ],
"columnTypes" : [ "LONG", "ARRAY<STRING>" ],
"columns" : [ "v0", "__time", "dim3" ],
"columnTypes" : [ "LONG", "LONG", "STRING" ],
"granularity" : {
"type" : "all"
},
@ -81,7 +74,7 @@ DruidProject(t=[$0], d3=[$2], druid=[logical])
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"v1\"",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : null

View File

@ -23,15 +23,15 @@ SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where st
(6 rows)
!ok
LogicalProject(d3=[$1])
LogicalUnnest(unnestExpr=[$0], filter=[OR(<(STRLEN($0), 2), =($0, 'd'))])
LogicalProject($f17=[MV_TO_ARRAY($3)])
LogicalProject(d3=[$2])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[OR(<(STRLEN($0), 2), =($0, 'd'))])
LogicalProject(__time=[$0], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(d3=[$1], druid=[logical])
DruidUnnest(unnestExpr=[$0], filter=[OR(<(STRLEN($0), 2), =($0, 'd'))])
DruidProject($f17=[MV_TO_ARRAY($3)], druid=[logical])
DruidProject(d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[OR(<(STRLEN($0), 2), =($0, 'd'))])
DruidProject(__time=[$0], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
@ -40,36 +40,13 @@ DruidProject(d3=[$1], druid=[logical])
"dataSource" : {
"type" : "unnest",
"base" : {
"type" : "query",
"query" : {
"queryType" : "scan",
"dataSource" : {
"type" : "table",
"name" : "numfoo"
},
"intervals" : {
"type" : "intervals",
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "v0",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
"columnTypes" : [ "ARRAY<STRING>" ],
"granularity" : {
"type" : "all"
},
"legacy" : false
}
"type" : "table",
"name" : "numfoo"
},
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"v0\"",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : {

View File

@ -22,15 +22,15 @@ SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3
(5 rows)
!ok
LogicalProject(d3=[$1])
LogicalUnnest(unnestExpr=[$0], filter=[<>($0, 'd')])
LogicalProject($f17=[MV_TO_ARRAY($3)])
LogicalProject(d3=[$2])
LogicalUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[<>($0, 'd')])
LogicalProject(__time=[$0], dim3=[$3])
LogicalTableScan(table=[[druid, numfoo]])
!logicalPlan
DruidProject(d3=[$1], druid=[logical])
DruidUnnest(unnestExpr=[$0], filter=[<>($0, 'd')])
DruidProject($f17=[MV_TO_ARRAY($3)], druid=[logical])
DruidProject(d3=[$2], druid=[logical])
DruidUnnest(unnestExpr=[MV_TO_ARRAY($1)], filter=[<>($0, 'd')])
DruidProject(__time=[$0], dim3=[$3], druid=[logical])
DruidTableScan(table=[[druid, numfoo]], druid=[logical])
!druidPlan
@ -39,36 +39,13 @@ DruidProject(d3=[$1], druid=[logical])
"dataSource" : {
"type" : "unnest",
"base" : {
"type" : "query",
"query" : {
"queryType" : "scan",
"dataSource" : {
"type" : "table",
"name" : "numfoo"
},
"intervals" : {
"type" : "intervals",
"intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
},
"virtualColumns" : [ {
"type" : "expression",
"name" : "v0",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
} ],
"resultFormat" : "compactedList",
"columns" : [ "v0" ],
"columnTypes" : [ "ARRAY<STRING>" ],
"granularity" : {
"type" : "all"
},
"legacy" : false
}
"type" : "table",
"name" : "numfoo"
},
"virtualColumn" : {
"type" : "expression",
"name" : "j0.unnest",
"expression" : "\"v0\"",
"expression" : "mv_to_array(\"dim3\")",
"outputType" : "ARRAY<STRING>"
},
"unnestFilter" : {