Run filter-into-join rule early for subqueries and disable project-filter rule (#15511)

FILTER_INTO_JOIN is mainly run along with the other rules in the Volcano planner; however, if the query starts out highly underdefined (join conditions in the WHERE clause), that generic query could give the other rules a lot of room to play around with. Running the rule early is only enabled when the join uses subqueries for its inputs.

The PROJECT_FILTER rule is not that useful, and it could increase planning times by providing new plans. This problem worsened after we started supporting inner joins with arbitrary join conditions in https://github.com/apache/druid/pull/15302
This commit is contained in:
Zoltan Haindrich 2024-01-04 11:03:45 +01:00 committed by GitHub
parent 5c3391a084
commit b9679d0884
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 2 deletions

View File

@ -21,6 +21,7 @@ package org.apache.druid.msq.test;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Injector;
import com.google.inject.Module;
import org.apache.druid.guice.DruidInjectorBuilder;
@ -194,4 +195,40 @@ public class CalciteSelectQueryMSQTest extends CalciteQueryTest
);
}
}
/**
 * Plans and runs an eight-way comma join whose join conditions all live in the WHERE clause,
 * with the "sqlJoinAlgorithm" query context parameter set to "sortMerge".
 *
 * No expected results are configured here; the test only has to complete within the
 * 40000 ms timeout declared on the annotation.
 */
@Test(timeout = 40000)
public void testJoinMultipleTablesWithWhereCondition()
{
  // The FROM list deliberately carries no join conditions; every equality is in the WHERE clause.
  final String sql =
      "SELECT f2.dim3,sum(f6.m1 * (1- f6.m2)) FROM"
      + " druid.foo as f5, "
      + " druid.foo as f6, "
      + " druid.numfoo as f7, "
      + " druid.foo2 as f2, "
      + " druid.numfoo as f3, "
      + " druid.foo as f4, "
      + " druid.numfoo as f1, "
      + " druid.foo2 as f8 "
      + "where true"
      + " and f1.dim1 = f2.dim2 "
      + " and f3.dim1 = f4.dim2 "
      + " and f5.dim1 = f6.dim2 "
      + " and f7.dim2 = f8.dim3 "
      + " and f2.dim1 = f4.dim2 "
      + " and f6.dim1 = f8.dim2 "
      + " and f1.dim1 = f7.dim2 "
      + " and f8.dim2 = 'x' "
      + " and f3.__time >= date '2011-11-11' "
      + " and f3.__time < date '2013-11-11' "
      + "group by 1 "
      + "order by 2 desc limit 1001";

  testBuilder()
      .queryContext(ImmutableMap.of("sqlJoinAlgorithm", "sortMerge"))
      .sql(sql)
      .run();
}
}

View File

@ -87,6 +87,8 @@ public class CalciteRulesManager
* those functions).
* 3) {@link CoreRules#JOIN_COMMUTE}, {@link JoinPushThroughJoinRule#RIGHT}, {@link JoinPushThroughJoinRule#LEFT},
* and {@link CoreRules#FILTER_INTO_JOIN}, which are part of {@link #FANCY_JOIN_RULES}.
* 4) {@link CoreRules#PROJECT_FILTER_TRANSPOSE}, because PartialDruidQuery would like to have the Project on top of
* the Filter, and this rule could create a lot of plans that are not useful.
*/
private static final List<RelOptRule> BASE_RULES =
ImmutableList.of(
@ -94,7 +96,6 @@ public class CalciteRulesManager
CoreRules.AGGREGATE_PROJECT_STAR_TABLE,
CoreRules.PROJECT_MERGE,
CoreRules.FILTER_SCAN,
CoreRules.PROJECT_FILTER_TRANSPOSE,
CoreRules.FILTER_PROJECT_TRANSPOSE,
CoreRules.JOIN_PUSH_EXPRESSIONS,
CoreRules.AGGREGATE_EXPAND_WITHIN_DISTINCT,
@ -237,7 +238,7 @@ public class CalciteRulesManager
prePrograms.add(new LoggingProgram("Finished decorrelate and trim fields program", isDebug));
prePrograms.add(buildCoalesceProgram());
prePrograms.add(new LoggingProgram("Finished coalesce program", isDebug));
prePrograms.add(buildHepProgram(REDUCTION_RULES));
prePrograms.add(buildReductionProgram(plannerContext));
prePrograms.add(new LoggingProgram("Finished expression reduction program", isDebug));
final Program preProgram = Programs.sequence(prePrograms.toArray(new Program[0]));
@ -261,6 +262,18 @@ public class CalciteRulesManager
);
}
/**
 * Builds the Hep program used for the expression reduction phase.
 *
 * The program always contains a copy of {@link #REDUCTION_RULES}. When the join algorithm
 * reported by the planner context requires subqueries, {@link CoreRules#FILTER_INTO_JOIN} is
 * appended so that it is applied early, to avoid exploring less optimal plans.
 *
 * @param plannerContext context supplying the join algorithm in effect for the query
 * @return the program produced by {@code buildHepProgram} for the assembled rule list
 */
private Program buildReductionProgram(final PlannerContext plannerContext)
{
  // Work on a copy so that the shared REDUCTION_RULES list is never modified.
  final List<RelOptRule> reductionRules = new ArrayList<>(REDUCTION_RULES);

  final boolean joinNeedsSubquery = plannerContext.getJoinAlgorithm().requiresSubquery();
  if (joinNeedsSubquery) {
    reductionRules.add(CoreRules.FILTER_INTO_JOIN);
  }

  return buildHepProgram(reductionRules);
}
private static class LoggingProgram implements Program
{
private final String stage;