From b9679d0884c96b29a48a1f398773d0b788c36240 Mon Sep 17 00:00:00 2001 From: Zoltan Haindrich Date: Thu, 4 Jan 2024 11:03:45 +0100 Subject: [PATCH] Run filter-into-join rule early for subqueries and disable project-filter rule (#15511) FILTER_INTO_JOIN is mainly run along with the other rules with the Volcano planner; however, if the query starts highly underdefined (join conditions in the where clauses), that generic query could give a lot of room for the other rules to play around with. The early run is only enabled when the join uses subqueries for its inputs. The PROJECT_FILTER rule is not that useful and could increase planning times by providing new plans. This problem worsened after we started supporting inner joins with arbitrary join conditions in https://github.com/apache/druid/pull/15302 --- .../msq/test/CalciteSelectQueryMSQTest.java | 37 +++++++++++++++++++ .../calcite/planner/CalciteRulesManager.java | 17 ++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java index 504999ae45b..fccce2e8402 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectQueryMSQTest.java @@ -21,6 +21,7 @@ package org.apache.druid.msq.test; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import com.google.inject.Module; import org.apache.druid.guice.DruidInjectorBuilder; @@ -194,4 +195,40 @@ public class CalciteSelectQueryMSQTest extends CalciteQueryTest ); } } + + @Test(timeout = 40000) + public void testJoinMultipleTablesWithWhereCondition() + { + testBuilder() + .queryContext( + 
ImmutableMap.of( + "sqlJoinAlgorithm", "sortMerge" + ) + ) + .sql( + "SELECT f2.dim3,sum(f6.m1 * (1- f6.m2)) FROM" + + " druid.foo as f5, " + + " druid.foo as f6, " + + " druid.numfoo as f7, " + + " druid.foo2 as f2, " + + " druid.numfoo as f3, " + + " druid.foo as f4, " + + " druid.numfoo as f1, " + + " druid.foo2 as f8 " + + "where true" + + " and f1.dim1 = f2.dim2 " + + " and f3.dim1 = f4.dim2 " + + " and f5.dim1 = f6.dim2 " + + " and f7.dim2 = f8.dim3 " + + " and f2.dim1 = f4.dim2 " + + " and f6.dim1 = f8.dim2 " + + " and f1.dim1 = f7.dim2 " + + " and f8.dim2 = 'x' " + + " and f3.__time >= date '2011-11-11' " + + " and f3.__time < date '2013-11-11' " + + "group by 1 " + + "order by 2 desc limit 1001" + ) + .run(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java index bdcbf9b5149..06c73fa9d3b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java @@ -87,6 +87,8 @@ public class CalciteRulesManager * those functions). * 3) {@link CoreRules#JOIN_COMMUTE}, {@link JoinPushThroughJoinRule#RIGHT}, {@link JoinPushThroughJoinRule#LEFT}, * and {@link CoreRules#FILTER_INTO_JOIN}, which are part of {@link #FANCY_JOIN_RULES}. + * 4) {@link CoreRules#PROJECT_FILTER_TRANSPOSE} because PartialDruidQuery would like to have the Project on top of the Filter - + * this rule could create a lot of non-useful plans. 
*/ private static final List BASE_RULES = ImmutableList.of( @@ -94,7 +96,6 @@ public class CalciteRulesManager CoreRules.AGGREGATE_PROJECT_STAR_TABLE, CoreRules.PROJECT_MERGE, CoreRules.FILTER_SCAN, - CoreRules.PROJECT_FILTER_TRANSPOSE, CoreRules.FILTER_PROJECT_TRANSPOSE, CoreRules.JOIN_PUSH_EXPRESSIONS, CoreRules.AGGREGATE_EXPAND_WITHIN_DISTINCT, @@ -237,7 +238,7 @@ public class CalciteRulesManager prePrograms.add(new LoggingProgram("Finished decorrelate and trim fields program", isDebug)); prePrograms.add(buildCoalesceProgram()); prePrograms.add(new LoggingProgram("Finished coalesce program", isDebug)); - prePrograms.add(buildHepProgram(REDUCTION_RULES)); + prePrograms.add(buildReductionProgram(plannerContext)); prePrograms.add(new LoggingProgram("Finished expression reduction program", isDebug)); final Program preProgram = Programs.sequence(prePrograms.toArray(new Program[0])); @@ -261,6 +262,18 @@ public class CalciteRulesManager ); } + private Program buildReductionProgram(final PlannerContext plannerContext) + { + List hepRules = new ArrayList(REDUCTION_RULES); + // Apply CoreRules#FILTER_INTO_JOIN early to avoid exploring less optimal plans. + if (plannerContext.getJoinAlgorithm().requiresSubquery()) { + hepRules.add(CoreRules.FILTER_INTO_JOIN); + } + return buildHepProgram( + hepRules + ); + } + private static class LoggingProgram implements Program { private final String stage;