remove UnionMergeRule rules from SQL planner (#9797)

This commit is contained in:
Clint Wylie 2020-05-01 12:50:11 -07:00 committed by GitHub
parent 61295bd002
commit 9a293d554d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 237 additions and 7 deletions

View File

@ -70,8 +70,8 @@ import java.util.concurrent.TimeUnit;
*/
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 15)
@Measurement(iterations = 25)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
public class SqlBenchmark
{
static {
@ -148,7 +148,225 @@ public class SqlBenchmark
// 17, 18: GroupBy long, filter by long, unordered; with and without aggregators
"SELECT maxLongUniform FROM foo WHERE maxLongUniform > 10 GROUP BY 1",
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1"
"SELECT maxLongUniform, SUM(sumLongSequential), COUNT(*) FROM foo WHERE maxLongUniform > 10 GROUP BY 1",
// 19: large UNION ALL "matrix": 29 per-value dimZipf subqueries plus an 'other' bucket, each grouped by dimSequential
"WITH matrix (dimZipf, dimSequential) AS (\n"
+ " (\n"
+ " SELECT '100', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '100'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '110', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '110'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '120', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '120'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '130', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '130'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '140', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '140'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '150', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '150'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '160', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '160'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '170', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '170'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '180', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '180'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '190', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '190'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '200', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '200'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '210', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '210'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '220', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '220'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '230', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '230'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '240', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '240'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '250', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '250'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '260', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '260'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '270', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '270'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '280', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '280'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '290', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '290'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '300', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '300'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '310', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '310'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '320', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '320'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '330', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '330'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '340', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '340'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '350', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '350'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '360', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '360'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '370', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '370'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT '380', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE dimZipf = '380'\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ "UNION ALL\n"
+ " (\n"
+ " SELECT 'other', dimSequential\n"
+ " FROM (SELECT * FROM foo WHERE dimUniform != 1)\n"
+ " WHERE\n"
+ " dimZipf NOT IN (\n"
+ " '100', '110', '120', '130', '140', '150', '160', '170', '180', '190',\n"
+ " '200', '210', '220', '230', '240', '250', '260', '270', '280', '290',\n"
+ " '300', '310', '320', '330', '340', '350', '360', '370', '380'\n"
+ " )\n"
+ " GROUP BY dimSequential\n"
+ " )\n"
+ ")\n"
+ "SELECT * FROM matrix"
);
@Param({"5000000"})
@ -226,4 +444,18 @@ public class SqlBenchmark
blackhole.consume(lastRow);
}
}
/**
 * Benchmarks SQL planning only, reported as average time in milliseconds.
 *
 * The statement to plan is looked up in {@code QUERIES} using the {@code query} field parsed as an integer index.
 * Only {@code plan} is called on the planner; the resulting {@link PlannerResult} is handed to the blackhole and is
 * not run by this method.
 *
 * @param blackhole sink that consumes the planner result
 * @throws Exception propagated from planner creation, planning, or closing the planner
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void planSql(Blackhole blackhole) throws Exception
{
  // Query context carries only the "vectorize" setting for this benchmark run.
  final Map<String, Object> queryContext = ImmutableMap.of("vectorize", vectorize);
  final AuthenticationResult escalatedAuth =
      NoopEscalator.getInstance().createEscalatedAuthenticationResult();

  // The planner is closed by try-with-resources once the plan result has been consumed.
  try (final DruidPlanner sqlPlanner = plannerFactory.createPlanner(queryContext, ImmutableList.of(), escalatedAuth)) {
    final int queryIndex = Integer.parseInt(query);
    blackhole.consume(sqlPlanner.plan(QUERIES.get(queryIndex)));
  }
}
}

View File

@ -63,7 +63,6 @@ import org.apache.calcite.rel.rules.SortRemoveConstantKeysRule;
import org.apache.calcite.rel.rules.SortRemoveRule;
import org.apache.calcite.rel.rules.SortUnionTransposeRule;
import org.apache.calcite.rel.rules.TableScanRule;
import org.apache.calcite.rel.rules.UnionMergeRule;
import org.apache.calcite.rel.rules.UnionPullUpConstantsRule;
import org.apache.calcite.rel.rules.UnionToDistinctRule;
import org.apache.calcite.rel.rules.ValuesReduceRule;
@ -141,6 +140,8 @@ public class Rules
// Rules from RelOptUtil's registerAbstractRules.
// Omit DateRangeRules due to https://issues.apache.org/jira/browse/CALCITE-1601
// Omit UnionMergeRule since it isn't very effective given how Druid unions currently operate and is potentially
// expensive in terms of planning time.
private static final List<RelOptRule> ABSTRACT_RULES =
ImmutableList.of(
AggregateProjectPullUpConstantsRule.INSTANCE2,
@ -155,9 +156,6 @@ public class Rules
PruneEmptyRules.JOIN_LEFT_INSTANCE,
PruneEmptyRules.JOIN_RIGHT_INSTANCE,
PruneEmptyRules.SORT_FETCH_ZERO_INSTANCE,
UnionMergeRule.INSTANCE,
UnionMergeRule.INTERSECT_INSTANCE,
UnionMergeRule.MINUS_INSTANCE,
ProjectToWindowRule.PROJECT,
FilterMergeRule.INSTANCE,
IntersectToDistinctRule.INSTANCE