From 90f9932bd3e0f643ca6637cbc3bb89c9ce0ed085 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Fri, 24 Mar 2017 19:20:01 -0700
Subject: [PATCH] SQL: Rule to collapse sort chains. (#4085)

Useful for queries like `SELECT * FROM (...) LIMIT X`, where the inner
query has an order by or limit in it.
---
Review note (not part of the original commit): SortCollapseRule now rebuilds
the combined sort keys from the inner Sort's collation instead of
first.getChildExps(), so that DESC and explicit NULLS FIRST / LAST orderings
survive the collapse. The new-file hunk size and the diffstat were updated to
match; the blob id on that file's "index" line is left as recorded upstream.

 .../io/druid/sql/calcite/planner/Rules.java   |   3 +
 .../sql/calcite/rule/SortCollapseRule.java    | 128 ++++++++++++++++++
 .../druid/sql/calcite/CalciteQueryTest.java   |  63 ++++++++-
 3 files changed, 193 insertions(+), 1 deletion(-)
 create mode 100644 sql/src/main/java/io/druid/sql/calcite/rule/SortCollapseRule.java

diff --git a/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java b/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java
index f6d2d34286d..b26cfcd96f0 100644
--- a/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java
+++ b/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java
@@ -28,6 +28,7 @@ import io.druid.sql.calcite.rule.DruidSemiJoinRule;
 import io.druid.sql.calcite.rule.DruidTableScanRule;
 import io.druid.sql.calcite.rule.GroupByRules;
 import io.druid.sql.calcite.rule.SelectRules;
+import io.druid.sql.calcite.rule.SortCollapseRule;
 import org.apache.calcite.interpreter.Bindables;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.volcano.AbstractConverter;
@@ -213,6 +214,8 @@ public class Rules
       rules.add(DruidRelToBindableRule.instance());
     }
 
+    rules.add(SortCollapseRule.instance());
+
     // Druid-specific rules.
     rules.add(new DruidTableScanRule(queryMaker));
     rules.add(new DruidFilterRule(operatorTable));
diff --git a/sql/src/main/java/io/druid/sql/calcite/rule/SortCollapseRule.java b/sql/src/main/java/io/druid/sql/calcite/rule/SortCollapseRule.java
new file mode 100644
index 00000000000..ae958bac7a5
--- /dev/null
+++ b/sql/src/main/java/io/druid/sql/calcite/rule/SortCollapseRule.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.sql.calcite.rule;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Collapses two adjacent Sort operations together. Useful for queries like
+ * {@code SELECT * FROM (SELECT DISTINCT dim2 FROM druid.foo ORDER BY dim2) LIMIT 10}.
+ *
+ * The rule only fires when the outer Sort has no sort keys of its own (a bare OFFSET / LIMIT), so the rows keep
+ * the inner Sort's ordering and only the offset and fetch need to be recombined.
+ */
+public class SortCollapseRule extends RelOptRule
+{
+  private static final SortCollapseRule INSTANCE = new SortCollapseRule();
+
+  public SortCollapseRule()
+  {
+    super(operand(Sort.class, operand(Sort.class, any())));
+  }
+
+  public static SortCollapseRule instance()
+  {
+    return INSTANCE;
+  }
+
+  @Override
+  public void onMatch(final RelOptRuleCall call)
+  {
+    // First is the inner sort, second is the outer sort.
+    final Sort first = call.rel(1);
+    final Sort second = call.rel(0);
+
+    if (second.collation.getFieldCollations().isEmpty()) {
+      // Add up the offsets.
+      final int firstOffset = (first.offset != null ? RexLiteral.intValue(first.offset) : 0);
+      final int secondOffset = (second.offset != null ? RexLiteral.intValue(second.offset) : 0);
+
+      final int fetch;
+
+      if (first.fetch == null && second.fetch == null) {
+        // Neither has a limit => no limit overall.
+        fetch = -1;
+      } else if (first.fetch == null) {
+        // Outer limit only.
+        fetch = RexLiteral.intValue(second.fetch);
+      } else if (second.fetch == null) {
+        // Inner limit only. The outer offset skips rows that the inner limit already counted.
+        fetch = Math.max(0, RexLiteral.intValue(first.fetch) - secondOffset);
+      } else {
+        // Both limits: the outer one applies to whatever the inner limit leaves after the outer offset.
+        fetch = Math.max(
+            0,
+            Math.min(
+                RexLiteral.intValue(first.fetch) - secondOffset,
+                RexLiteral.intValue(second.fetch)
+            )
+        );
+      }
+
+      final RelBuilder builder = call.builder().push(first.getInput());
+
+      // Rebuild the keys from the inner collation rather than first.getChildExps(): those are bare input
+      // references, so ORDER BY ... DESC and explicit NULLS FIRST / LAST would silently turn into the defaults.
+      final RelNode combined = builder
+          .sortLimit(firstOffset + secondOffset, fetch, sortKeys(builder, first))
+          .build();
+
+      call.transformTo(combined);
+    }
+  }
+
+  /**
+   * Returns the sort keys of {@code sort} as expressions over the relation on top of {@code builder}'s stack,
+   * wrapped so that descending order and explicit null ordering are carried over.
+   */
+  private static List<RexNode> sortKeys(final RelBuilder builder, final Sort sort)
+  {
+    final List<RexNode> keys = new ArrayList<>();
+
+    for (final RelFieldCollation fieldCollation : sort.collation.getFieldCollations()) {
+      RexNode key = builder.field(fieldCollation.getFieldIndex());
+
+      if (fieldCollation.getDirection() == RelFieldCollation.Direction.DESCENDING
+          || fieldCollation.getDirection() == RelFieldCollation.Direction.STRICTLY_DESCENDING) {
+        key = builder.desc(key);
+      }
+
+      if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) {
+        key = builder.nullsFirst(key);
+      } else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) {
+        key = builder.nullsLast(key);
+      }
+
+      keys.add(key);
+    }
+
+    return keys;
+  }
+}
diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java
index c088c93bbd2..12b7ee2097c 100644
--- a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java
@@ -740,7 +740,8 @@ public class CalciteQueryTest
         "SELECT CHARACTER_LENGTH(dim1) + 1 FROM druid.foo GROUP BY CHARACTER_LENGTH(dim1) + 1", // Group by math
         "SELECT COUNT(*) FROM druid.foo x, druid.foo y", // Self-join
         "SELECT SUBSTRING(dim1, 2) FROM druid.foo GROUP BY dim1", // Project a dimension from GROUP BY
-        "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY SUBSTRING(dim1, 2)" // ORDER BY projection
+        "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY SUBSTRING(dim1, 2)", // ORDER BY projection
+        "SELECT DISTINCT dim2 FROM druid.foo ORDER BY dim2 LIMIT 2 OFFSET 5" // DISTINCT with OFFSET
     );
 
     for (final String query : queries) {
@@ -2004,6 +2005,66 @@ public class CalciteQueryTest
     );
   }
 
+  @Test
+  public void testSelectDistinctWithSortAsOuterQuery() throws Exception
+  {
+    testQuery(
+        "SELECT * FROM (SELECT DISTINCT dim2 FROM druid.foo ORDER BY dim2) LIMIT 10",
+        ImmutableList.of(
+            new TopNQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(QSS(Filtration.eternity()))
+                .granularity(Granularities.ALL)
+                .dimension(new DefaultDimensionSpec("dim2", "d0"))
+                .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
+                .threshold(10)
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{""},
+            new Object[]{"a"},
+            new Object[]{"abc"}
+        )
+    );
+  }
+
+  @Test
+  public void testSelectDistinctWithSortAsOuterQuery2() throws Exception
+  {
+    testQuery(
+        "SELECT * FROM (SELECT DISTINCT dim2 FROM druid.foo ORDER BY dim2 LIMIT 5) LIMIT 10",
+        ImmutableList.of(
+            new TopNQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(QSS(Filtration.eternity()))
+                .granularity(Granularities.ALL)
+                .dimension(new DefaultDimensionSpec("dim2", "d0"))
+                .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
+                .threshold(5)
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{""},
+            new Object[]{"a"},
+            new Object[]{"abc"}
+        )
+    );
+  }
+
+  @Test
+  public void testSelectDistinctWithSortAsOuterQuery3() throws Exception
+  {
+    // Query reduces to LIMIT 0.
+
+    testQuery(
+        "SELECT * FROM (SELECT DISTINCT dim2 FROM druid.foo ORDER BY dim2 LIMIT 2 OFFSET 5) OFFSET 2",
+        ImmutableList.of(),
+        ImmutableList.of()
+    );
+  }
+
   @Test
   public void testCountDistinct() throws Exception
   {