From bb7c496d883d6dc921e6412fc8a2e1493f287704 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 20 Jan 2017 09:59:28 -0800 Subject: [PATCH] SQL: Use topN for single-dim queries with LIMIT but no ORDER BY. (#3867) --- .../sql/calcite/rel/DruidQueryBuilder.java | 26 +++++++++++++------ .../druid/sql/calcite/CalciteQueryTest.java | 25 ++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/sql/src/main/java/io/druid/sql/calcite/rel/DruidQueryBuilder.java b/sql/src/main/java/io/druid/sql/calcite/rel/DruidQueryBuilder.java index 1c7ebe73a13..38ea175937e 100644 --- a/sql/src/main/java/io/druid/sql/calcite/rel/DruidQueryBuilder.java +++ b/sql/src/main/java/io/druid/sql/calcite/rel/DruidQueryBuilder.java @@ -34,6 +34,7 @@ import io.druid.query.groupby.GroupByQuery; import io.druid.query.groupby.having.DimFilterHavingSpec; import io.druid.query.groupby.orderby.DefaultLimitSpec; import io.druid.query.groupby.orderby.OrderByColumnSpec; +import io.druid.query.ordering.StringComparators; import io.druid.query.select.PagingSpec; import io.druid.query.select.SelectQuery; import io.druid.query.timeseries.TimeseriesQuery; @@ -368,18 +369,27 @@ public class DruidQueryBuilder final boolean useApproximateTopN ) { - // Must have GROUP BY one column, ORDER BY one column, limit less than maxTopNLimit, and no HAVING. - if (grouping == null - || grouping.getDimensions().size() != 1 - || limitSpec == null - || limitSpec.getColumns().size() != 1 - || limitSpec.getLimit() > maxTopNLimit - || having != null) { + // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING. + final boolean topNOk = grouping != null + && grouping.getDimensions().size() == 1 + && limitSpec != null + && (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit) + && having == null; + if (!topNOk) { return null; } final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()); - final OrderByColumnSpec limitColumn = Iterables.getOnlyElement(limitSpec.getColumns()); + final OrderByColumnSpec limitColumn; + if (limitSpec.getColumns().isEmpty()) { + limitColumn = new OrderByColumnSpec( + dimensionSpec.getOutputName(), + OrderByColumnSpec.Direction.ASCENDING, + StringComparators.LEXICOGRAPHIC + ); + } else { + limitColumn = Iterables.getOnlyElement(limitSpec.getColumns()); + } final TopNMetricSpec topNMetricSpec; if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) { diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java index b8bc35bf6ef..1c8249e6afa 100644 --- a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java @@ -1597,6 +1597,31 @@ public class CalciteQueryTest ); } + @Test + public void testSelectDistinctWithLimit() throws Exception + { + // Should use topN even if approximate topNs are off, because this query is exact. + + testQuery( + "SELECT DISTINCT dim2 FROM druid.foo LIMIT 10", + ImmutableList.of( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(QSS(Filtration.eternity())) + .granularity(QueryGranularities.ALL) + .dimension(new DefaultDimensionSpec("dim2", "d0")) + .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) + .threshold(10) + .build() + ), + ImmutableList.of( + new Object[]{""}, + new Object[]{"a"}, + new Object[]{"abc"} + ) + ); + } + @Test public void testCountDistinct() throws Exception {