SQL: Use topN for single-dim queries with LIMIT but no ORDER BY. (#3867)

This commit is contained in:
Gian Merlino 2017-01-20 09:59:28 -08:00 committed by Fangjin Yang
parent af93a8d189
commit bb7c496d88
2 changed files with 43 additions and 8 deletions

View File

@ -34,6 +34,7 @@ import io.druid.query.groupby.GroupByQuery;
import io.druid.query.groupby.having.DimFilterHavingSpec;
import io.druid.query.groupby.orderby.DefaultLimitSpec;
import io.druid.query.groupby.orderby.OrderByColumnSpec;
import io.druid.query.ordering.StringComparators;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.timeseries.TimeseriesQuery;
@ -368,18 +369,27 @@ public class DruidQueryBuilder
final boolean useApproximateTopN
)
{
// Must have GROUP BY one column, ORDER BY one column, limit no greater than maxTopNLimit, and no HAVING.
if (grouping == null
|| grouping.getDimensions().size() != 1
|| limitSpec == null
|| limitSpec.getColumns().size() != 1
|| limitSpec.getLimit() > maxTopNLimit
|| having != null) {
// Must have GROUP BY one column, ORDER BY zero or one column, limit no greater than maxTopNLimit, and no HAVING.
final boolean topNOk = grouping != null
&& grouping.getDimensions().size() == 1
&& limitSpec != null
&& (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit)
&& having == null;
if (!topNOk) {
return null;
}
final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions());
final OrderByColumnSpec limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
final OrderByColumnSpec limitColumn;
if (limitSpec.getColumns().isEmpty()) {
limitColumn = new OrderByColumnSpec(
dimensionSpec.getOutputName(),
OrderByColumnSpec.Direction.ASCENDING,
StringComparators.LEXICOGRAPHIC
);
} else {
limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
}
final TopNMetricSpec topNMetricSpec;
if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {

View File

@ -1597,6 +1597,31 @@ public class CalciteQueryTest
);
}
@Test
public void testSelectDistinctWithLimit() throws Exception
{
  // SELECT DISTINCT on a single column with a LIMIT and no ORDER BY.
  // Should use topN even if approximate topNs are off, because this query is exact.
  // The expected native query is a topN on dim2 using a lexicographic dimension metric,
  // with the SQL LIMIT carried over as the threshold.
  final Query expectedTopN = new TopNQueryBuilder()
      .dataSource(CalciteTests.DATASOURCE1)
      .intervals(QSS(Filtration.eternity()))
      .granularity(QueryGranularities.ALL)
      .dimension(new DefaultDimensionSpec("dim2", "d0"))
      .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
      .threshold(10)
      .build();

  // Expected result rows, one single-column row per distinct dim2 value.
  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{""},
      new Object[]{"a"},
      new Object[]{"abc"}
  );

  testQuery(
      "SELECT DISTINCT dim2 FROM druid.foo LIMIT 10",
      ImmutableList.<Query>of(expectedTopN),
      expectedRows
  );
}
@Test
public void testCountDistinct() throws Exception
{