mirror of https://github.com/apache/druid.git
SQL: Use topN for single-dim queries with LIMIT but no ORDER BY. (#3867)
This commit is contained in:
parent
af93a8d189
commit
bb7c496d88
|
@ -34,6 +34,7 @@ import io.druid.query.groupby.GroupByQuery;
|
|||
import io.druid.query.groupby.having.DimFilterHavingSpec;
|
||||
import io.druid.query.groupby.orderby.DefaultLimitSpec;
|
||||
import io.druid.query.groupby.orderby.OrderByColumnSpec;
|
||||
import io.druid.query.ordering.StringComparators;
|
||||
import io.druid.query.select.PagingSpec;
|
||||
import io.druid.query.select.SelectQuery;
|
||||
import io.druid.query.timeseries.TimeseriesQuery;
|
||||
|
@ -368,18 +369,27 @@ public class DruidQueryBuilder
|
|||
final boolean useApproximateTopN
|
||||
)
|
||||
{
|
||||
// Must have GROUP BY one column, ORDER BY one column, limit less than maxTopNLimit, and no HAVING.
|
||||
if (grouping == null
|
||||
|| grouping.getDimensions().size() != 1
|
||||
|| limitSpec == null
|
||||
|| limitSpec.getColumns().size() != 1
|
||||
|| limitSpec.getLimit() > maxTopNLimit
|
||||
|| having != null) {
|
||||
// Must have GROUP BY one column, ORDER BY zero or one column, limit less than or equal to maxTopNLimit, and no HAVING.
|
||||
final boolean topNOk = grouping != null
|
||||
&& grouping.getDimensions().size() == 1
|
||||
&& limitSpec != null
|
||||
&& (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit)
|
||||
&& having == null;
|
||||
if (!topNOk) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions());
|
||||
final OrderByColumnSpec limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
|
||||
final OrderByColumnSpec limitColumn;
|
||||
if (limitSpec.getColumns().isEmpty()) {
|
||||
limitColumn = new OrderByColumnSpec(
|
||||
dimensionSpec.getOutputName(),
|
||||
OrderByColumnSpec.Direction.ASCENDING,
|
||||
StringComparators.LEXICOGRAPHIC
|
||||
);
|
||||
} else {
|
||||
limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
|
||||
}
|
||||
final TopNMetricSpec topNMetricSpec;
|
||||
|
||||
if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
|
||||
|
|
|
@ -1597,6 +1597,31 @@ public class CalciteQueryTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelectDistinctWithLimit() throws Exception
|
||||
{
|
||||
// Should use topN even if approximate topNs are off, because this query is exact.
|
||||
|
||||
testQuery(
|
||||
"SELECT DISTINCT dim2 FROM druid.foo LIMIT 10",
|
||||
ImmutableList.<Query>of(
|
||||
new TopNQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.intervals(QSS(Filtration.eternity()))
|
||||
.granularity(QueryGranularities.ALL)
|
||||
.dimension(new DefaultDimensionSpec("dim2", "d0"))
|
||||
.metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
|
||||
.threshold(10)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{""},
|
||||
new Object[]{"a"},
|
||||
new Object[]{"abc"}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountDistinct() throws Exception
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue