mirror of https://github.com/apache/druid.git
SQL: Clarify approximate distinct count behavior. (#4000)
This commit is contained in:
parent
67d0ae3271
commit
af5a4cce3c
|
@ -104,10 +104,11 @@ You can access table and column metadata through JDBC using `connection.getMetaD
|
||||||
|
|
||||||
The following SQL queries and features may be executed using approximate algorithms:
|
The following SQL queries and features may be executed using approximate algorithms:
|
||||||
|
|
||||||
- `COUNT(DISTINCT col)` and `APPROX_COUNT_DISTINCT(col)` aggregations use
|
- `COUNT(DISTINCT col)` and `APPROX_COUNT_DISTINCT(col)` aggregations by default use
|
||||||
[HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf), a fast approximate distinct counting
|
[HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf), a fast approximate distinct counting
|
||||||
algorithm. If you need exact distinct counts, set "useApproximateCountDistinct" to "false", either through query
|
algorithm. To disable this behavior for `COUNT(DISTINCT col)`, and use exact distinct counts, set
|
||||||
context or through broker configuration.
|
"useApproximateCountDistinct" to "false", either through query context or through broker configuration.
|
||||||
|
`APPROX_COUNT_DISTINCT(col)` is always approximate, regardless of this setting.
|
||||||
- TopN-style queries with a single grouping column, like
|
- TopN-style queries with a single grouping column, like
|
||||||
`SELECT col1, SUM(col2) FROM data_source GROUP BY col1 ORDER BY SUM(col2) DESC LIMIT 100`, by default will be executed
|
`SELECT col1, SUM(col2) FROM data_source GROUP BY col1 ORDER BY SUM(col2) DESC LIMIT 100`, by default will be executed
|
||||||
as [TopN queries](topnquery.html), which use an approximate algorithm. To disable this behavior, and use exact
|
as [TopN queries](topnquery.html), which use an approximate algorithm. To disable this behavior, and use exact
|
||||||
|
|
|
@ -1928,6 +1928,38 @@ public class CalciteQueryTest
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks that APPROX_COUNT_DISTINCT is still planned as a cardinality aggregation when the
 * query is run with PLANNER_CONFIG_NO_HLL.
 *
 * The test issues "SELECT APPROX_COUNT_DISTINCT(dim2) FROM druid.foo" and expects exactly one
 * native timeseries query (all-time interval, ALL granularity) carrying a single
 * CardinalityAggregatorFactory named "a0" over dimension "dim2", and one result row holding 3L.
 */
@Test
|
||||||
|
public void testApproxCountDistinctWhenHllDisabled() throws Exception
|
||||||
|
{
|
||||||
|
// When HLL is disabled, APPROX_COUNT_DISTINCT is still approximate.
// (Only COUNT(DISTINCT col) is meant to become exact under that setting; the explicit
// APPROX_COUNT_DISTINCT function is expected to ignore it.)
|
||||||
|
|
||||||
|
testQuery(
|
||||||
|
// NOTE(review): presumably a planner config with approximate COUNT(DISTINCT) turned off
// ("useApproximateCountDistinct" = false) -- confirm against the constant's definition.
PLANNER_CONFIG_NO_HLL,
|
||||||
|
"SELECT APPROX_COUNT_DISTINCT(dim2) FROM druid.foo",
|
||||||
|
// Expected native queries: a single timeseries query.
ImmutableList.<Query>of(
|
||||||
|
Druids.newTimeseriesQueryBuilder()
|
||||||
|
.dataSource(CalciteTests.DATASOURCE1)
|
||||||
|
.intervals(QSS(Filtration.eternity()))
|
||||||
|
.granularity(Granularities.ALL)
|
||||||
|
.aggregators(
|
||||||
|
AGGS(
|
||||||
|
// The cardinality aggregator is what makes the expected plan approximate.
new CardinalityAggregatorFactory(
|
||||||
|
"a0",
|
||||||
|
null,
|
||||||
|
DIMS(new DefaultDimensionSpec("dim2", null)),
|
||||||
|
// NOTE(review): presumably the "byRow" flag -- confirm against the constructor signature.
false
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.context(TIMESERIES_CONTEXT_DEFAULT)
|
||||||
|
.build()
|
||||||
|
),
|
||||||
|
// Expected result rows: one row with a single value, 3.
ImmutableList.of(
|
||||||
|
new Object[]{3L}
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Exception
|
public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Exception
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue