mirror of https://github.com/apache/druid.git
SQL: Clarify approximate distinct count behavior. (#4000)
This commit is contained in:
parent
67d0ae3271
commit
af5a4cce3c
|
@ -104,10 +104,11 @@ You can access table and column metadata through JDBC using `connection.getMetaD
|
|||
|
||||
The following SQL queries and features may be executed using approximate algorithms:
|
||||
|
||||
- `COUNT(DISTINCT col)` and `APPROX_COUNT_DISTINCT(col)` aggregations use
|
||||
- `COUNT(DISTINCT col)` and `APPROX_COUNT_DISTINCT(col)` aggregations by default use
|
||||
[HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf), a fast approximate distinct counting
|
||||
algorithm. If you need exact distinct counts, set "useApproximateCountDistinct" to "false", either through query
|
||||
context or through broker configuration.
|
||||
algorithm. To disable this behavior for `COUNT(DISTINCT col)`, and use exact distinct counts, set
|
||||
"useApproximateCountDistinct" to "false", either through query context or through broker configuration.
|
||||
`APPROX_COUNT_DISTINCT(col)` is always approximate, regardless of this setting.
|
||||
- TopN-style queries with a single grouping column, like
|
||||
`SELECT col1, SUM(col2) FROM data_source GROUP BY col1 ORDER BY SUM(col2) DESC LIMIT 100`, by default will be executed
|
||||
as [TopN queries](topnquery.html), which use an approximate algorithm. To disable this behavior, and use exact
|
||||
|
|
|
@ -1928,6 +1928,38 @@ public class CalciteQueryTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApproxCountDistinctWhenHllDisabled() throws Exception
|
||||
{
|
||||
// When HLL is disabled, APPROX_COUNT_DISTINCT is still approximate.
|
||||
|
||||
testQuery(
|
||||
PLANNER_CONFIG_NO_HLL,
|
||||
"SELECT APPROX_COUNT_DISTINCT(dim2) FROM druid.foo",
|
||||
ImmutableList.<Query>of(
|
||||
Druids.newTimeseriesQueryBuilder()
|
||||
.dataSource(CalciteTests.DATASOURCE1)
|
||||
.intervals(QSS(Filtration.eternity()))
|
||||
.granularity(Granularities.ALL)
|
||||
.aggregators(
|
||||
AGGS(
|
||||
new CardinalityAggregatorFactory(
|
||||
"a0",
|
||||
null,
|
||||
DIMS(new DefaultDimensionSpec("dim2", null)),
|
||||
false
|
||||
)
|
||||
)
|
||||
)
|
||||
.context(TIMESERIES_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
),
|
||||
ImmutableList.of(
|
||||
new Object[]{3L}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Exception
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue