From 8276c031c52af5ab1bc8398c28f4fce2f0d72702 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 25 Oct 2021 12:16:21 -0700 Subject: [PATCH] Add druid.sql.approxCountDistinct.function property. (#11181) * Add druid.sql.approxCountDistinct.function property. The new property allows admins to configure the implementation for APPROX_COUNT_DISTINCT and COUNT(DISTINCT expr) in approximate mode. The motivation for adding this setting is to enable site admins to switch the default HLL implementation to DataSketches. For example, an admin can set: druid.sql.approxCountDistinct.function = APPROX_COUNT_DISTINCT_DS_HLL * Fixes * Fix tests. * Remove erroneous cannotVectorize. * Remove unused import. * Remove unused test imports. --- docs/configuration/index.md | 1 + docs/querying/sql.md | 9 +- .../datasketches/hll/HllSketchModule.java | 6 + ...ketchApproxCountDistinctSqlAggregator.java | 3 +- .../datasketches/theta/SketchModule.java | 6 + ...ketchApproxCountDistinctSqlAggregator.java | 3 +- .../hll/sql/HllSketchSqlAggregatorTest.java | 33 ++++-- .../sql/ThetaSketchSqlAggregatorTest.java | 44 ++++--- .../filter/sql/BloomDimFilterSqlTest.java | 7 +- sql/pom.xml | 4 + .../ApproxCountDistinctSqlAggregator.java | 110 ++++++++++++++++++ .../aggregation/SqlAggregationModule.java | 44 ++++++- ...ltinApproxCountDistinctSqlAggregator.java} | 15 ++- .../builtin/CountSqlAggregator.java | 45 ++++--- .../calcite/planner/DruidOperatorTable.java | 8 +- .../druid/sql/guice/ApproxCountDistinct.java | 34 ++++++ .../apache/druid/sql/guice/SqlBindings.java | 20 ++++ .../org/apache/druid/sql/guice/SqlModule.java | 1 + .../druid/sql/calcite/CalciteQueryTest.java | 30 +++++ .../aggregation/SqlAggregationModuleTest.java | 18 ++- .../druid/sql/calcite/util/CalciteTests.java | 11 +- website/.spelling | 1 + 22 files changed, 378 insertions(+), 75 deletions(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java rename sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/{ApproxCountDistinctSqlAggregator.java => BuiltinApproxCountDistinctSqlAggregator.java} (92%) create mode 100644 sql/src/main/java/org/apache/druid/sql/guice/ApproxCountDistinct.java diff --git a/docs/configuration/index.md b/docs/configuration/index.md index c20d801cf59..cc10007dd5c 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1822,6 +1822,7 @@ The Druid SQL server is configured through the following properties on the Broke |`druid.sql.planner.metadataSegmentCacheEnable`|Whether to keep a cache of published segments in broker. If true, broker polls coordinator in background to get segments from metadata store and maintains a local cache. If false, coordinator's REST API will be invoked when broker needs published segments info.|false| |`druid.sql.planner.metadataSegmentPollPeriod`|How often to poll coordinator for published segments list if `druid.sql.planner.metadataSegmentCacheEnable` is set to true. Poll period is in milliseconds. |60000| |`druid.sql.planner.authorizeSystemTablesDirectly`|If true, Druid authorizes queries against any of the system schema tables (`sys` in SQL) as `SYSTEM_TABLE` resources which require `READ` access, in addition to permissions based content filtering.|false| +|`druid.sql.approxCountDistinct.function`|Implementation to use for the [`APPROX_COUNT_DISTINCT` function](../querying/sql.md#aggregation-functions). Without extensions loaded, the only valid value is `APPROX_COUNT_DISTINCT_BUILTIN` (a HyperLogLog, or HLL, based implementation). If the [DataSketches extension](../development/extensions-core/datasketches-extension.md) is loaded, this can also be `APPROX_COUNT_DISTINCT_DS_HLL` (alternative HLL implementation) or `APPROX_COUNT_DISTINCT_DS_THETA`.

Theta sketches use significantly more memory than HLL sketches, so you should prefer one of the two HLL implementations.|APPROX_COUNT_DISTINCT_BUILTIN| > Previous versions of Druid had properties named `druid.sql.planner.maxQueryCount` and `druid.sql.planner.maxSemiJoinRowsInMemory`. > These properties are no longer available. Since Druid 0.18.0, you can use `druid.server.http.maxSubqueryRows` to control the maximum diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 3d8c50deb9e..8d658e48b4f 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -328,14 +328,15 @@ Only the COUNT, ARRAY_AGG, and STRING_AGG aggregations can accept the DISTINCT k |Function|Notes|Default| |--------|-----|-------| |`COUNT(*)`|Counts the number of rows.|`0`| -|`COUNT(DISTINCT expr)`|Counts distinct values of expr, which can be string, numeric, or hyperUnique. By default this is approximate, using a variant of [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf). To get exact counts set "useApproximateCountDistinct" to "false". If you do this, expr must be string or numeric, since exact counts are not possible using hyperUnique columns. See also `APPROX_COUNT_DISTINCT(expr)`. In exact mode, only one distinct count per query is permitted unless `useGroupingSetForExactDistinct` is set to true in query contexts or broker configurations.|`0`| +|`COUNT(DISTINCT expr)`|Counts distinct values of expr.

When "useApproximateCountDistinct" is set to "true" (the default), this is an alias for APPROX_COUNT_DISTINCT. The specific algorithm that will be used depends on the value of [`druid.sql.approxCountDistinct.function`](../configuration/index.md#sql). In this mode, you can could strings, numbers, or prebuilt sketches. If counting prebuilt sketches, the prebuilt sketch type must match the selected algorithm.

When "useApproximateCountDistinct" is set to "false", the computation will be exact. In this case, expr must be string or numeric, since exact counts are not possible using prebuilt sketches. In exact mode, only one distinct count per query is permitted unless "useGroupingSetForExactDistinct" is enabled.| |`SUM(expr)`|Sums numbers.|`null` if `druid.generic.useDefaultValueForNull=false`, otherwise `0`| |`MIN(expr)`|Takes the minimum of numbers.|`null` if `druid.generic.useDefaultValueForNull=false`, otherwise `9223372036854775807` (maximum LONG value)| |`MAX(expr)`|Takes the maximum of numbers.|`null` if `druid.generic.useDefaultValueForNull=false`, otherwise `-9223372036854775808` (minimum LONG value)| |`AVG(expr)`|Averages numbers.|`null` if `druid.generic.useDefaultValueForNull=false`, otherwise `0`| -|`APPROX_COUNT_DISTINCT(expr)`|_Usage note:_ consider using `APPROX_COUNT_DISTINCT_DS_HLL` instead, which offers better accuracy in many cases.

Counts distinct values of expr, which can be a regular column or a hyperUnique column. This is always approximate, regardless of the value of "useApproximateCountDistinct". This uses Druid's built-in "cardinality" or "hyperUnique" aggregators. See also `COUNT(DISTINCT expr)`.|`0`| -|`APPROX_COUNT_DISTINCT_DS_HLL(expr, [lgK, tgtHllType])`|Counts distinct values of `expr`, which can be a regular column or an [HLL sketch](../development/extensions-core/datasketches-hll.md) column. Results are always approximate, regardless of the value of [`useApproximateCountDistinct`](#connection-context). The `lgK` and `tgtHllType` parameters here are, like the equivalents in the [aggregator](../development/extensions-core/datasketches-hll.md#aggregators), described in the HLL sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function. See also `COUNT(DISTINCT expr)`. |`0`| -|`APPROX_COUNT_DISTINCT_DS_THETA(expr, [size])`|Counts distinct values of expr, which can be a regular column or a [Theta sketch](../development/extensions-core/datasketches-theta.md) column. This is always approximate, regardless of the value of [`useApproximateCountDistinct`](#connection-context). The `size` parameter is described in the Theta sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function. See also `COUNT(DISTINCT expr)`. |`0`| +|`APPROX_COUNT_DISTINCT(expr)`|Counts distinct values of expr using an approximate algorithm. The expr can be a regular column or a prebuilt sketch column.

The specific algorithm that will be used depends on the value of [`druid.sql.approxCountDistinct.function`](../configuration/index.md#sql). By default, this is `APPROX_COUNT_DISTINCT_BUILTIN`. If the [DataSketches extension](../development/extensions-core/datasketches-extension.md) is loaded, this can also be set to `APPROX_COUNT_DISTINCT_DS_HLL` or `APPROX_COUNT_DISTINCT_DS_THETA`.

When run on prebuilt sketch columns, the sketch column type must match the implementation of this function. For example: when `druid.sql.approxCountDistinct.function` is set to `APPROX_COUNT_DISTINCT_BUILTIN`, this function will be able to run on prebuilt hyperUnique columns, but not on prebuilt HLLSketchBuild columns.| +|`APPROX_COUNT_DISTINCT_BUILTIN(expr)`|_Usage note:_ consider using `APPROX_COUNT_DISTINCT_DS_HLL` instead, which offers better accuracy in many cases.

Counts distinct values of expr using Druid's built-in "cardinality" or "hyperUnique" aggregators, which implement a variant of [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf). The expr can be a string, number, or prebuilt hyperUnique column. This is always approximate, regardless of the value of "useApproximateCountDistinct".| +|`APPROX_COUNT_DISTINCT_DS_HLL(expr, [lgK, tgtHllType])`|Counts distinct values of expr, which can be a regular column or an [HLL sketch](../development/extensions-core/datasketches-hll.md) column. Results are always approximate, regardless of the value of [`useApproximateCountDistinct`](#connection-context). The `lgK` and `tgtHllType` parameters here are, like the equivalents in the [aggregator](../development/extensions-core/datasketches-hll.md#aggregators), described in the HLL sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function. See also `COUNT(DISTINCT expr)`. |`0`| +|`APPROX_COUNT_DISTINCT_DS_THETA(expr, [size])`|Counts distinct values of expr, which can be a regular column or a [Theta sketch](../development/extensions-core/datasketches-theta.md) column. Results are always approximate, regardless of the value of [`useApproximateCountDistinct`](#connection-context). The `size` parameter is described in the Theta sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function. See also `COUNT(DISTINCT expr)`. |`0`| |`DS_HLL(expr, [lgK, tgtHllType])`|Creates an [HLL sketch](../development/extensions-core/datasketches-hll.md) on the values of expr, which can be a regular column or a column containing HLL sketches. The `lgK` and `tgtHllType` parameters are described in the HLL sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function.|`'0'` (STRING)| |`DS_THETA(expr, [size])`|Creates a [Theta sketch](../development/extensions-core/datasketches-theta.md) on the values of expr, which can be a regular column or a column containing Theta sketches. The `size` parameter is described in the Theta sketch documentation. The [DataSketches extension](../development/extensions-core/datasketches-extension.md) must be loaded to use this function.|`'0.0'` (STRING)| |`APPROX_QUANTILE(expr, probability, [resolution])`|_Deprecated._ Use `APPROX_QUANTILE_DS` instead, which provides a superior distribution-independent algorithm with formal error guarantees.

Computes approximate quantiles on numeric or [approxHistogram](../development/extensions-core/approximate-histograms.md#approximate-histogram-aggregator) exprs. The "probability" should be between 0 and 1 (exclusive). The "resolution" is the number of centroids to use for the computation. Higher resolutions will give more precise results but also have higher overhead. If not provided, the default resolution is 50. The [approximate histogram extension](../development/extensions-core/approximate-histograms.md) must be loaded to use this function.|`NaN`| diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchModule.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchModule.java index 4e7282b7754..5044068262d 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchModule.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchModule.java @@ -65,6 +65,12 @@ public class HllSketchModule implements DruidModule SqlBindings.addOperatorConversion(binder, HllSketchEstimateWithErrorBoundsOperatorConversion.class); SqlBindings.addOperatorConversion(binder, HllSketchSetUnionOperatorConversion.class); SqlBindings.addOperatorConversion(binder, HllSketchToStringOperatorConversion.class); + + SqlBindings.addApproxCountDistinctChoice( + binder, + HllSketchApproxCountDistinctSqlAggregator.NAME, + HllSketchApproxCountDistinctSqlAggregator.class + ); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java index 1628dd13f11..d47b15a7eaf 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java @@ -36,8 +36,9 @@ import java.util.Collections; public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlAggregator implements SqlAggregator { + public static final String NAME = "APPROX_COUNT_DISTINCT_DS_HLL"; + private static final SqlAggFunction FUNCTION_INSTANCE = new HllSketchApproxCountDistinctSqlAggFunction(); - private static final String NAME = "APPROX_COUNT_DISTINCT_DS_HLL"; @Override public SqlAggFunction calciteFunction() diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchModule.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchModule.java index b3b785c0e06..9d5746533ff 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchModule.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchModule.java @@ -65,6 +65,12 @@ public class SketchModule implements DruidModule SqlBindings.addOperatorConversion(binder, ThetaSketchSetIntersectOperatorConversion.class); SqlBindings.addOperatorConversion(binder, ThetaSketchSetUnionOperatorConversion.class); SqlBindings.addOperatorConversion(binder, ThetaSketchSetNotOperatorConversion.class); + + SqlBindings.addApproxCountDistinctChoice( + binder, + ThetaSketchApproxCountDistinctSqlAggregator.NAME, + ThetaSketchApproxCountDistinctSqlAggregator.class + ); } @Override diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java index e44dc49e991..cac6cdaee21 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java @@ -36,8 +36,9 @@ import java.util.Collections; public class ThetaSketchApproxCountDistinctSqlAggregator extends ThetaSketchBaseSqlAggregator implements SqlAggregator { + public static final String NAME = "APPROX_COUNT_DISTINCT_DS_THETA"; + private static final SqlAggFunction FUNCTION_INSTANCE = new ThetaSketchSqlAggFunction(); - private static final String NAME = "APPROX_COUNT_DISTINCT_DS_THETA"; @Override public SqlAggFunction calciteFunction() diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 1a89b67dd7e..4942c7272c5 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -57,6 +57,8 @@ import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.sql.calcite.BaseCalciteQueryTest; +import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.DruidOperatorTable; import org.apache.druid.sql.calcite.util.CalciteTests; @@ -122,10 +124,17 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest @Override public DruidOperatorTable createOperatorTable() { + final HllSketchApproxCountDistinctSqlAggregator approxCountDistinctSqlAggregator = + new HllSketchApproxCountDistinctSqlAggregator(); + return new DruidOperatorTable( ImmutableSet.of( - new HllSketchApproxCountDistinctSqlAggregator(), - new HllSketchObjectSqlAggregator() + approxCountDistinctSqlAggregator, + new HllSketchObjectSqlAggregator(), + + // Use APPROX_COUNT_DISTINCT_DS_HLL as APPROX_COUNT_DISTINCT impl for these tests. + new CountSqlAggregator(new ApproxCountDistinctSqlAggregator(approxCountDistinctSqlAggregator)), + new ApproxCountDistinctSqlAggregator(approxCountDistinctSqlAggregator) ), ImmutableSet.of( new HllSketchSetUnionOperatorConversion(), @@ -142,6 +151,16 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest // Can't vectorize due to SUBSTRING expression. cannotVectorize(); + final String sql = "SELECT\n" + + " SUM(cnt),\n" + + " APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" // uppercase + + " APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered + + " APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn, using generic A.C.D. + + " COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression, using COUNT DISTINCT + + " APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" // on native HllSketch column + + " APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" // on native HllSketch column + + "FROM druid.foo"; + final List expectedResults; if (NullHandling.replaceWithDefault()) { @@ -155,15 +174,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest } testQuery( - "SELECT\n" - + " SUM(cnt),\n" - + " APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" // uppercase - + " APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered - + " APPROX_COUNT_DISTINCT_DS_HLL(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn - + " APPROX_COUNT_DISTINCT_DS_HLL(SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression - + " APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" // on native HllSketch column - + " APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" // on native HllSketch column - + "FROM druid.foo", + sql, ImmutableList.of( Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 31c43f7f420..5335a8dcef7 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -52,6 +52,8 @@ import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.sql.calcite.BaseCalciteQueryTest; +import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.DruidOperatorTable; import org.apache.druid.sql.calcite.util.CalciteTests; @@ -118,10 +120,17 @@ public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest @Override public DruidOperatorTable createOperatorTable() { + final ThetaSketchApproxCountDistinctSqlAggregator approxCountDistinctSqlAggregator = + new ThetaSketchApproxCountDistinctSqlAggregator(); + return new DruidOperatorTable( ImmutableSet.of( new ThetaSketchApproxCountDistinctSqlAggregator(), - new ThetaSketchObjectSqlAggregator() + new ThetaSketchObjectSqlAggregator(), + + // Use APPROX_COUNT_DISTINCT_DS_THETA as APPROX_COUNT_DISTINCT impl for these tests. + new CountSqlAggregator(new ApproxCountDistinctSqlAggregator(approxCountDistinctSqlAggregator)), + new ApproxCountDistinctSqlAggregator(approxCountDistinctSqlAggregator) ), ImmutableSet.of( new ThetaSketchEstimateOperatorConversion(), @@ -133,13 +142,28 @@ public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest ); } - @Test public void testApproxCountDistinctThetaSketch() throws Exception { // Cannot vectorize due to SUBSTRING. cannotVectorize(); + final String sql = "SELECT\n" + + " SUM(cnt),\n" + + " APPROX_COUNT_DISTINCT_DS_THETA(dim2),\n" + // uppercase + + " APPROX_COUNT_DISTINCT_DS_THETA(dim2) FILTER(WHERE dim2 <> ''),\n" + // lowercase; also, filtered + + " APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on extractionFn, using A.C.D. + + " COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on expression, using COUNT DISTINCT + + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1, 32768),\n" + // on native theta sketch column + + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1)\n" + // on native theta sketch column + + "FROM druid.foo"; + final List expectedResults; if (NullHandling.replaceWithDefault()) { @@ -169,21 +193,7 @@ public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest } testQuery( - "SELECT\n" - + " SUM(cnt),\n" - + " APPROX_COUNT_DISTINCT_DS_THETA(dim2),\n" - // uppercase - + " APPROX_COUNT_DISTINCT_DS_THETA(dim2) FILTER(WHERE dim2 <> ''),\n" - // lowercase; also, filtered - + " APPROX_COUNT_DISTINCT_DS_THETA(SUBSTRING(dim2, 1, 1)),\n" - // on extractionFn - + " APPROX_COUNT_DISTINCT_DS_THETA(SUBSTRING(dim2, 1, 1) || 'x'),\n" - // on expression - + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1, 32768),\n" - // on native theta sketch column - + " APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1)\n" - // on native theta sketch column - + "FROM druid.foo", + sql, ImmutableList.of( Druids.newTimeseriesQueryBuilder() .dataSource(CalciteTests.DATASOURCE1) diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java index 97120c22c88..cc1b7c8a445 100644 --- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java +++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java @@ -39,6 +39,9 @@ import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.OrDimFilter; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.BaseCalciteQueryTest; +import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.BuiltinApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.DruidOperatorTable; import org.apache.druid.sql.calcite.util.CalciteTests; @@ -56,7 +59,9 @@ public class BloomDimFilterSqlTest extends BaseCalciteQueryTest { CalciteTests.getJsonMapper().registerModule(new BloomFilterSerializersModule()); return new DruidOperatorTable( - ImmutableSet.of(), + ImmutableSet.of( + new CountSqlAggregator(new ApproxCountDistinctSqlAggregator(new BuiltinApproxCountDistinctSqlAggregator())) + ), ImmutableSet.of(new BloomFilterOperatorConversion()) ); } diff --git a/sql/pom.xml b/sql/pom.xml index 28c4d0c3cbb..5b402c76210 100644 --- a/sql/pom.xml +++ b/sql/pom.xml @@ -133,6 +133,10 @@ com.google.inject.extensions guice-multibindings + + javax.inject + javax.inject + javax.ws.rs jsr311-api diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java new file mode 100644 index 00000000000..0ff7972657e --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.aggregation; + +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Optionality; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Implementation for APPROX_COUNT_DISTINCT, and for COUNT(DISTINCT expr) when {@code useApproximateCountDistinct} + * is enabled. + * + * In production, the delegate is chosen based on the value of + * {@link org.apache.druid.sql.guice.SqlModule#PROPERTY_SQL_APPROX_COUNT_DISTINCT_CHOICE}. + */ +public class ApproxCountDistinctSqlAggregator implements SqlAggregator +{ + private static final SqlAggFunction FUNCTION_INSTANCE = new ApproxCountDistinctSqlAggFunction(); + private static final String NAME = "APPROX_COUNT_DISTINCT"; + + private final SqlAggregator delegate; + + public ApproxCountDistinctSqlAggregator(final SqlAggregator delegate) + { + this.delegate = delegate; + } + + @Override + public SqlAggFunction calciteFunction() + { + return FUNCTION_INSTANCE; + } + + @Nullable + @Override + public Aggregation toDruidAggregation( + PlannerContext plannerContext, + RowSignature rowSignature, + VirtualColumnRegistry virtualColumnRegistry, + RexBuilder rexBuilder, + String name, + AggregateCall aggregateCall, + Project project, + List existingAggregations, + boolean finalizeAggregations + ) + { + return delegate.toDruidAggregation( + plannerContext, + rowSignature, + virtualColumnRegistry, + rexBuilder, + name, + aggregateCall, + project, + existingAggregations, + finalizeAggregations + ); + } + + private static class ApproxCountDistinctSqlAggFunction extends SqlAggFunction + { + ApproxCountDistinctSqlAggFunction() + { + super( + NAME, + null, + SqlKind.OTHER_FUNCTION, + ReturnTypes.explicit(SqlTypeName.BIGINT), + InferTypes.VARCHAR_1024, + OperandTypes.ANY, + SqlFunctionCategory.STRING, + false, + false, + Optionality.FORBIDDEN + ); + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModule.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModule.java index 9dc7cdf4de9..bff4d701bfd 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModule.java @@ -20,19 +20,55 @@ package org.apache.druid.sql.calcite.aggregation; import com.google.inject.Binder; +import com.google.inject.Key; import com.google.inject.Module; -import com.google.inject.multibindings.Multibinder; +import com.google.inject.Provides; +import org.apache.druid.guice.PolyBind; +import org.apache.druid.sql.calcite.aggregation.builtin.BuiltinApproxCountDistinctSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; +import org.apache.druid.sql.guice.ApproxCountDistinct; +import org.apache.druid.sql.guice.SqlBindings; +import org.apache.druid.sql.guice.SqlModule; /** * Module that provides SQL aggregations. - * To add an aggregation use {@link org.apache.druid.sql.guice.SqlBindings#addAggregator(Binder, Class)} + * + * To add an aggregation, use {@link SqlBindings#addAggregator}. + * + * To add an implementation option for APPROX_COUNT_DISTINCT, use {@link SqlBindings#addApproxCountDistinctChoice}. */ public class SqlAggregationModule implements Module { @Override public void configure(Binder binder) { - // Add empty SqlAggregator binder. - Multibinder.newSetBinder(binder, SqlAggregator.class); + // Set up APPROX_COUNT_DISTINCT. As an additional bonus effect, this line ensures that the Guice binding + // for SqlAggregator is set up. + SqlBindings.addAggregator(binder, ApproxCountDistinctSqlAggregator.class); + + PolyBind.createChoiceWithDefault( + binder, + SqlModule.PROPERTY_SQL_APPROX_COUNT_DISTINCT_CHOICE, + Key.get(SqlAggregator.class, ApproxCountDistinct.class), + BuiltinApproxCountDistinctSqlAggregator.NAME + ); + + SqlBindings.addApproxCountDistinctChoice( + binder, + BuiltinApproxCountDistinctSqlAggregator.NAME, + BuiltinApproxCountDistinctSqlAggregator.class + ); + + // Set up COUNT. Because it delegates to APPROX_COUNT_DISTINCT in certain cases, it must be added here + // so it can have APPROX_COUNT_DISTINCT injected. + SqlBindings.addAggregator(binder, CountSqlAggregator.class); + } + + @Provides + public ApproxCountDistinctSqlAggregator provideApproxCountDistinctSqlAggregator( + @ApproxCountDistinct SqlAggregator aggregator + ) + { + return new ApproxCountDistinctSqlAggregator(aggregator); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java similarity index 92% rename from sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ApproxCountDistinctSqlAggregator.java rename to sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 761c8d9f267..453617037d2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -33,6 +33,7 @@ import org.apache.calcite.sql.type.InferTypes; import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Optionality; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory; @@ -56,10 +57,11 @@ import javax.annotation.Nullable; import java.util.Collections; import java.util.List; -public class ApproxCountDistinctSqlAggregator implements SqlAggregator +public class BuiltinApproxCountDistinctSqlAggregator implements SqlAggregator { - private static final SqlAggFunction FUNCTION_INSTANCE = new ApproxCountDistinctSqlAggFunction(); - private static final String NAME = "APPROX_COUNT_DISTINCT"; + public static final String NAME = "APPROX_COUNT_DISTINCT_BUILTIN"; + + private static final SqlAggFunction FUNCTION_INSTANCE = new BuiltinApproxCountDistinctSqlAggFunction(); @Override public SqlAggFunction calciteFunction() @@ -136,9 +138,9 @@ public class ApproxCountDistinctSqlAggregator implements SqlAggregator ); } - private static class ApproxCountDistinctSqlAggFunction extends SqlAggFunction + private static class BuiltinApproxCountDistinctSqlAggFunction extends SqlAggFunction { - ApproxCountDistinctSqlAggFunction() + BuiltinApproxCountDistinctSqlAggFunction() { super( NAME, @@ -149,7 +151,8 @@ public class ApproxCountDistinctSqlAggregator implements SqlAggregator OperandTypes.ANY, SqlFunctionCategory.STRING, false, - false + false, + Optionality.FORBIDDEN ); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/CountSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/CountSqlAggregator.java index 442b9ea12d4..9d0c55a3845 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/CountSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/CountSqlAggregator.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.DimFilter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.Aggregations; +import org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -42,11 +43,18 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import javax.annotation.Nullable; +import javax.inject.Inject; import java.util.List; public class CountSqlAggregator implements SqlAggregator { - private static final ApproxCountDistinctSqlAggregator APPROX_COUNT_DISTINCT = new ApproxCountDistinctSqlAggregator(); + private final ApproxCountDistinctSqlAggregator approxCountDistinctAggregator; + + @Inject + public CountSqlAggregator(ApproxCountDistinctSqlAggregator approxCountDistinctAggregator) + { + this.approxCountDistinctAggregator = approxCountDistinctAggregator; + } @Override public SqlAggFunction calciteFunction() @@ -55,27 +63,27 @@ public class CountSqlAggregator implements SqlAggregator } static AggregatorFactory createCountAggregatorFactory( - final String countName, - final PlannerContext plannerContext, - final RowSignature rowSignature, - final VirtualColumnRegistry virtualColumnRegistry, - final RexBuilder rexBuilder, - final AggregateCall aggregateCall, - final Project project + final String countName, + final PlannerContext plannerContext, + final RowSignature rowSignature, + final VirtualColumnRegistry virtualColumnRegistry, + final RexBuilder rexBuilder, + final AggregateCall aggregateCall, + final Project project ) { final RexNode rexNode = Expressions.fromFieldAccess( - rowSignature, - project, - Iterables.getOnlyElement(aggregateCall.getArgList()) + rowSignature, + project, + Iterables.getOnlyElement(aggregateCall.getArgList()) ); if (rexNode.getType().isNullable()) { final DimFilter nonNullFilter = Expressions.toFilter( - plannerContext, - rowSignature, - virtualColumnRegistry, - rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, ImmutableList.of(rexNode)) + plannerContext, + rowSignature, + virtualColumnRegistry, + rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, ImmutableList.of(rexNode)) ); if (nonNullFilter == null) { @@ -120,12 +128,15 @@ public class CountSqlAggregator implements SqlAggregator } else if (aggregateCall.isDistinct()) { // COUNT(DISTINCT x) if (plannerContext.getPlannerConfig().isUseApproximateCountDistinct()) { - return APPROX_COUNT_DISTINCT.toDruidAggregation( + return approxCountDistinctAggregator.toDruidAggregation( plannerContext, rowSignature, virtualColumnRegistry, rexBuilder, - name, aggregateCall, project, existingAggregations, + name, + aggregateCall, + project, + existingAggregations, finalizeAggregations ); } else { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index f5d9edc28c2..118b6efbc67 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -33,11 +33,10 @@ import org.apache.calcite.sql.validate.SqlNameMatcher; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; -import org.apache.druid.sql.calcite.aggregation.builtin.ApproxCountDistinctSqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.ArraySqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.AvgSqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.BitwiseSqlAggregator; -import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; +import org.apache.druid.sql.calcite.aggregation.builtin.BuiltinApproxCountDistinctSqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.EarliestLatestAnySqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.GroupingSqlAggregator; import org.apache.druid.sql.calcite.aggregation.builtin.MaxSqlAggregator; @@ -125,11 +124,11 @@ import java.util.stream.Collectors; public class DruidOperatorTable implements SqlOperatorTable { + // COUNT and APPROX_COUNT_DISTINCT are not here because they are added by SqlAggregationModule. private static final List STANDARD_AGGREGATORS = ImmutableList.builder() - .add(new ApproxCountDistinctSqlAggregator()) + .add(new BuiltinApproxCountDistinctSqlAggregator()) .add(new AvgSqlAggregator()) - .add(new CountSqlAggregator()) .add(EarliestLatestAnySqlAggregator.EARLIEST) .add(EarliestLatestAnySqlAggregator.LATEST) .add(EarliestLatestAnySqlAggregator.ANY_VALUE) @@ -145,7 +144,6 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new BitwiseSqlAggregator(BitwiseSqlAggregator.Op.XOR)) .build(); - // STRLEN has so many aliases. private static final SqlOperatorConversion CHARACTER_LENGTH_CONVERSION = new DirectOperatorConversion( SqlStdOperatorTable.CHARACTER_LENGTH, diff --git a/sql/src/main/java/org/apache/druid/sql/guice/ApproxCountDistinct.java b/sql/src/main/java/org/apache/druid/sql/guice/ApproxCountDistinct.java new file mode 100644 index 00000000000..9e479e35e6c --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/guice/ApproxCountDistinct.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.guice; + +import com.google.inject.BindingAnnotation; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +/** + * Used by {@link SqlBindings#addApproxCountDistinctChoice}. + */ +@BindingAnnotation +@Retention(RetentionPolicy.RUNTIME) +public @interface ApproxCountDistinct +{ +} diff --git a/sql/src/main/java/org/apache/druid/sql/guice/SqlBindings.java b/sql/src/main/java/org/apache/druid/sql/guice/SqlBindings.java index 6694e2513d1..4dcf5165fa0 100644 --- a/sql/src/main/java/org/apache/druid/sql/guice/SqlBindings.java +++ b/sql/src/main/java/org/apache/druid/sql/guice/SqlBindings.java @@ -20,8 +20,10 @@ package org.apache.druid.sql.guice; import com.google.inject.Binder; +import com.google.inject.Key; import com.google.inject.Scopes; import com.google.inject.multibindings.Multibinder; +import org.apache.druid.guice.PolyBind; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; import org.apache.druid.sql.calcite.schema.NamedSchema; @@ -39,6 +41,24 @@ public class SqlBindings Multibinder.newSetBinder(binder, SqlAggregator.class).addBinding().to(aggregatorClass); } + /** + * Add a choice for {@code APPROX_COUNT_DISTINCT} implementation. + * + * The SqlAggregator's {@link SqlAggregator#calciteFunction()} method will be ignored and replaced by the + * version from {@link org.apache.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator}. For sane results, + * the provided aggregator class must be compatible with that function signature. + */ + public static void addApproxCountDistinctChoice( + final Binder binder, + final String name, + final Class clazz + ) + { + PolyBind.optionBinder(binder, Key.get(SqlAggregator.class, ApproxCountDistinct.class)) + .addBinding(name) + .to(clazz); + } + public static void addOperatorConversion( final Binder binder, final Class clazz diff --git a/sql/src/main/java/org/apache/druid/sql/guice/SqlModule.java b/sql/src/main/java/org/apache/druid/sql/guice/SqlModule.java index d513773cd6a..da12e7d0f45 100644 --- a/sql/src/main/java/org/apache/druid/sql/guice/SqlModule.java +++ b/sql/src/main/java/org/apache/druid/sql/guice/SqlModule.java @@ -46,6 +46,7 @@ public class SqlModule implements Module public static final String PROPERTY_SQL_ENABLE_JSON_OVER_HTTP = "druid.sql.http.enable"; public static final String PROPERTY_SQL_ENABLE_AVATICA = "druid.sql.avatica.enable"; public static final String PROPERTY_SQL_VIEW_MANAGER_TYPE = "druid.sql.viewmanager.type"; + public static final String PROPERTY_SQL_APPROX_COUNT_DISTINCT_CHOICE = "druid.sql.approxCountDistinct.function"; @Inject private Properties props; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 91d02f8720b..a632f0eab38 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -7984,6 +7984,36 @@ public class CalciteQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testApproxCountDistinctBuiltin() throws Exception + { + testQuery( + "SELECT APPROX_COUNT_DISTINCT_BUILTIN(dim2) FROM druid.foo", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .aggregators( + aggregators( + new CardinalityAggregatorFactory( + "a0", + null, + dimensions(new DefaultDimensionSpec("dim2", null)), + false, + true + ) + ) + ) + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Exception { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModuleTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModuleTest.java index 2c4d4ef91a2..2c7f24c8311 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModuleTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/aggregation/SqlAggregationModuleTest.java @@ -23,11 +23,16 @@ import com.google.inject.Guice; import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.TypeLiteral; +import org.apache.druid.sql.calcite.aggregation.builtin.CountSqlAggregator; +import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.util.Comparator; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; public class SqlAggregationModuleTest { @@ -42,10 +47,17 @@ public class SqlAggregationModuleTest } @Test - public void testEmptySqlAggregatorsAreBound() + public void testDefaultSqlAggregatorsAreBound() { - Set sqlAggregators = injector.getInstance(Key.get(new TypeLiteral>(){})); + Set sqlAggregators = injector.getInstance(Key.get(new TypeLiteral>() {})); Assert.assertNotNull(sqlAggregators); - Assert.assertTrue(sqlAggregators.isEmpty()); + Assert.assertEquals(2, sqlAggregators.size()); + + final List aggregators = sqlAggregators.stream() + .sorted(Comparator.comparing(o -> o.getClass().getName())) + .collect(Collectors.toList()); + + Assert.assertThat(aggregators.get(0), CoreMatchers.instanceOf(ApproxCountDistinctSqlAggregator.class)); + Assert.assertThat(aggregators.get(1), CoreMatchers.instanceOf(CountSqlAggregator.class)); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java index b89d017f44c..063844eeba1 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/CalciteTests.java @@ -110,7 +110,7 @@ import org.apache.druid.server.security.Escalator; import org.apache.druid.server.security.NoopEscalator; import org.apache.druid.server.security.ResourceType; import org.apache.druid.sql.SqlLifecycleFactory; -import org.apache.druid.sql.calcite.expression.SqlOperatorConversion; +import org.apache.druid.sql.calcite.aggregation.SqlAggregationModule; import org.apache.druid.sql.calcite.expression.builtin.QueryLookupOperatorConversion; import org.apache.druid.sql.calcite.planner.DruidOperatorTable; import org.apache.druid.sql.calcite.planner.PlannerConfig; @@ -130,6 +130,7 @@ import org.apache.druid.sql.calcite.schema.ViewSchema; import org.apache.druid.sql.calcite.view.DruidViewMacroFactory; import org.apache.druid.sql.calcite.view.NoopViewManager; import org.apache.druid.sql.calcite.view.ViewManager; +import org.apache.druid.sql.guice.SqlBindings; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.easymock.EasyMock; @@ -255,7 +256,9 @@ public class CalciteTests ) ); binder.bind(LookupExtractorFactoryContainerProvider.class).toInstance(lookupProvider); - } + SqlBindings.addOperatorConversion(binder, QueryLookupOperatorConversion.class); + }, + new SqlAggregationModule() ); private static final InputRowParser> PARSER = new MapInputRowParser( @@ -1024,9 +1027,7 @@ public class CalciteTests public static DruidOperatorTable createOperatorTable() { try { - final Set extractionOperators = new HashSet<>(); - extractionOperators.add(INJECTOR.getInstance(QueryLookupOperatorConversion.class)); - return new DruidOperatorTable(ImmutableSet.of(), extractionOperators); + return INJECTOR.getInstance(DruidOperatorTable.class); } catch (Exception e) { throw new RuntimeException(e); diff --git a/website/.spelling b/website/.spelling index 182bf1eeec1..9b298b702dc 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1693,6 +1693,7 @@ GiB 5MiB 8u60 Autoscaler +APPROX_COUNT_DISTINCT_BUILTIN AvaticaConnectionBalancer EventReceiverFirehose File.getFreeSpace