Hll Sketch and Theta sketch estimate can now be used as an expression (#14312)

* Hll Sketch estimate can now be used as an expression
* Theta sketch estimate now can be used as an expression
This commit is contained in:
Soumyava 2023-06-07 08:44:25 +05:30 committed by GitHub
parent 2d258a95ad
commit 01b22ca022
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 589 additions and 7 deletions

View File

@ -25,7 +25,9 @@ import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.annotations.VisibleForTesting;
import com.google.inject.Binder;
import org.apache.datasketches.hll.HllSketch;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllPostAggExprMacros;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchEstimateOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.hll.sql.HllSketchEstimateWithErrorBoundsOperatorConversion;
@ -64,6 +66,7 @@ public class HllSketchModule implements DruidModule
SqlBindings.addOperatorConversion(binder, HllSketchSetUnionOperatorConversion.class);
SqlBindings.addOperatorConversion(binder, HllSketchToStringOperatorConversion.class);
ExpressionModule.addExprMacro(binder, HllPostAggExprMacros.HLLSketchEstimateExprMacro.class);
SqlBindings.addApproxCountDistinctChoice(
binder,
HllSketchApproxCountDistinctSqlAggregator.NAME,

View File

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.datasketches.hll.sql;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
import javax.annotation.Nullable;
import java.util.List;
import java.util.stream.Collectors;
public class HllPostAggExprMacros
{
public static final String HLL_SKETCH_ESTIMATE = "hll_sketch_estimate";
public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
{
@Override
public Expr apply(List<Expr> args)
{
validationHelperCheckAnyOfArgumentCount(args, 1, 2);
return new HllSketchEstimateExpr(args);
}
@Override
public String name()
{
return HLL_SKETCH_ESTIMATE;
}
}
public static class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
private Expr estimateExpr;
private Expr isRound;
public HllSketchEstimateExpr(List<Expr> args)
{
super(HLL_SKETCH_ESTIMATE, args);
this.estimateExpr = args.get(0);
if (args.size() == 2) {
isRound = args.get(1);
}
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.DOUBLE;
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
boolean round = false;
ExprEval eval = estimateExpr.eval(bindings);
if (isRound != null) {
round = isRound.eval(bindings).asBoolean();
}
final Object valObj = eval.value();
if (valObj == null) {
return ExprEval.of(null);
}
HllSketchHolder h = HllSketchHolder.fromObj(valObj);
double estimate = h.getEstimate();
return round ? ExprEval.of(Math.round(estimate)) : ExprEval.of(estimate);
}
@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
return shuttle.visit(new HllSketchEstimateExpr(newArgs));
}
}
}

View File

@ -43,7 +43,7 @@ import java.util.List;
public class HllSketchEstimateOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "HLL_SKETCH_ESTIMATE";
private static final String FUNCTION_NAME = "hll_sketch_estimate";
private static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME))
.operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.BOOLEAN)
@ -64,7 +64,12 @@ public class HllSketchEstimateOperatorConversion implements SqlOperatorConversio
RexNode rexNode
)
{
return null;
return OperatorConversions.convertDirectCall(
plannerContext,
rowSignature,
rexNode,
FUNCTION_NAME
);
}
@Nullable

View File

@ -24,7 +24,9 @@ import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.annotations.VisibleForTesting;
import com.google.inject.Binder;
import org.apache.druid.guice.ExpressionModule;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaPostAggMacros;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchApproxCountDistinctSqlAggregator;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchEstimateOperatorConversion;
import org.apache.druid.query.aggregation.datasketches.theta.sql.ThetaSketchEstimateWithErrorBoundsOperatorConversion;
@ -71,6 +73,7 @@ public class SketchModule implements DruidModule
ThetaSketchApproxCountDistinctSqlAggregator.NAME,
ThetaSketchApproxCountDistinctSqlAggregator.class
);
ExpressionModule.addExprMacro(binder, ThetaPostAggMacros.ThetaSketchEstimateExprMacro.class);
}
@Override

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.datasketches.theta.sql;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.aggregation.datasketches.theta.SketchHolder;
import javax.annotation.Nullable;
import java.util.List;
public class ThetaPostAggMacros
{
public static final String THETA_SKETCH_ESTIMATE = "theta_sketch_estimate";
public static class ThetaSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
{
@Override
public Expr apply(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 1);
return new ThetaSketchEstimateExpr(args.get(0));
}
@Override
public String name()
{
return THETA_SKETCH_ESTIMATE;
}
}
public static class ThetaSketchEstimateExpr extends ExprMacroTable.BaseScalarUnivariateMacroFunctionExpr
{
private Expr estimateExpr;
public ThetaSketchEstimateExpr(Expr arg)
{
super(THETA_SKETCH_ESTIMATE, arg);
this.estimateExpr = arg;
}
@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval eval = estimateExpr.eval(bindings);
final Object valObj = eval.value();
if (valObj == null) {
return ExprEval.of(null);
}
if (valObj instanceof SketchHolder) {
SketchHolder thetaSketchHolder = (SketchHolder) valObj;
double estimate = thetaSketchHolder.getEstimate();
return ExprEval.of(estimate);
} else {
throw new IllegalArgumentException("requires a ThetaSketch as the argument");
}
}
@Override
public Expr visit(Shuttle shuttle)
{
return shuttle.visit(new ThetaSketchEstimateExpr(arg));
}
@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.DOUBLE;
}
}
}

View File

@ -40,7 +40,7 @@ import java.util.List;
public class ThetaSketchEstimateOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "THETA_SKETCH_ESTIMATE";
private static final String FUNCTION_NAME = "theta_sketch_estimate";
private static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder(StringUtils.toUpperCase(FUNCTION_NAME))
.operandTypes(SqlTypeFamily.ANY)
@ -60,7 +60,7 @@ public class ThetaSketchEstimateOperatorConversion implements SqlOperatorConvers
RexNode rexNode
)
{
return null;
return OperatorConversions.convertDirectCall(plannerContext, rowSignature, rexNode, FUNCTION_NAME);
}
@Nullable

View File

@ -27,6 +27,7 @@ import org.apache.druid.guice.DruidInjectorBuilder;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryDataSource;
@ -49,9 +50,15 @@ import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregat
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.query.topn.InvertedTopNMetricSpec;
import org.apache.druid.query.topn.NumericTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.ColumnType;
@ -69,6 +76,7 @@ import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
@ -81,6 +89,12 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest
{
private static final boolean ROUND = true;
private static final ExprMacroTable MACRO_TABLE = new ExprMacroTable(
ImmutableList.of(
new HllPostAggExprMacros.HLLSketchEstimateExprMacro()
)
);
@Override
public void gatherProperties(Properties properties)
{
@ -121,6 +135,14 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest
null,
false,
ROUND
),
new HllSketchBuildAggregatorFactory(
"hllsketch_dim3",
"dim3",
null,
null,
false,
false
)
)
.withRollup(false)
@ -129,6 +151,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest
.rows(TestDataBuilder.ROWS1)
.buildMMappedIndex();
return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
DataSegment.builder()
.dataSource(CalciteTests.DATASOURCE1)
@ -888,4 +911,193 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest
ImmutableList.of(new Object[]{"a", 0L, "0"})
);
}
@Test
public void testHllEstimateAsVirtualColumn()
{
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+ " FROM druid.foo",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"hll_sketch_estimate(\"hllsketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.columns("v0")
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{0.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D}
)
);
}
@Test
public void testHllEstimateAsVirtualColumnWithRound()
{
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(hllsketch_dim3, FALSE), HLL_SKETCH_ESTIMATE(hllsketch_dim3, TRUE)"
+ " FROM druid.foo",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"hll_sketch_estimate(\"hllsketch_dim3\",0)",
ColumnType.DOUBLE,
MACRO_TABLE
), new ExpressionVirtualColumn(
"v1",
"hll_sketch_estimate(\"hllsketch_dim3\",1)",
ColumnType.DOUBLE,
MACRO_TABLE
))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.columns("v0", "v1")
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{2.000000004967054D, 2.0D},
new Object[]{2.000000004967054D, 2.0D},
new Object[]{1.0D, 1.0D},
new Object[]{0.0D, 0.0D},
new Object[]{0.0D, 0.0D},
new Object[]{0.0D, 0.0D}
)
);
}
@Test
public void testHllEstimateAsVirtualColumnOnNonHllCol()
{
try {
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(dim2)"
+ " FROM druid.foo",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"hll_sketch_estimate(\"dim2\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.columns("v0")
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of()
);
}
catch (IllegalArgumentException e) {
Assert.assertTrue(
e.getMessage().contains("Input byte[] should at least have 2 bytes for base64 bytes")
);
}
}
@Test
public void testHllEstimateAsVirtualColumnWithGroupByOrderBy()
{
skipVectorize();
cannotVectorize();
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(hllsketch_dim1), count(*)"
+ " FROM druid.foo"
+ " GROUP BY 1"
+ " ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setInterval(querySegmentSpec(Filtration.eternity()))
.setDataSource(CalciteTests.DATASOURCE1)
.setGranularity(Granularities.ALL)
.setVirtualColumns(new ExpressionVirtualColumn(
"v0",
"hll_sketch_estimate(\"hllsketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.setDimensions(
new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE))
.setAggregatorSpecs(
aggregators(
new CountAggregatorFactory("a0")
)
)
.setLimitSpec(
DefaultLimitSpec
.builder()
.orderBy(
new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)
)
.build()
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{1.0D, 5L},
new Object[]{0.0D, 1L}
)
);
}
@Test
public void testHllEstimateAsVirtualColumnWithTopN()
{
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(hllsketch_dim1), COUNT(*)"
+ " FROM druid.foo"
+ " GROUP BY 1 ORDER BY 2"
+ " LIMIT 2",
ImmutableList.of(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.granularity(Granularities.ALL)
.dimension(new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"hll_sketch_estimate(\"hllsketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.metric(new InvertedTopNMetricSpec(new NumericTopNMetricSpec("a0")))
.threshold(2)
.aggregators(new CountAggregatorFactory("a0"))
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{0.0D, 1L},
new Object[]{1.0D, 5L}
)
);
}
}

View File

@ -27,6 +27,7 @@ import org.apache.druid.guice.DruidInjectorBuilder;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@ -44,8 +45,13 @@ import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.DimensionTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.ColumnType;
@ -63,6 +69,7 @@ import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
@ -74,6 +81,11 @@ import java.util.Properties;
public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest
{
private static final String DATA_SOURCE = "foo";
private static final ExprMacroTable MACRO_TABLE = new ExprMacroTable(
ImmutableList.of(
new ThetaPostAggMacros.ThetaSketchEstimateExprMacro()
)
);
@Override
public void gatherProperties(Properties properties)
@ -81,7 +93,10 @@ public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest
super.gatherProperties(properties);
// Use APPROX_COUNT_DISTINCT_DS_THETA as APPROX_COUNT_DISTINCT impl for these tests.
properties.put(SqlModule.PROPERTY_SQL_APPROX_COUNT_DISTINCT_CHOICE, ThetaSketchApproxCountDistinctSqlAggregator.NAME);
properties.put(
SqlModule.PROPERTY_SQL_APPROX_COUNT_DISTINCT_CHOICE,
ThetaSketchApproxCountDistinctSqlAggregator.NAME
);
}
@Override
@ -1018,8 +1033,160 @@ public class ThetaSketchSqlAggregatorTest extends BaseCalciteQueryTest
@Test
public void testThetaSketchIntersectOnScalarExpression()
{
assertQueryIsUnplannable("SELECT THETA_SKETCH_INTERSECT(NULL, NULL) FROM foo",
assertQueryIsUnplannable(
"SELECT THETA_SKETCH_INTERSECT(NULL, NULL) FROM foo",
"Possible error: THETA_SKETCH_INTERSECT can only be used on aggregates. " +
"It cannot be used directly on a column or on a scalar expression.");
"It cannot be used directly on a column or on a scalar expression."
);
}
@Test
public void testThetaSketchEstimateAsVirtualColumn()
{
testQuery(
"SELECT"
+ " THETA_SKETCH_ESTIMATE(thetasketch_dim1)"
+ " FROM foo",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"theta_sketch_estimate(\"thetasketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.columns("v0")
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
NullHandling.replaceWithDefault() ? new Object[]{null} : new Object[]{0.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D},
new Object[]{1.0D}
)
);
}
@Test
public void testThetaEstimateAsVirtualColumnOnNonThetaCol()
{
try {
testQuery(
"SELECT"
+ " THETA_SKETCH_ESTIMATE(dim2)"
+ " FROM druid.foo",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"theta_sketch_estimate(\"dim2\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.columns("v0")
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of()
);
}
catch (IllegalArgumentException e) {
Assert.assertTrue(
e.getMessage().contains("requires a ThetaSketch as the argument")
);
}
}
@Test
public void testThetaEstimateAsVirtualColumnWithGroupByOrderBy()
{
skipVectorize();
cannotVectorize();
testQuery(
"SELECT"
+ " THETA_SKETCH_ESTIMATE(thetasketch_dim1), count(*)"
+ " FROM druid.foo"
+ " GROUP BY 1"
+ " ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setInterval(querySegmentSpec(Filtration.eternity()))
.setDataSource(CalciteTests.DATASOURCE1)
.setGranularity(Granularities.ALL)
.setVirtualColumns(new ExpressionVirtualColumn(
"v0",
"theta_sketch_estimate(\"thetasketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.setDimensions(
new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE))
.setAggregatorSpecs(
aggregators(
new CountAggregatorFactory("a0")
)
)
.setLimitSpec(
DefaultLimitSpec
.builder()
.orderBy(
new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)
)
.build()
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{1.0D, 5L},
new Object[]{0.0D, 1L}
)
);
}
@Test
public void testThetaEstimateAsVirtualColumnWithTopN()
{
testQuery(
"SELECT"
+ " THETA_SKETCH_ESTIMATE(thetasketch_dim1)"
+ " FROM druid.foo"
+ " GROUP BY 1 ORDER BY 1"
+ " LIMIT 2",
ImmutableList.of(
new TopNQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.granularity(Granularities.ALL)
.dimension(new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE))
.virtualColumns(new ExpressionVirtualColumn(
"v0",
"theta_sketch_estimate(\"thetasketch_dim1\")",
ColumnType.DOUBLE,
MACRO_TABLE
))
.metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
.threshold(2)
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{0.0D},
new Object[]{1.0D}
)
);
}
}