From 44523a8ce31730031452471e4d265ba778291cf4 Mon Sep 17 00:00:00 2001 From: Zoltan Haindrich Date: Thu, 14 Nov 2024 17:09:08 +0000 Subject: [PATCH] update --- .../hll/sql/HllSketchSqlAggregatorTest.java | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 074955625a6..795cc9b868f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -289,7 +289,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest new HllSketchBuildAggregatorFactory("hllsketch_m1", "m1", null, null, null, false, ROUND), new HllSketchBuildAggregatorFactory("hllsketch_f1", "f1", null, null, null, false, ROUND), new HllSketchBuildAggregatorFactory("hllsketch_l1", "l1", null, null, null, false, ROUND), - new HllSketchBuildAggregatorFactory("hllsketch_d1", "d1", null, null, null, false, ROUND) + new HllSketchBuildAggregatorFactory("hllsketch_dbl1", "dbl1", null, null, null, false, ROUND) ) .withRollup(false) .build() @@ -502,7 +502,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest GroupByQuery.builder() .setInterval(querySegmentSpec(Filtration.eternity())) .setDataSource(CalciteTests.DATASOURCE1) - .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "_d0"))) + .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))) .setGranularity(Granularities.ALL) .setAggregatorSpecs( aggregators( @@ -574,7 +574,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Filtration.eternity()))) .setGranularity(Granularities.ALL) .setVirtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) - .setDimensions(new DefaultDimensionSpec("cnt", "_d0", ColumnType.LONG)) + .setDimensions(new DefaultDimensionSpec("cnt", "d0", ColumnType.LONG)) .setAggregatorSpecs(EXPECTED_FILTERED_AGGREGATORS) .setPostAggregatorSpecs(EXPECTED_FILTERED_POST_AGGREGATORS) .setContext(QUERY_CONTEXT_DEFAULT) @@ -669,7 +669,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .setInterval(new MultipleIntervalSegmentSpec(Collections.singletonList(Filtration.eternity()))) .setGranularity(Granularities.ALL) .setVirtualColumns(VirtualColumns.create(EXPECTED_PA_VIRTUAL_COLUMNS)) - .setDimensions(new DefaultDimensionSpec("cnt", "_d0", ColumnType.LONG)) + .setDimensions(new DefaultDimensionSpec("cnt", "d0", ColumnType.LONG)) .setAggregatorSpecs(EXPECTED_PA_AGGREGATORS) .setPostAggregatorSpecs(EXPECTED_PA_POST_AGGREGATORS) .setContext(QUERY_CONTEXT_DEFAULT) @@ -950,7 +950,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) - .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) + .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) .setAggregatorSpecs( aggregators( new FilteredAggregatorFactory( @@ -1009,7 +1009,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .setDimFilter(equality("dim2", "a", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) - .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)) + .setDimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)) .setAggregatorSpecs( aggregators( new FilteredAggregatorFactory( @@ -1035,7 +1035,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest testQuery( "SELECT" + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)," - + " HLL_SKETCH_ESTIMATE(hllsketch_d1)," + + " HLL_SKETCH_ESTIMATE(hllsketch_dbl1)," + " HLL_SKETCH_ESTIMATE(hllsketch_l1)," + " HLL_SKETCH_ESTIMATE(hllsketch_f1)" + " FROM druid.foo", @@ -1045,7 +1045,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .intervals(querySegmentSpec(Filtration.eternity())) .virtualColumns( makeSketchEstimateExpression("v0", "hllsketch_dim1"), - makeSketchEstimateExpression("v1", "hllsketch_d1"), + makeSketchEstimateExpression("v1", "hllsketch_dbl1"), makeSketchEstimateExpression("v2", "hllsketch_l1"), makeSketchEstimateExpression("v3", "hllsketch_f1") ) @@ -1155,7 +1155,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .setGranularity(Granularities.ALL) .setVirtualColumns(makeSketchEstimateExpression("v0", "hllsketch_dim1")) .setDimensions( - new DefaultDimensionSpec("v0", "_d0", ColumnType.DOUBLE)) + new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE)) .setAggregatorSpecs( aggregators( new CountAggregatorFactory("a0") @@ -1197,7 +1197,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .dataSource(CalciteTests.DATASOURCE1) .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) - .dimension(new DefaultDimensionSpec("v0", "_d0", ColumnType.DOUBLE)) + .dimension(new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE)) .virtualColumns(makeSketchEstimateExpression("v0", "hllsketch_dim1")) .metric(new InvertedTopNMetricSpec(new NumericTopNMetricSpec("a0"))) .threshold(2) @@ -1266,9 +1266,9 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest /** * This is an extremely subtle test, so we explain with a comment. The `m1` column in the input data looks like - * `["1.0", "2.0", "3.0", "4.0", "5.0", "6.0"]` while the `d1` column looks like - * `[1.0, 1.7, 0.0]`. That is, "m1" is numbers-as-strings, while d1 is numbers-as-numbers. If you take the - * uniques across both columns, you expect no overlap, so 9 entries. However, if the `1.0` from `d1` gets + * `["1.0", "2.0", "3.0", "4.0", "5.0", "6.0"]` while the `dbl1` column looks like + * `[1.0, 1.7, 0.0]`. That is, "m1" is numbers-as-strings, while dbl1 is numbers-as-numbers. If you take the + * uniques across both columns, you expect no overlap, so 9 entries. However, if the `1.0` from `dbl1` gets * converted into `"1.0"` or vice-versa, the result can become 8 because then the sketch will hash the same * value multiple times considering them duplicates. This test validates that the aggregator properly builds * the sketches preserving the initial type of the data as it came in. Specifically, the test was added when @@ -1280,7 +1280,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest { testQuery( "SELECT" - + " HLL_SKETCH_ESTIMATE(HLL_SKETCH_UNION(DS_HLL(hllsketch_d1), DS_HLL(hllsketch_m1)), true)" + + " HLL_SKETCH_ESTIMATE(HLL_SKETCH_UNION(DS_HLL(hllsketch_dbl1), DS_HLL(hllsketch_m1)), true)" + " FROM druid.foo", ImmutableList.of( Druids.newTimeseriesQueryBuilder() @@ -1288,7 +1288,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .aggregators( - new HllSketchMergeAggregatorFactory("a0", "hllsketch_d1", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a0", "hllsketch_dbl1", null, null, null, false, true), new HllSketchMergeAggregatorFactory("a1", "hllsketch_m1", null, null, null, false, true) ) .postAggregators( @@ -1320,7 +1320,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest * This is a test in a similar vein to {@link #testEstimateStringAndDoubleAreDifferent()} except here we are * ensuring that float values and doubles values are considered equivalent. The expected initial inputs were *

- * 1. d1 -> [1.0, 1.7, 0.0] + * 1. dbl1 -> [1.0, 1.7, 0.0] * 2. f1 -> [1.0f, 0.1f, 0.0f] *

* If we assume that doubles and floats are the same, that means that there are 4 unique values, not 6 @@ -1331,7 +1331,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest // This is a test in a similar vein to testEstimateStringAndDoubleAreDifferent above testQuery( "SELECT" - + " HLL_SKETCH_ESTIMATE(HLL_SKETCH_UNION(DS_HLL(hllsketch_d1), DS_HLL(hllsketch_f1)), true)" + + " HLL_SKETCH_ESTIMATE(HLL_SKETCH_UNION(DS_HLL(hllsketch_dbl1), DS_HLL(hllsketch_f1)), true)" + " FROM druid.foo", ImmutableList.of( Druids.newTimeseriesQueryBuilder() @@ -1339,7 +1339,7 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest .intervals(querySegmentSpec(Filtration.eternity())) .granularity(Granularities.ALL) .aggregators( - new HllSketchMergeAggregatorFactory("a0", "hllsketch_d1", null, null, null, false, true), + new HllSketchMergeAggregatorFactory("a0", "hllsketch_dbl1", null, null, null, false, true), new HllSketchMergeAggregatorFactory("a1", "hllsketch_f1", null, null, null, false, true) ) .postAggregators( @@ -1372,8 +1372,8 @@ public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest // this query was not planable: https://github.com/apache/druid/issues/15353 testBuilder() .sql( - "SELECT d1,dim2,APPROX_COUNT_DISTINCT_DS_HLL(dim2, 18) as val" - + " FROM (select d1,dim1,dim2 from druid.foo group by d1,dim1,dim2 order by dim1 limit 3) t " + "SELECT dbl1,dim2,APPROX_COUNT_DISTINCT_DS_HLL(dim2, 18) as val" + + " FROM (select dbl1,dim1,dim2 from druid.foo group by dbl1,dim1,dim2 order by dim1 limit 3) t " + " group by 1,2" ) .expectedResults(