From 661976f2665eaad40600632a043f296085554041 Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Fri, 23 Aug 2019 02:22:40 -0500 Subject: [PATCH] Reset sketch combiner in AggregatorCombiner (#8368) * Reset union in AggregateCombiner * Use newer sketch objects for test * Add empty sketch objects --- .../theta/SketchAggregatorFactory.java | 1 + .../theta/SketchAggregationTest.java | 35 +++++++++++++++++++ .../src/test/resources/empty_sketch_data.tsv | 16 +++++++++ .../empty_sketch_data_record_parser.json | 24 +++++++++++++ .../empty_sketch_group_by_query.json | 20 +++++++++++ .../empty_sketch_test_data_aggregators.json | 9 +++++ 6 files changed, 105 insertions(+) create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json create mode 100644 extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index b01cf207f7f..23aba36f289 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -107,6 +107,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory public void reset(ColumnValueSelector selector) { union.reset(); + combined.invalidateCache(); fold(selector); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java 
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 980a0932edc..e2dd685f04d 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -146,6 +146,41 @@ public class SketchAggregationTest ); } + @Test + public void testEmptySketchAggregateCombine() throws Exception + { + final String groupByQueryString = readFileFromClasspathAsString("empty_sketch_group_by_query.json"); + final GroupByQuery groupByQuery = (GroupByQuery) helper.getObjectMapper() + .readValue(groupByQueryString, Query.class); + + final Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile()), + readFileFromClasspathAsString("empty_sketch_data_record_parser.json"), + readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json"), + 0, + Granularities.NONE, + 5, + groupByQueryString + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals( + ResultRow.fromLegacyRow( + new MapBasedRow( + DateTimes.of("2019-07-14T00:00:00.000Z"), + ImmutableMap + .<String, Object>builder() + .put("product", "product_b") + .put("sketch_count", 0.0) + .build() + ), + groupByQuery + ), + results.get(0) + ); + } + @Test public void testThetaCardinalityOnSimpleColumn() throws Exception { diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv new file mode 100644 index 00000000000..e9d175d28e2 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data.tsv @@ -0,0 +1,16 @@ +2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ= +2019071401 product_c LN 
AwEDAAAAAgABAAAAAAAAAP////////9/EOmCmVerjm4= +2019071401 product_d ZN AQMDAAAezJM= +2019071401 product_a DN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= +2019071401 product_b ZN AQMDAAAezJM= +2019071401 product_a CN AwEDAAAAAgABAAAAAAAAAP////////9/o31ldGC0E2s= +2019071401 product_a GN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= +2019071401 product_b ZN AQMDAAAezJM= +2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= +2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/jDwgknTL/S0= +2019071401 product_d LN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs= +2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/4EyBvXLM/xs= +2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38= +2019071401 product_d ZN AQMDAAAezJM= +2019071401 product_a ZN AwEDAAAAAgABAAAAAAAAAP////////9//mseeN0UsgU= +2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/Rn5CRuhP3h4= diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json b/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json new file mode 100644 index 00000000000..42a00c3cd93 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_data_record_parser.json @@ -0,0 +1,24 @@ +{ + "type": "string", + "parseSpec": { + "format": "tsv", + "timestampSpec": { + "column": "timestamp", + "format": "yyyyMMddHH" + }, + "dimensionsSpec": { + "dimensions": [ + "product", + "product_code" + ], + "dimensionExclusions": [], + "spatialDimensions": [] + }, + "columns": [ + "timestamp", + "product", + "product_code", + "product_sketch" + ] + } +} diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json b/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json new file mode 100644 index 00000000000..7d2622a7763 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_group_by_query.json @@ -0,0 +1,20 @@ +{ + "queryType": "groupBy", + 
"dataSource": "test_datasource", + "granularity":"ALL", + "dimensions": ["product"], + "filter" : { + "type" : "selector", "dimension" : "product", "value" : "product_b" + }, + "aggregations": [ + { + "type": "thetaSketch", + "name": "sketch_count", + "fieldName": "product_sketch", + "size": 16384 + } + ], + "intervals": [ + "2019-07-14T00:00:00.000Z/2019-07-15T00:00:00.000Z" + ] +} diff --git a/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json b/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json new file mode 100644 index 00000000000..1d098018363 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/empty_sketch_test_data_aggregators.json @@ -0,0 +1,9 @@ +[ + { + "type": "thetaSketch", + "name": "product_sketch", + "fieldName": "product_sketch", + "isInputThetaSketch": true, + "size": 16384 + } +]