Reset sketch combiner in AggregateCombiner (#8368)

* Reset union in AggregateCombiner

* Use newer sketch objects for test

* Add empty sketch objects
This commit is contained in:
Atul Mohan 2019-08-23 02:22:40 -05:00 committed by Gian Merlino
parent bfbae76031
commit 661976f266
6 changed files with 105 additions and 0 deletions

View File

@ -107,6 +107,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory
/**
 * Restarts the combination from {@code selector}: calls {@code union.reset()}, then
 * {@code combined.invalidateCache()}, then {@code fold(selector)}.
 *
 * NOTE(review): this hunk is a side-by-side diff rendering, so each unchanged line is
 * printed twice on the same row; only the invalidateCache() line appears once.
 */
public void reset(ColumnValueSelector selector) public void reset(ColumnValueSelector selector)
{ {
union.reset(); union.reset();
// Presumably discards a result cached from the pre-reset union state so it is not served again — confirm against the type of `combined`.
combined.invalidateCache();
fold(selector); fold(selector);
} }

View File

@ -146,6 +146,41 @@ public class SketchAggregationTest
); );
} }
/**
 * Builds a segment from {@code empty_sketch_data.tsv}, runs the group-by query stored in
 * {@code empty_sketch_group_by_query.json} against it, and expects exactly one result row:
 * {@code product_b} with a {@code sketch_count} of 0.0.
 */
@Test
public void testEmptySketchAggregateCombine() throws Exception
{
  final String queryJson = readFileFromClasspathAsString("empty_sketch_group_by_query.json");
  final GroupByQuery parsedQuery = (GroupByQuery) helper.getObjectMapper().readValue(queryJson, Query.class);

  // Fixture inputs, resolved in the same order the segment-building call consumes them.
  final File dataFile = new File(
      SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile()
  );
  final String parserJson = readFileFromClasspathAsString("empty_sketch_data_record_parser.json");
  final String aggregatorsJson = readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json");

  final List<ResultRow> rows = helper
      .createIndexAndRunQueryOnSegment(
          dataFile,
          parserJson,
          aggregatorsJson,
          0,
          Granularities.NONE,
          5,
          queryJson
      )
      .toList();
  Assert.assertEquals(1, rows.size());

  // The expected row is assembled only after the size check, as in the original flow.
  final ImmutableMap<String, Object> expectedEvent = ImmutableMap.of(
      "product", "product_b",
      "sketch_count", 0.0
  );
  final ResultRow expectedRow = ResultRow.fromLegacyRow(
      new MapBasedRow(DateTimes.of("2019-07-14T00:00:00.000Z"), expectedEvent),
      parsedQuery
  );
  Assert.assertEquals(expectedRow, rows.get(0));
}
@Test @Test
public void testThetaCardinalityOnSimpleColumn() throws Exception public void testThetaCardinalityOnSimpleColumn() throws Exception
{ {

View File

@ -0,0 +1,16 @@
2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ=
2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/EOmCmVerjm4=
2019071401 product_d ZN AQMDAAAezJM=
2019071401 product_a DN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
2019071401 product_b ZN AQMDAAAezJM=
2019071401 product_a CN AwEDAAAAAgABAAAAAAAAAP////////9/o31ldGC0E2s=
2019071401 product_a GN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
2019071401 product_b ZN AQMDAAAezJM=
2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38=
2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/jDwgknTL/S0=
2019071401 product_d LN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/4EyBvXLM/xs=
2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38=
2019071401 product_d ZN AQMDAAAezJM=
2019071401 product_a ZN AwEDAAAAAgABAAAAAAAAAP////////9//mseeN0UsgU=
2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/Rn5CRuhP3h4=
1 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ=
2 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/EOmCmVerjm4=
3 2019071401 product_d ZN AQMDAAAezJM=
4 2019071401 product_a DN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
5 2019071401 product_b ZN AQMDAAAezJM=
6 2019071401 product_a CN AwEDAAAAAgABAAAAAAAAAP////////9/o31ldGC0E2s=
7 2019071401 product_a GN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
8 2019071401 product_b ZN AQMDAAAezJM=
9 2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38=
10 2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/jDwgknTL/S0=
11 2019071401 product_d LN AwEDAAAAAgABAAAAAAAAAP////////9/i3zstpLhWWs=
12 2019071401 product_a SN AwEDAAAAAgABAAAAAAAAAP////////9/4EyBvXLM/xs=
13 2019071401 product_a LN AwEDAAAAAgABAAAAAAAAAP////////9/KKcfz0hRe38=
14 2019071401 product_d ZN AQMDAAAezJM=
15 2019071401 product_a ZN AwEDAAAAAgABAAAAAAAAAP////////9//mseeN0UsgU=
16 2019071401 product_c LN AwEDAAAAAgABAAAAAAAAAP////////9/Rn5CRuhP3h4=

View File

@ -0,0 +1,24 @@
{
"type": "string",
"parseSpec": {
"format": "tsv",
"timestampSpec": {
"column": "timestamp",
"format": "yyyyMMddHH"
},
"dimensionsSpec": {
"dimensions": [
"product",
"product_code"
],
"dimensionExclusions": [],
"spatialDimensions": []
},
"columns": [
"timestamp",
"product",
"product_code",
"product_sketch"
]
}
}

View File

@ -0,0 +1,20 @@
{
"queryType": "groupBy",
"dataSource": "test_datasource",
"granularity":"ALL",
"dimensions": ["product"],
"filter" : {
"type" : "selector", "dimension" : "product", "value" : "product_b"
},
"aggregations": [
{
"type": "thetaSketch",
"name": "sketch_count",
"fieldName": "product_sketch",
"size": 16384
}
],
"intervals": [
"2019-07-14T00:00:00.000Z/2019-07-15T00:00:00.000Z"
]
}

View File

@ -0,0 +1,9 @@
[
{
"type": "thetaSketch",
"name": "product_sketch",
"fieldName": "product_sketch",
"isInputThetaSketch": true,
"size": 16384
}
]