diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildAggregatorFactory.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildAggregatorFactory.java deleted file mode 100644 index 509f3c0ff22..00000000000 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchBuildAggregatorFactory.java +++ /dev/null @@ -1,69 +0,0 @@ -/* -* Licensed to Metamarkets Group Inc. (Metamarkets) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. Metamarkets licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -*/ - -package io.druid.query.aggregation.datasketches.theta; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.yahoo.sketches.theta.Sketch; -import io.druid.query.aggregation.AggregatorFactory; - -import java.util.Arrays; -import java.util.List; - -/** - */ -public class SketchBuildAggregatorFactory extends SketchAggregatorFactory -{ - private static final byte CACHE_TYPE_ID = 17; - - @JsonCreator - public SketchBuildAggregatorFactory( - @JsonProperty("name") String name, - @JsonProperty("fieldName") String fieldName, - @JsonProperty("size") Integer size - ) - { - super(name, fieldName, size, CACHE_TYPE_ID); - } - - @Override - public AggregatorFactory getCombiningFactory() - { - return new SketchBuildAggregatorFactory(name, name, size); - } - - @Override - public Object finalizeComputation(Object object) - { - return ((Sketch) object).getEstimate(); - } - - @Override - public List getRequiredColumns() - { - return Arrays.asList(new SketchBuildAggregatorFactory(fieldName, fieldName, size)); - } - - @Override - public String getTypeName() - { - return SketchModule.THETA_SKETCH_BUILD_AGG; - } -} diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java index e254b1727f4..dadb1ca15fe 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchMergeAggregatorFactory.java @@ -33,29 +33,40 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory private static final byte CACHE_TYPE_ID = 15; private final boolean shouldFinalize; + private final boolean isInputThetaSketch; @JsonCreator public SketchMergeAggregatorFactory( @JsonProperty("name") String name, @JsonProperty("fieldName") String fieldName, @JsonProperty("size") Integer size, - @JsonProperty("shouldFinalize") Boolean shouldFinalize + @JsonProperty("shouldFinalize") Boolean shouldFinalize, + @JsonProperty("isInputThetaSketch") Boolean isInputThetaSketch ) { super(name, fieldName, size, CACHE_TYPE_ID); this.shouldFinalize = (shouldFinalize == null) ? true : shouldFinalize.booleanValue(); + this.isInputThetaSketch = (isInputThetaSketch == null) ? false : isInputThetaSketch.booleanValue(); } @Override public List getRequiredColumns() { - return Collections.singletonList(new SketchMergeAggregatorFactory(fieldName, fieldName, size, shouldFinalize)); + return Collections.singletonList( + new SketchMergeAggregatorFactory( + fieldName, + fieldName, + size, + shouldFinalize, + isInputThetaSketch + ) + ); } @Override public AggregatorFactory getCombiningFactory() { - return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize); + return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, isInputThetaSketch); } @JsonProperty @@ -64,6 +75,12 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory return shouldFinalize; } + @JsonProperty + public boolean getIsInputThetaSketch() + { + return isInputThetaSketch; + } + /** * Finalize the computation on sketch object and returns estimate from underlying * sketch. @@ -84,7 +101,11 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory @Override public String getTypeName() { - return SketchModule.THETA_SKETCH_MERGE_AGG; + if (isInputThetaSketch) { + return SketchModule.THETA_SKETCH_MERGE_AGG; + } else { + return SketchModule.THETA_SKETCH_BUILD_AGG; + } } @Override @@ -102,7 +123,10 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory SketchMergeAggregatorFactory that = (SketchMergeAggregatorFactory) o; - return shouldFinalize == that.shouldFinalize; + if (shouldFinalize != that.shouldFinalize) { + return false; + } + return isInputThetaSketch == that.isInputThetaSketch; } @@ -111,17 +135,19 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory { int result = super.hashCode(); result = 31 * result + (shouldFinalize ? 1 : 0); + result = 31 * result + (isInputThetaSketch ? 1 : 0); return result; } @Override public String toString() { - return getClass().getSimpleName() + "{" - + "fieldName='" + fieldName + '\'' - + ", name='" + name + '\'' - + ", size=" + size + '\'' - + ", shouldFinalize=" + shouldFinalize + - + '}'; + return "SketchMergeAggregatorFactory{" + + "fieldName=" + fieldName + + ", name=" + name + + ", size=" + size + + ",shouldFinalize=" + shouldFinalize + + ", isInputThetaSketch=" + isInputThetaSketch + + "}"; } } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java index c115b793f42..333b7f8a8b4 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java @@ -62,8 +62,7 @@ public class SketchModule implements DruidModule return Arrays.asList( new SimpleModule("ThetaSketchModule") .registerSubtypes( - new NamedType(SketchBuildAggregatorFactory.class, THETA_SKETCH_BUILD_AGG), - new NamedType(SketchMergeAggregatorFactory.class, THETA_SKETCH_MERGE_AGG), + new NamedType(SketchMergeAggregatorFactory.class, THETA_SKETCH), new NamedType(SketchEstimatePostAggregator.class, THETA_SKETCH_ESTIMATE_POST_AGG), new NamedType(SketchSetPostAggregator.class, THETA_SKETCH_SET_OP_POST_AGG) ) diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java index ed5ea477309..5b5bc334699 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchBuildAggregatorFactory.java @@ -21,11 +21,11 @@ package io.druid.query.aggregation.datasketches.theta.oldapi; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import io.druid.query.aggregation.datasketches.theta.SketchBuildAggregatorFactory; +import io.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory; /** */ -public class OldSketchBuildAggregatorFactory extends SketchBuildAggregatorFactory +public class OldSketchBuildAggregatorFactory extends SketchMergeAggregatorFactory { @JsonCreator public OldSketchBuildAggregatorFactory( @@ -34,6 +34,6 @@ public class OldSketchBuildAggregatorFactory extends SketchBuildAggregatorFactor @JsonProperty("size") Integer size ) { - super(name, fieldName, size); + super(name, fieldName, size, true, false); } } diff --git a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java index d386889a38c..82beb7a3537 100644 --- a/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java +++ b/extensions/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/oldapi/OldSketchMergeAggregatorFactory.java @@ -35,6 +35,6 @@ public class OldSketchMergeAggregatorFactory extends SketchMergeAggregatorFactor @JsonProperty("shouldFinalize") Boolean shouldFinalize ) { - super(name, fieldName, size, shouldFinalize); + super(name, fieldName, size, shouldFinalize, true); } } diff --git a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 1dd85c3668e..91385406407 100644 --- a/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -126,9 +126,9 @@ public class SketchAggregationTest @Test public void testSketchMergeAggregatorFactorySerde() throws Exception { - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false)); - assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true)); + assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false)); } @Test @@ -136,22 +136,16 @@ public class SketchAggregationTest { Sketch sketch = Sketches.updateSketchBuilder().build(128); - SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null); + SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null); Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001); - agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false); + agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null); Assert.assertEquals(sketch, agg.finalizeComputation(sketch)); } - @Test - public void testSketchBuildAggregatorFactorySerde() throws Exception - { - assertAggregatorFactorySerde(new SketchBuildAggregatorFactory("name", "fieldName", 16)); - } - private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception{ Assert.assertEquals( agg, diff --git a/extensions/datasketches/src/test/resources/simple_test_data_aggregators.json b/extensions/datasketches/src/test/resources/simple_test_data_aggregators.json index 02879448ba9..c98b9718787 100644 --- a/extensions/datasketches/src/test/resources/simple_test_data_aggregators.json +++ b/extensions/datasketches/src/test/resources/simple_test_data_aggregators.json @@ -1,11 +1,11 @@ [ { - "type": "thetaSketchBuild", + "type": "thetaSketch", "name": "pty_country", "fieldName": "pty_country" }, { - "type": "thetaSketchBuild", + "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col" } diff --git a/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json index 3f808b487ea..1badd1382ba 100644 --- a/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/simple_test_data_group_by_query.json @@ -4,8 +4,8 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "thetaSketchMerge", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 }, - { "type": "thetaSketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { "type": "thetaSketch", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 }, + { "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } ], "postAggregations": [ { diff --git a/extensions/datasketches/src/test/resources/sketch_test_data.tsv b/extensions/datasketches/src/test/resources/sketch_test_data.tsv index a5a9ab26b20..8180d80647c 100644 --- a/extensions/datasketches/src/test/resources/sketch_test_data.tsv +++ b/extensions/datasketches/src/test/resources/sketch_test_data.tsv @@ -1,4 +1,4 @@ -2014102000 product_1 AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ= +2014102000 product_1 AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ 2014102000 product_14 AwEDAAAAAgABAAAAAAAAAP////////9/V3s8Tt0yshQ= 2014102000 product_10 AwEDAAAAAgABAAAAAAAAAP////////9/h3XDlGJinVg= 2014102000 product_7 AwEDAAAAAgABAAAAAAAAAP////////9/srmtgZz8Slg= diff --git a/extensions/datasketches/src/test/resources/sketch_test_data_aggregators.json b/extensions/datasketches/src/test/resources/sketch_test_data_aggregators.json index 1ea20198125..05cd0baa59b 100644 --- a/extensions/datasketches/src/test/resources/sketch_test_data_aggregators.json +++ b/extensions/datasketches/src/test/resources/sketch_test_data_aggregators.json @@ -1,14 +1,16 @@ [ { - "type": "thetaSketchMerge", + "type": "thetaSketch", "name": "sids_sketch", "fieldName": "sketch", + "isInputThetaSketch": true, "size": 16384 }, { - "type": "thetaSketchMerge", + "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", + "isInputThetaSketch": true, "size": 16384 } ] diff --git a/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json b/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json index 9c1902fcd58..2a7251ef6db 100644 --- a/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json +++ b/extensions/datasketches/src/test/resources/sketch_test_data_group_by_query.json @@ -4,8 +4,8 @@ "granularity": "ALL", "dimensions": [], "aggregations": [ - { "type": "thetaSketchMerge", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 }, - { "type": "thetaSketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } + { "type": "thetaSketch", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 }, + { "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 } ], "postAggregations": [ {