further simplifying the api, users just need to use thetaSketch as aggregator

This commit is contained in:
Himanshu Gupta 2015-11-10 01:04:19 -06:00
parent 88ae3c43f9
commit 338f88b86b
11 changed files with 59 additions and 107 deletions

View File

@ -1,69 +0,0 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.aggregation.datasketches.theta;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.yahoo.sketches.theta.Sketch;
import io.druid.query.aggregation.AggregatorFactory;
import java.util.Arrays;
import java.util.List;
/**
*/
public class SketchBuildAggregatorFactory extends SketchAggregatorFactory
{
private static final byte CACHE_TYPE_ID = 17;
@JsonCreator
public SketchBuildAggregatorFactory(
@JsonProperty("name") String name,
@JsonProperty("fieldName") String fieldName,
@JsonProperty("size") Integer size
)
{
super(name, fieldName, size, CACHE_TYPE_ID);
}
@Override
public AggregatorFactory getCombiningFactory()
{
return new SketchBuildAggregatorFactory(name, name, size);
}
@Override
public Object finalizeComputation(Object object)
{
return ((Sketch) object).getEstimate();
}
@Override
public List<AggregatorFactory> getRequiredColumns()
{
return Arrays.<AggregatorFactory>asList(new SketchBuildAggregatorFactory(fieldName, fieldName, size));
}
@Override
public String getTypeName()
{
return SketchModule.THETA_SKETCH_BUILD_AGG;
}
}

View File

@ -33,29 +33,40 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
private static final byte CACHE_TYPE_ID = 15;
private final boolean shouldFinalize;
private final boolean isInputThetaSketch;
@JsonCreator
public SketchMergeAggregatorFactory(
@JsonProperty("name") String name,
@JsonProperty("fieldName") String fieldName,
@JsonProperty("size") Integer size,
@JsonProperty("shouldFinalize") Boolean shouldFinalize
@JsonProperty("shouldFinalize") Boolean shouldFinalize,
@JsonProperty("isInputThetaSketch") Boolean isInputThetaSketch
)
{
super(name, fieldName, size, CACHE_TYPE_ID);
this.shouldFinalize = (shouldFinalize == null) ? true : shouldFinalize.booleanValue();
this.isInputThetaSketch = (isInputThetaSketch == null) ? false : isInputThetaSketch.booleanValue();
}
@Override
public List<AggregatorFactory> getRequiredColumns()
{
return Collections.<AggregatorFactory>singletonList(new SketchMergeAggregatorFactory(fieldName, fieldName, size, shouldFinalize));
return Collections.<AggregatorFactory>singletonList(
new SketchMergeAggregatorFactory(
fieldName,
fieldName,
size,
shouldFinalize,
isInputThetaSketch
)
);
}
@Override
public AggregatorFactory getCombiningFactory()
{
return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize);
return new SketchMergeAggregatorFactory(name, name, size, shouldFinalize, isInputThetaSketch);
}
@JsonProperty
@ -64,6 +75,12 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
return shouldFinalize;
}
@JsonProperty
public boolean getIsInputThetaSketch()
{
return isInputThetaSketch;
}
/**
* Finalize the computation on sketch object and returns estimate from underlying
* sketch.
@ -84,7 +101,11 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
@Override
public String getTypeName()
{
return SketchModule.THETA_SKETCH_MERGE_AGG;
if (isInputThetaSketch) {
return SketchModule.THETA_SKETCH_MERGE_AGG;
} else {
return SketchModule.THETA_SKETCH_BUILD_AGG;
}
}
@Override
@ -102,7 +123,10 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
SketchMergeAggregatorFactory that = (SketchMergeAggregatorFactory) o;
return shouldFinalize == that.shouldFinalize;
if (shouldFinalize != that.shouldFinalize) {
return false;
}
return isInputThetaSketch == that.isInputThetaSketch;
}
@ -111,17 +135,19 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
{
int result = super.hashCode();
result = 31 * result + (shouldFinalize ? 1 : 0);
result = 31 * result + (isInputThetaSketch ? 1 : 0);
return result;
}
@Override
public String toString()
{
return getClass().getSimpleName() + "{"
+ "fieldName='" + fieldName + '\''
+ ", name='" + name + '\''
+ ", size=" + size + '\''
+ ", shouldFinalize=" + shouldFinalize +
+ '}';
return "SketchMergeAggregatorFactory{"
+ "fieldName=" + fieldName
+ ", name=" + name
+ ", size=" + size
+ ",shouldFinalize=" + shouldFinalize
+ ", isInputThetaSketch=" + isInputThetaSketch
+ "}";
}
}

View File

@ -62,8 +62,7 @@ public class SketchModule implements DruidModule
return Arrays.<Module>asList(
new SimpleModule("ThetaSketchModule")
.registerSubtypes(
new NamedType(SketchBuildAggregatorFactory.class, THETA_SKETCH_BUILD_AGG),
new NamedType(SketchMergeAggregatorFactory.class, THETA_SKETCH_MERGE_AGG),
new NamedType(SketchMergeAggregatorFactory.class, THETA_SKETCH),
new NamedType(SketchEstimatePostAggregator.class, THETA_SKETCH_ESTIMATE_POST_AGG),
new NamedType(SketchSetPostAggregator.class, THETA_SKETCH_SET_OP_POST_AGG)
)

View File

@ -21,11 +21,11 @@ package io.druid.query.aggregation.datasketches.theta.oldapi;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.druid.query.aggregation.datasketches.theta.SketchBuildAggregatorFactory;
import io.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory;
/**
*/
public class OldSketchBuildAggregatorFactory extends SketchBuildAggregatorFactory
public class OldSketchBuildAggregatorFactory extends SketchMergeAggregatorFactory
{
@JsonCreator
public OldSketchBuildAggregatorFactory(
@ -34,6 +34,6 @@ public class OldSketchBuildAggregatorFactory extends SketchBuildAggregatorFactor
@JsonProperty("size") Integer size
)
{
super(name, fieldName, size);
super(name, fieldName, size, true, false);
}
}

View File

@ -35,6 +35,6 @@ public class OldSketchMergeAggregatorFactory extends SketchMergeAggregatorFactor
@JsonProperty("shouldFinalize") Boolean shouldFinalize
)
{
super(name, fieldName, size, shouldFinalize);
super(name, fieldName, size, shouldFinalize, true);
}
}

View File

@ -126,9 +126,9 @@ public class SketchAggregationTest
@Test
public void testSketchMergeAggregatorFactorySerde() throws Exception
{
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null));
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false));
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true));
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null));
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, false, true));
assertAggregatorFactorySerde(new SketchMergeAggregatorFactory("name", "fieldName", 16, true, false));
}
@Test
@ -136,22 +136,16 @@ public class SketchAggregationTest
{
Sketch sketch = Sketches.updateSketchBuilder().build(128);
SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null);
SketchMergeAggregatorFactory agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, null, null);
Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001);
agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true);
agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, true, null);
Assert.assertEquals(0.0, ((Double) agg.finalizeComputation(sketch)).doubleValue(), 0.0001);
agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false);
agg = new SketchMergeAggregatorFactory("name", "fieldName", 16, false, null);
Assert.assertEquals(sketch, agg.finalizeComputation(sketch));
}
@Test
public void testSketchBuildAggregatorFactorySerde() throws Exception
{
assertAggregatorFactorySerde(new SketchBuildAggregatorFactory("name", "fieldName", 16));
}
private void assertAggregatorFactorySerde(AggregatorFactory agg) throws Exception{
Assert.assertEquals(
agg,

View File

@ -1,11 +1,11 @@
[
{
"type": "thetaSketchBuild",
"type": "thetaSketch",
"name": "pty_country",
"fieldName": "pty_country"
},
{
"type": "thetaSketchBuild",
"type": "thetaSketch",
"name": "non_existing_col_validation",
"fieldName": "non_existing_col"
}

View File

@ -4,8 +4,8 @@
"granularity": "ALL",
"dimensions": [],
"aggregations": [
{ "type": "thetaSketchMerge", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 },
{ "type": "thetaSketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 }
{ "type": "thetaSketch", "name": "sketch_count", "fieldName": "pty_country", "size": 16384 },
{ "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 }
],
"postAggregations": [
{

View File

@ -1,4 +1,4 @@
2014102000 product_1 AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ=
2014102000 product_1 AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ
2014102000 product_14 AwEDAAAAAgABAAAAAAAAAP////////9/V3s8Tt0yshQ=
2014102000 product_10 AwEDAAAAAgABAAAAAAAAAP////////9/h3XDlGJinVg=
2014102000 product_7 AwEDAAAAAgABAAAAAAAAAP////////9/srmtgZz8Slg=

1 2014102000 product_1 AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ= AwEDAAAAAgABAAAAAAAAAP////////9/RI5olqYUtnQ
2 2014102000 product_14 AwEDAAAAAgABAAAAAAAAAP////////9/V3s8Tt0yshQ=
3 2014102000 product_10 AwEDAAAAAgABAAAAAAAAAP////////9/h3XDlGJinVg=
4 2014102000 product_7 AwEDAAAAAgABAAAAAAAAAP////////9/srmtgZz8Slg=

View File

@ -1,14 +1,16 @@
[
{
"type": "thetaSketchMerge",
"type": "thetaSketch",
"name": "sids_sketch",
"fieldName": "sketch",
"isInputThetaSketch": true,
"size": 16384
},
{
"type": "thetaSketchMerge",
"type": "thetaSketch",
"name": "non_existing_col_validation",
"fieldName": "non_existing_col",
"isInputThetaSketch": true,
"size": 16384
}
]

View File

@ -4,8 +4,8 @@
"granularity": "ALL",
"dimensions": [],
"aggregations": [
{ "type": "thetaSketchMerge", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 },
{ "type": "thetaSketchMerge", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 }
{ "type": "thetaSketch", "name": "sids_sketch_count", "fieldName": "sids_sketch", "size": 16384 },
{ "type": "thetaSketch", "name": "non_existing_col_validation", "fieldName": "non_existing_col", "size": 16384 }
],
"postAggregations": [
{