Merge pull request #2710 from himanshug/fix_sketch_estimate_comparator

Fix SketchEstimatePostAggregator's getComparator() and update the tests to verify the fix
This commit is contained in:
Fangjin Yang 2016-03-24 13:34:32 -07:00
commit d4a96843f9
4 changed files with 2430 additions and 2290 deletions

View File

@ -22,7 +22,9 @@ package io.druid.query.aggregation.datasketches.theta;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.primitives.Doubles;
import com.yahoo.sketches.theta.Sketch;
import io.druid.query.aggregation.PostAggregator;
@ -58,9 +60,23 @@ public class SketchEstimatePostAggregator implements PostAggregator
}
/**
 * Returns the comparator used to order the values produced by this post aggregator.
 *
 * <p>When {@code errorBoundsStdDev} is {@code null}, natural ordering is returned.
 * Otherwise both arguments are cast to {@code SketchEstimateWithErrorBounds} and
 * ordered by {@code getEstimate()} alone.
 *
 * <p>NOTE(review): this hunk looks like a diff rendering whose +/- markers were lost.
 * The {@code Comparator<Sketch>} signature and the
 * {@code SketchAggregatorFactory.COMPARATOR} return appear to be the removed lines,
 * with the raw-typed signature and the if/else below as their replacements.
 * Confirm against the actual source file.
 *
 * @return a comparator over this post aggregator's computed values
 */
@Override
public Comparator<Sketch> getComparator()
public Comparator getComparator()
{
return SketchAggregatorFactory.COMPARATOR;
if (errorBoundsStdDev == null) {
// Presumably the computed value is a plain Double in this case, so natural
// ordering applies — TODO confirm against compute().
return Ordering.natural();
} else {
// Only the estimate takes part in the ordering; no other field of
// SketchEstimateWithErrorBounds is consulted.
return new Comparator()
{
@Override
public int compare(Object o1, Object o2)
{
// The casts throw ClassCastException if a non-null argument is of another type.
return Doubles.compare(
((SketchEstimateWithErrorBounds) o1).getEstimate(),
((SketchEstimateWithErrorBounds) o2).getEstimate()
);
}
};
}
}
@Override

View File

@ -19,6 +19,7 @@
package io.druid.query.aggregation.datasketches.theta;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
@ -77,21 +78,76 @@ public class SketchAggregationTest
);
List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());
Assert.assertEquals(1, results.size());
Assert.assertEquals(5, results.size());
Assert.assertEquals(
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("sketch_count", 50.0)
.put("sketchEstimatePostAgg", 50.0)
.put("sketchUnionPostAggEstimate", 50.0)
.put("sketchIntersectionPostAggEstimate", 50.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
ImmutableList.of(
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_3")
.put("sketch_count", 38.0)
.put("sketchEstimatePostAgg", 38.0)
.put("sketchUnionPostAggEstimate", 38.0)
.put("sketchIntersectionPostAggEstimate", 38.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_1")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_2")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_4")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_5")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
)
),
results.get(0)
results
);
}
@ -118,7 +174,7 @@ public class SketchAggregationTest
Result<SelectResultValue> result = (Result<SelectResultValue>) Iterables.getOnlyElement(Sequences.toList(seq, Lists.newArrayList()));
Assert.assertEquals(new DateTime("2014-10-20T00:00:00.000Z"), result.getTimestamp());
Assert.assertEquals(100, result.getValue().getEvents().size());
Assert.assertEquals("AgMDAAAazJMBAAAAAACAPzz9j7pWTMdR", result.getValue().getEvents().get(0).getEvent().get("pty_country"));
Assert.assertEquals("AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=", result.getValue().getEvents().get(0).getEvent().get("pty_country"));
}
@Test
@ -176,21 +232,76 @@ public class SketchAggregationTest
);
List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());
Assert.assertEquals(1, results.size());
Assert.assertEquals(5, results.size());
Assert.assertEquals(
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("sketch_count", 50.0)
.put("sketchEstimatePostAgg", 50.0)
.put("sketchUnionPostAggEstimate", 50.0)
.put("sketchIntersectionPostAggEstimate", 50.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
ImmutableList.of(
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_3")
.put("sketch_count", 38.0)
.put("sketchEstimatePostAgg", 38.0)
.put("sketchUnionPostAggEstimate", 38.0)
.put("sketchIntersectionPostAggEstimate", 38.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_1")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_2")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_4")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
),
new MapBasedRow(
DateTime.parse("2014-10-19T00:00:00.000Z"),
ImmutableMap
.<String, Object>builder()
.put("product", "product_5")
.put("sketch_count", 42.0)
.put("sketchEstimatePostAgg", 42.0)
.put("sketchUnionPostAggEstimate", 42.0)
.put("sketchIntersectionPostAggEstimate", 42.0)
.put("sketchAnotBPostAggEstimate", 0.0)
.put("non_existing_col_validation", 0.0)
.build()
)
),
results.get(0)
results
);
}

View File

@ -1,8 +1,8 @@
{
"queryType": "groupBy",
"dataSource": "test_datasource",
"granularity": "ALL",
"dimensions": [],
"granularity":"ALL",
"dimensions": ["product"],
"aggregations": [
{
"type": "thetaSketch",
@ -89,5 +89,18 @@
],
"intervals": [
"2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z"
]
],
"limitSpec": {
"type": "default",
"columns": [
{
"dimension": "sketchEstimatePostAgg",
"direction": "ASC"
},
{
"dimension": "product",
"direction": "ASC"
}
]
}
}