handle empty sketches (#7526)

* handle empty sketches

* return array of NaN in case of empty sketch

* noinspection ForLoopReplaceableByForEach in tests

* style fixes
This commit is contained in:
Alexander Saydakov 2019-04-25 14:28:41 -07:00 committed by Jonathan Wei
parent 8308ffef1f
commit 9d8f934e68
4 changed files with 180 additions and 0 deletions

View File

@ -56,6 +56,11 @@ public class DoublesSketchToHistogramPostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
if (sketch.isEmpty()) {
final double[] histogram = new double[splitPoints.length + 1];
Arrays.fill(histogram, Double.NaN);
return histogram;
}
final double[] histogram = sketch.getPMF(splitPoints);
for (int i = 0; i < histogram.length; i++) {
histogram[i] *= sketch.getN();

View File

@ -75,6 +75,11 @@ public class DoublesSketchToQuantilesPostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
if (sketch.isEmpty()) {
final double[] quantiles = new double[fractions.length];
Arrays.fill(quantiles, Double.NaN);
return quantiles;
}
return sketch.getQuantiles(fractions);
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.datasketches.quantiles;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.junit.Assert;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
public class DoublesSketchToHistogramPostAggregatorTest
{
@Test
public void emptySketch()
{
final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null);
final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
final Map<String, Object> fields = new HashMap<>();
fields.put("sketch", agg.get());
final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
"histogram",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {3.5}
);
final double[] histogram = (double[]) postAgg.compute(fields);
Assert.assertNotNull(histogram);
Assert.assertEquals(2, histogram.length);
Assert.assertTrue(Double.isNaN(histogram[0]));
Assert.assertTrue(Double.isNaN(histogram[1]));
}
@Test
public void normalCase()
{
final double[] values = new double[] {1, 2, 3, 4, 5, 6};
final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);
final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
//noinspection ForLoopReplaceableByForEach
for (int i = 0; i < values.length; i++) {
agg.aggregate();
selector.increment();
}
final Map<String, Object> fields = new HashMap<>();
fields.put("sketch", agg.get());
final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
"histogram",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {3.5} // splits distribution in two buckets of equal mass
);
final double[] histogram = (double[]) postAgg.compute(fields);
Assert.assertNotNull(histogram);
Assert.assertEquals(2, histogram.length);
Assert.assertEquals(3.0, histogram[0], 0);
Assert.assertEquals(3.0, histogram[1], 0);
}
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.datasketches.quantiles;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
import org.junit.Assert;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
public class DoublesSketchToQuantilesPostAggregatorTest
{
@Test
public void emptySketch()
{
final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(null);
final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
final Map<String, Object> fields = new HashMap<>();
fields.put("sketch", agg.get());
final PostAggregator postAgg = new DoublesSketchToQuantilesPostAggregator(
"quantiles",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {0, 0.5, 1}
);
final double[] quantiles = (double[]) postAgg.compute(fields);
Assert.assertNotNull(quantiles);
Assert.assertEquals(3, quantiles.length);
Assert.assertTrue(Double.isNaN(quantiles[0]));
Assert.assertTrue(Double.isNaN(quantiles[1]));
Assert.assertTrue(Double.isNaN(quantiles[2]));
}
@Test
public void normalCase()
{
final double[] values = new double[] {1, 2, 3, 4, 5};
final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);
final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
//noinspection ForLoopReplaceableByForEach
for (int i = 0; i < values.length; i++) {
agg.aggregate();
selector.increment();
}
final Map<String, Object> fields = new HashMap<>();
fields.put("sketch", agg.get());
final PostAggregator postAgg = new DoublesSketchToQuantilesPostAggregator(
"quantiles",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {0, 0.5, 1}
);
final double[] quantiles = (double[]) postAgg.compute(fields);
Assert.assertNotNull(quantiles);
Assert.assertEquals(3, quantiles.length);
Assert.assertEquals(1.0, quantiles[0], 0);
Assert.assertEquals(3.0, quantiles[1], 0);
Assert.assertEquals(5.0, quantiles[2], 0);
}
}