mirror of https://github.com/apache/druid.git
Null handling fixes for DS HLL and Theta sketches. (#11830)
* Null handling fixes for DS HLL and Theta sketches. For HLL, this fixes an NPE when processing a null in a multi-value dimension. For both, empty strings are now properly treated as nulls (and ignored) in replace-with-default mode. Behavior in SQL-compatible mode is unchanged. * Fix expectation.
This commit is contained in:
parent
cb9bc15e95
commit
b7a4c79314
|
@ -21,6 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.hll;
|
|||
|
||||
import org.apache.datasketches.hll.HllSketch;
|
||||
import org.apache.datasketches.hll.TgtHllType;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
import org.apache.druid.query.aggregation.Aggregator;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
|
@ -102,10 +103,14 @@ public class HllSketchBuildAggregator implements Aggregator
|
|||
} else if (value instanceof String) {
|
||||
sketch.update(((String) value).toCharArray());
|
||||
} else if (value instanceof List) {
|
||||
// noinspection unchecked
|
||||
List<String> list = (List<String>) value;
|
||||
for (String v : list) {
|
||||
sketch.update(v.toCharArray());
|
||||
// noinspection rawtypes
|
||||
for (Object entry : (List) value) {
|
||||
if (entry != null) {
|
||||
final String asString = entry.toString();
|
||||
if (!NullHandling.isNullOrEquivalent(asString)) {
|
||||
sketch.update(asString);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (value instanceof char[]) {
|
||||
sketch.update((char[]) value);
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
|
|||
import org.apache.datasketches.Family;
|
||||
import org.apache.datasketches.theta.SetOperation;
|
||||
import org.apache.datasketches.theta.Union;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.query.aggregation.Aggregator;
|
||||
import org.apache.druid.segment.BaseObjectColumnValueSelector;
|
||||
|
@ -122,7 +123,10 @@ public class SketchAggregator implements Aggregator
|
|||
} else if (update instanceof List) {
|
||||
for (Object entry : (List) update) {
|
||||
if (entry != null) {
|
||||
union.update(entry.toString());
|
||||
final String asString = entry.toString();
|
||||
if (!NullHandling.isNullOrEquivalent(asString)) {
|
||||
union.update(asString);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.query.aggregation.datasketches.hll;
|
||||
|
||||
import org.apache.datasketches.hll.HllSketch;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Tests for {@link HllSketchBuildAggregator#updateSketch}.
|
||||
*
|
||||
* Tests of the aggregator generally should go in {@link HllSketchAggregatorTest} instead.
|
||||
*/
|
||||
public class HllSketchBuildAggregatorTest extends InitializedNullHandlingTest
|
||||
{
|
||||
private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K);
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchVariousNumbers()
|
||||
{
|
||||
updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0), 3.0);
|
||||
assertSketchEstimate(4);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchStrings()
|
||||
{
|
||||
updateSketch("foo", null, "bar", "");
|
||||
assertSketchEstimate(2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchListsOfStrings()
|
||||
{
|
||||
updateSketch(
|
||||
Arrays.asList("1", "2"),
|
||||
Arrays.asList("2", "", "3", "11"),
|
||||
Arrays.asList("1", null, "3", "12"),
|
||||
Arrays.asList("1", "3", "13")
|
||||
);
|
||||
|
||||
assertSketchEstimate(6);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchCharArray()
|
||||
{
|
||||
updateSketch(
|
||||
new char[]{1, 2},
|
||||
new char[]{2, 3, 11},
|
||||
new char[]{1, 2},
|
||||
new char[]{1, 3, 13}
|
||||
);
|
||||
|
||||
assertSketchEstimate(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchByteArray()
|
||||
{
|
||||
updateSketch(
|
||||
new byte[]{1, 2},
|
||||
new byte[]{2, 3, 11},
|
||||
new byte[]{1, 2},
|
||||
new byte[]{1, 3, 13}
|
||||
);
|
||||
|
||||
assertSketchEstimate(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchIntArray()
|
||||
{
|
||||
updateSketch(
|
||||
new int[]{1, 2},
|
||||
new int[]{2, 3, 11},
|
||||
new int[]{1, 2},
|
||||
new int[]{1, 3, 13}
|
||||
);
|
||||
|
||||
assertSketchEstimate(3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateSketchLongArray()
|
||||
{
|
||||
updateSketch(
|
||||
new long[]{1, 2},
|
||||
new long[]{2, 3, 11},
|
||||
new long[]{1, 2},
|
||||
new long[]{1, 3, 13}
|
||||
);
|
||||
|
||||
assertSketchEstimate(3);
|
||||
}
|
||||
|
||||
private void updateSketch(final Object first, final Object... others)
|
||||
{
|
||||
// first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value.
|
||||
if (first != null) {
|
||||
HllSketchBuildAggregator.updateSketch(sketch, first);
|
||||
}
|
||||
|
||||
for (final Object o : others) {
|
||||
if (o != null) {
|
||||
HllSketchBuildAggregator.updateSketch(sketch, o);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSketchEstimate(final long estimate)
|
||||
{
|
||||
Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1);
|
||||
}
|
||||
}
|
|
@ -517,6 +517,7 @@ public class SketchAggregationTest
|
|||
List<String> value = new ArrayList<>();
|
||||
value.add("foo");
|
||||
value.add(null);
|
||||
value.add("");
|
||||
value.add("bar");
|
||||
List[] columnValues = new List[]{value};
|
||||
final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues);
|
||||
|
|
Loading…
Reference in New Issue