mirror of https://github.com/apache/druid.git
Null handling fixes for DS HLL and Theta sketches. (#11830)
* Null handling fixes for DS HLL and Theta sketches. For HLL, this fixes an NPE when processing a null in a multi-value dimension. For both, empty strings are now properly treated as nulls (and ignored) in replace-with-default mode. Behavior in SQL-compatible mode is unchanged. * Fix expectation.
This commit is contained in:
parent
cb9bc15e95
commit
b7a4c79314
|
@ -21,6 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.hll;
|
||||||
|
|
||||||
import org.apache.datasketches.hll.HllSketch;
|
import org.apache.datasketches.hll.HllSketch;
|
||||||
import org.apache.datasketches.hll.TgtHllType;
|
import org.apache.datasketches.hll.TgtHllType;
|
||||||
|
import org.apache.druid.common.config.NullHandling;
|
||||||
import org.apache.druid.java.util.common.IAE;
|
import org.apache.druid.java.util.common.IAE;
|
||||||
import org.apache.druid.query.aggregation.Aggregator;
|
import org.apache.druid.query.aggregation.Aggregator;
|
||||||
import org.apache.druid.segment.ColumnValueSelector;
|
import org.apache.druid.segment.ColumnValueSelector;
|
||||||
|
@ -102,10 +103,14 @@ public class HllSketchBuildAggregator implements Aggregator
|
||||||
} else if (value instanceof String) {
|
} else if (value instanceof String) {
|
||||||
sketch.update(((String) value).toCharArray());
|
sketch.update(((String) value).toCharArray());
|
||||||
} else if (value instanceof List) {
|
} else if (value instanceof List) {
|
||||||
// noinspection unchecked
|
// noinspection rawtypes
|
||||||
List<String> list = (List<String>) value;
|
for (Object entry : (List) value) {
|
||||||
for (String v : list) {
|
if (entry != null) {
|
||||||
sketch.update(v.toCharArray());
|
final String asString = entry.toString();
|
||||||
|
if (!NullHandling.isNullOrEquivalent(asString)) {
|
||||||
|
sketch.update(asString);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (value instanceof char[]) {
|
} else if (value instanceof char[]) {
|
||||||
sketch.update((char[]) value);
|
sketch.update((char[]) value);
|
||||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
|
||||||
import org.apache.datasketches.Family;
|
import org.apache.datasketches.Family;
|
||||||
import org.apache.datasketches.theta.SetOperation;
|
import org.apache.datasketches.theta.SetOperation;
|
||||||
import org.apache.datasketches.theta.Union;
|
import org.apache.datasketches.theta.Union;
|
||||||
|
import org.apache.druid.common.config.NullHandling;
|
||||||
import org.apache.druid.java.util.common.ISE;
|
import org.apache.druid.java.util.common.ISE;
|
||||||
import org.apache.druid.query.aggregation.Aggregator;
|
import org.apache.druid.query.aggregation.Aggregator;
|
||||||
import org.apache.druid.segment.BaseObjectColumnValueSelector;
|
import org.apache.druid.segment.BaseObjectColumnValueSelector;
|
||||||
|
@ -122,7 +123,10 @@ public class SketchAggregator implements Aggregator
|
||||||
} else if (update instanceof List) {
|
} else if (update instanceof List) {
|
||||||
for (Object entry : (List) update) {
|
for (Object entry : (List) update) {
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
union.update(entry.toString());
|
final String asString = entry.toString();
|
||||||
|
if (!NullHandling.isNullOrEquivalent(asString)) {
|
||||||
|
union.update(asString);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -0,0 +1,135 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.query.aggregation.datasketches.hll;
|
||||||
|
|
||||||
|
import org.apache.datasketches.hll.HllSketch;
|
||||||
|
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link HllSketchBuildAggregator#updateSketch}.
|
||||||
|
*
|
||||||
|
* Tests of the aggregator generally should go in {@link HllSketchAggregatorTest} instead.
|
||||||
|
*/
|
||||||
|
public class HllSketchBuildAggregatorTest extends InitializedNullHandlingTest
|
||||||
|
{
|
||||||
|
private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchVariousNumbers()
|
||||||
|
{
|
||||||
|
updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0), 3.0);
|
||||||
|
assertSketchEstimate(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchStrings()
|
||||||
|
{
|
||||||
|
updateSketch("foo", null, "bar", "");
|
||||||
|
assertSketchEstimate(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchListsOfStrings()
|
||||||
|
{
|
||||||
|
updateSketch(
|
||||||
|
Arrays.asList("1", "2"),
|
||||||
|
Arrays.asList("2", "", "3", "11"),
|
||||||
|
Arrays.asList("1", null, "3", "12"),
|
||||||
|
Arrays.asList("1", "3", "13")
|
||||||
|
);
|
||||||
|
|
||||||
|
assertSketchEstimate(6);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchCharArray()
|
||||||
|
{
|
||||||
|
updateSketch(
|
||||||
|
new char[]{1, 2},
|
||||||
|
new char[]{2, 3, 11},
|
||||||
|
new char[]{1, 2},
|
||||||
|
new char[]{1, 3, 13}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertSketchEstimate(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchByteArray()
|
||||||
|
{
|
||||||
|
updateSketch(
|
||||||
|
new byte[]{1, 2},
|
||||||
|
new byte[]{2, 3, 11},
|
||||||
|
new byte[]{1, 2},
|
||||||
|
new byte[]{1, 3, 13}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertSketchEstimate(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchIntArray()
|
||||||
|
{
|
||||||
|
updateSketch(
|
||||||
|
new int[]{1, 2},
|
||||||
|
new int[]{2, 3, 11},
|
||||||
|
new int[]{1, 2},
|
||||||
|
new int[]{1, 3, 13}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertSketchEstimate(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateSketchLongArray()
|
||||||
|
{
|
||||||
|
updateSketch(
|
||||||
|
new long[]{1, 2},
|
||||||
|
new long[]{2, 3, 11},
|
||||||
|
new long[]{1, 2},
|
||||||
|
new long[]{1, 3, 13}
|
||||||
|
);
|
||||||
|
|
||||||
|
assertSketchEstimate(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateSketch(final Object first, final Object... others)
|
||||||
|
{
|
||||||
|
// first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value.
|
||||||
|
if (first != null) {
|
||||||
|
HllSketchBuildAggregator.updateSketch(sketch, first);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final Object o : others) {
|
||||||
|
if (o != null) {
|
||||||
|
HllSketchBuildAggregator.updateSketch(sketch, o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertSketchEstimate(final long estimate)
|
||||||
|
{
|
||||||
|
Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1);
|
||||||
|
}
|
||||||
|
}
|
|
@ -517,6 +517,7 @@ public class SketchAggregationTest
|
||||||
List<String> value = new ArrayList<>();
|
List<String> value = new ArrayList<>();
|
||||||
value.add("foo");
|
value.add("foo");
|
||||||
value.add(null);
|
value.add(null);
|
||||||
|
value.add("");
|
||||||
value.add("bar");
|
value.add("bar");
|
||||||
List[] columnValues = new List[]{value};
|
List[] columnValues = new List[]{value};
|
||||||
final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues);
|
final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues);
|
||||||
|
|
Loading…
Reference in New Issue