Null handling fixes for DS HLL and Theta sketches. (#11830)

* Null handling fixes for DS HLL and Theta sketches.

For HLL, this fixes an NPE when processing a null in a multi-value dimension.

For both, empty strings are now properly treated as nulls (and ignored) in
replace-with-default mode. Behavior in SQL-compatible mode is unchanged.

* Fix expectation.
This commit is contained in:
Gian Merlino 2021-10-22 19:09:00 -07:00 committed by GitHub
parent cb9bc15e95
commit b7a4c79314
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 150 additions and 5 deletions

View File

@ -21,6 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.hll;
import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.TgtHllType;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.ColumnValueSelector;
@ -102,10 +103,14 @@ public class HllSketchBuildAggregator implements Aggregator
} else if (value instanceof String) {
sketch.update(((String) value).toCharArray());
} else if (value instanceof List) {
// noinspection unchecked
List<String> list = (List<String>) value;
for (String v : list) {
sketch.update(v.toCharArray());
// noinspection rawtypes
for (Object entry : (List) value) {
if (entry != null) {
final String asString = entry.toString();
if (!NullHandling.isNullOrEquivalent(asString)) {
sketch.update(asString);
}
}
}
} else if (value instanceof char[]) {
sketch.update((char[]) value);

View File

@ -22,6 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import org.apache.datasketches.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
@ -122,7 +123,10 @@ public class SketchAggregator implements Aggregator
} else if (update instanceof List) {
for (Object entry : (List) update) {
if (entry != null) {
union.update(entry.toString());
final String asString = entry.toString();
if (!NullHandling.isNullOrEquivalent(asString)) {
union.update(asString);
}
}
}
} else {

View File

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.datasketches.hll;
import org.apache.datasketches.hll.HllSketch;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
/**
* Tests for {@link HllSketchBuildAggregator#updateSketch}.
*
* Tests of the aggregator generally should go in {@link HllSketchAggregatorTest} instead.
*/
public class HllSketchBuildAggregatorTest extends InitializedNullHandlingTest
{
private final HllSketch sketch = new HllSketch(HllSketch.DEFAULT_LG_K);
@Test
public void testUpdateSketchVariousNumbers()
{
updateSketch(1L, -2L, 1L, -2, 1L, 2.0, 2f, Double.doubleToLongBits(2.0), 3.0);
assertSketchEstimate(4);
}
@Test
public void testUpdateSketchStrings()
{
updateSketch("foo", null, "bar", "");
assertSketchEstimate(2);
}
@Test
public void testUpdateSketchListsOfStrings()
{
updateSketch(
Arrays.asList("1", "2"),
Arrays.asList("2", "", "3", "11"),
Arrays.asList("1", null, "3", "12"),
Arrays.asList("1", "3", "13")
);
assertSketchEstimate(6);
}
@Test
public void testUpdateSketchCharArray()
{
updateSketch(
new char[]{1, 2},
new char[]{2, 3, 11},
new char[]{1, 2},
new char[]{1, 3, 13}
);
assertSketchEstimate(3);
}
@Test
public void testUpdateSketchByteArray()
{
updateSketch(
new byte[]{1, 2},
new byte[]{2, 3, 11},
new byte[]{1, 2},
new byte[]{1, 3, 13}
);
assertSketchEstimate(3);
}
@Test
public void testUpdateSketchIntArray()
{
updateSketch(
new int[]{1, 2},
new int[]{2, 3, 11},
new int[]{1, 2},
new int[]{1, 3, 13}
);
assertSketchEstimate(3);
}
@Test
public void testUpdateSketchLongArray()
{
updateSketch(
new long[]{1, 2},
new long[]{2, 3, 11},
new long[]{1, 2},
new long[]{1, 3, 13}
);
assertSketchEstimate(3);
}
private void updateSketch(final Object first, final Object... others)
{
// first != null check mimics how updateSketch is called: it's always guarded by a null check on the outer value.
if (first != null) {
HllSketchBuildAggregator.updateSketch(sketch, first);
}
for (final Object o : others) {
if (o != null) {
HllSketchBuildAggregator.updateSketch(sketch, o);
}
}
}
private void assertSketchEstimate(final long estimate)
{
Assert.assertEquals((double) estimate, sketch.getEstimate(), 0.1);
}
}

View File

@ -517,6 +517,7 @@ public class SketchAggregationTest
List<String> value = new ArrayList<>();
value.add("foo");
value.add(null);
value.add("");
value.add("bar");
List[] columnValues = new List[]{value};
final TestObjectColumnSelector selector = new TestObjectColumnSelector(columnValues);