From 08fea7a46a0dddbf9449b791bca8c9e783054642 Mon Sep 17 00:00:00 2001 From: somu-imply <93540295+somu-imply@users.noreply.github.com> Date: Tue, 11 Jan 2022 12:00:14 -0800 Subject: [PATCH] input type validation for datasketches hll "build" aggregator factory (#12131) * Ingestion will fail for HLLSketchBuild instead of creating with incorrect values * Addressing review comments for HLL< updated error message introduced test case --- .../hll/HllSketchBuildAggregatorFactory.java | 22 ++++++++++++++ .../hll/HllSketchAggregatorTest.java | 29 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java index d42ca9a0260..076cea4a0f5 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchBuildAggregatorFactory.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.query.aggregation.BufferAggregator; @@ -30,7 +31,9 @@ import org.apache.druid.query.aggregation.VectorAggregator; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -55,6 +58,7 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory super(name, fieldName, lgK, tgtHllType, round); } + @Override public ColumnType getIntermediateType() { @@ -71,6 +75,7 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) { final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); + validateInputs(columnSelectorFactory.getColumnCapabilities(getFieldName())); return new HllSketchBuildAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType())); } @@ -78,6 +83,7 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory) { final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); + validateInputs(columnSelectorFactory.getColumnCapabilities(getFieldName())); return new HllSketchBuildBufferAggregator( selector, getLgK(), @@ -95,6 +101,7 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { + validateInputs(selectorFactory.getColumnCapabilities(getFieldName())); return new HllSketchBuildVectorAggregator( selectorFactory, getFieldName(), @@ -114,4 +121,19 @@ public class HllSketchBuildAggregatorFactory extends HllSketchAggregatorFactory return HllSketch.getMaxUpdatableSerializationBytes(getLgK(), TgtHllType.valueOf(getTgtHllType())); } + private void validateInputs(@Nullable ColumnCapabilities capabilities) + { + if (capabilities != null) { + if (capabilities.is(ValueType.COMPLEX)) { + throw new ISE( + "Invalid input [%s] of type [%s] for [%s] aggregator [%s]", + getFieldName(), + capabilities.asTypeString(), + HllSketchModule.BUILD_TYPE_NAME, + getName() + ); + } + } + } + } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java index afac5739db9..fa41db64d84 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchAggregatorTest.java @@ -143,6 +143,35 @@ public class HllSketchAggregatorTest extends InitializedNullHandlingTest Assert.assertEquals(200, (double) row.get(0), 0.1); } + @Test + public void unsuccessfulComplexTypesInHLL() throws Exception + { + String metricSpec = "[{" + + "\"type\": \"hyperUnique\"," + + "\"name\": \"index_hll\"," + + "\"fieldName\": \"id\"" + + "}]"; + try { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("hll/hll_sketches.tsv").getFile()), + buildParserJson( + Arrays.asList("dim", "multiDim", "id"), + Arrays.asList("timestamp", "dim", "multiDim", "id") + ), + metricSpec, + 0, // minTimestamp + Granularities.NONE, + 200, // maxRowCount + buildGroupByQueryJson("HLLSketchBuild", "index_hll", !ROUND) + ); + } + catch (RuntimeException e) { + Assert.assertTrue( + e.getMessage().contains("Invalid input [index_hll] of type [COMPLEX] for [HLLSketchBuild]")); + } + + } + @Test public void buildSketchesAtQueryTimeMultiValue() throws Exception {