diff --git a/processing/src/main/java/org/apache/druid/math/expr/Evals.java b/processing/src/main/java/org/apache/druid/math/expr/Evals.java index a4c7f3b47ce..0010679a814 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Evals.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Evals.java @@ -69,6 +69,28 @@ public class Evals return !NullHandling.isNullOrEquivalent(x) && Boolean.parseBoolean(x); } + /** + * Best effort try to turn a value into a boolean: + * {@link Boolean} will be passed directly through + * {@link String} will use {@link #asBoolean(String)} + * {@link Long} will use {@link #asBoolean(long)} + * {@link Number} will use {@link #asBoolean(double)} + * everything else, including null will be false + */ + public static boolean objectAsBoolean(@Nullable Object val) + { + if (val instanceof Boolean) { + return (Boolean) val; + } else if (val instanceof String) { + return Evals.asBoolean((String) val); + } else if (val instanceof Long) { + return Evals.asBoolean((Long) val); + } else if (val instanceof Number) { + return Evals.asBoolean(((Number) val).doubleValue()); + } + return false; + } + /** * Call {@link Object#toString()} on a non-null value */ diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index cca67bc12ae..b2856c6c5ee 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -30,6 +30,7 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.column.NullableTypeStrategy; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.nested.StructuredData; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -353,7 +354,7 @@ public abstract class ExprEval case STRING: return 
ExprEval.of(String.valueOf(value)); default: - throw new IllegalArgumentException("invalid type " + type); + throw new IllegalArgumentException("Invalid type, cannot coerce [" + type + "] to boolean"); } } @@ -367,6 +368,9 @@ public abstract class ExprEval public static ExprEval ofComplex(ExpressionType outputType, @Nullable Object value) { + if (ExpressionType.NESTED_DATA.equals(outputType)) { + return new NestedDataExprEval(value); + } return new ComplexExprEval(outputType, value); } @@ -494,7 +498,7 @@ public abstract class ExprEval } // is this cool? - return new ComplexExprEval(ExpressionType.UNKNOWN_COMPLEX, val); + return ofComplex(ExpressionType.UNKNOWN_COMPLEX, val); } public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object value) @@ -549,10 +553,8 @@ public abstract class ExprEval } return ofDouble(null); case COMPLEX: - // json isn't currently defined in druid-core, this can be reworked once - // https://github.com/apache/druid/pull/13698 is merged (or COMPLEX is promoted to a real built-in type(s) - if ("json".equals(type.getComplexTypeName())) { - return ofComplex(type, value); + if (ExpressionType.NESTED_DATA.equals(type)) { + return ofComplex(type, StructuredData.unwrap(value)); } byte[] bytes = null; if (value instanceof String) { @@ -844,8 +846,13 @@ public abstract class ExprEval case STRING: return ExprEval.ofStringArray(value == null ? null : new Object[] {value.toString()}); } + break; + case COMPLEX: + if (ExpressionType.NESTED_DATA.equals(castTo)) { + return new NestedDataExprEval(value); + } } - throw new IAE("invalid type cannot cast " + type() + " to " + castTo); + throw invalidCast(type(), castTo); } @Override @@ -918,8 +925,13 @@ public abstract class ExprEval case STRING: return ExprEval.ofStringArray(value == null ? 
null : new Object[] {value.toString()}); } + break; + case COMPLEX: + if (ExpressionType.NESTED_DATA.equals(castTo)) { + return new NestedDataExprEval(value); + } } - throw new IAE("invalid type cannot cast " + type() + " to " + castTo); + throw invalidCast(type(), castTo); } @Override @@ -1089,8 +1101,12 @@ public abstract class ExprEval case STRING: return ExprEval.ofStringArray(value == null ? null : new Object[] {value}); } + case COMPLEX: + if (ExpressionType.NESTED_DATA.equals(castTo)) { + return new NestedDataExprEval(value); + } } - throw new IAE("invalid type cannot cast " + type() + " to " + castTo); + throw invalidCast(type(), castTo); } @Override @@ -1272,9 +1288,13 @@ public abstract class ExprEval cast[i] = ExprEval.ofType(elementType(), value[i]).castTo(elementType).value(); } return ExprEval.ofArray(castTo, cast); + case COMPLEX: + if (ExpressionType.NESTED_DATA.equals(castTo)) { + return new NestedDataExprEval(value); + } } - throw new IAE("invalid type cannot cast " + type() + " to " + castTo); + throw invalidCast(type(), castTo); } @Override @@ -1314,7 +1334,7 @@ public abstract class ExprEval @Override public boolean isNumericNull() { - return false; + return true; } @Override @@ -1340,12 +1360,11 @@ public abstract class ExprEval { return false; } - @Nullable @Override public Object[] asArray() { - return new Object[0]; + return null; } @Override @@ -1355,10 +1374,10 @@ public abstract class ExprEval return this; } // allow cast of unknown complex to some other complex type - if (expressionType.getComplexTypeName() == null) { - return new ComplexExprEval(castTo, value); + if (expressionType.getComplexTypeName() == null && castTo.getType().equals(ExprType.COMPLEX)) { + return ofComplex(castTo, value); } - throw new IAE("invalid type cannot cast " + expressionType + " to " + castTo); + throw invalidCast(expressionType, castTo); } @Override @@ -1367,4 +1386,123 @@ public abstract class ExprEval return new ComplexExpr(expressionType, value); } } + 
+ private static class NestedDataExprEval extends ExprEval + { + @Nullable + private Number number; + private boolean computedNumber = false; + + private NestedDataExprEval(@Nullable Object value) + { + super(value); + } + + @Override + public ExpressionType type() + { + return ExpressionType.NESTED_DATA; + } + + @Override + public boolean isNumericNull() + { + computeNumber(); + return number == null; + } + + @Override + public int asInt() + { + computeNumber(); + if (number != null) { + return number.intValue(); + } + return 0; + } + + @Override + public long asLong() + { + computeNumber(); + if (number != null) { + return number.longValue(); + } + return 0L; + } + + @Override + public double asDouble() + { + computeNumber(); + if (number != null) { + return number.doubleValue(); + } + return 0.0; + } + + @Override + public boolean asBoolean() + { + Object val = StructuredData.unwrap(value); + if (val != null) { + return Evals.objectAsBoolean(val); + } + return false; + } + + private void computeNumber() + { + if (!computedNumber && value != null) { + computedNumber = true; + Object val = StructuredData.unwrap(value); + if (val instanceof Number) { + number = (Number) val; + } else if (val instanceof Boolean) { + number = Evals.asLong((Boolean) val); + } else if (val instanceof String) { + number = ExprEval.computeNumber((String) val); + } + } + } + + @Nullable + @Override + public Object[] asArray() + { + Object val = StructuredData.unwrap(value); + ExprEval maybeArray = ExprEval.bestEffortOf(val); + if (maybeArray.type().isPrimitive() || maybeArray.isArray()) { + return maybeArray.asArray(); + } + return null; + } + + @Override + public ExprEval castTo(ExpressionType castTo) + { + if (ExpressionType.NESTED_DATA.equals(castTo)) { + return this; + } + + Object val = StructuredData.unwrap(value); + ExprEval bestEffortOf = ExprEval.bestEffortOf(val); + + if (bestEffortOf.type().isPrimitive() || bestEffortOf.type().isArray()) { + return bestEffortOf.castTo(castTo); 
+ } + throw invalidCast(ExpressionType.NESTED_DATA, castTo); + } + + @Override + public Expr toExpr() + { + return new ComplexExpr(ExpressionType.NESTED_DATA, value); + } + } + + public static IAE invalidCast(ExpressionType fromType, ExpressionType toType) + { + return new IAE("Invalid type, cannot cast [" + fromType + "] to [" + toType + "]"); + } } diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionType.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionType.java index f2f17f2ef73..2e1c19ae7f2 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionType.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionType.java @@ -51,6 +51,8 @@ public class ExpressionType extends BaseTypeSignature new ExpressionType(ExprType.ARRAY, null, LONG); public static final ExpressionType DOUBLE_ARRAY = new ExpressionType(ExprType.ARRAY, null, DOUBLE); + public static final ExpressionType NESTED_DATA = + ExpressionType.fromColumnTypeStrict(ColumnType.NESTED_DATA); public static final ExpressionType UNKNOWN_COMPLEX = new ExpressionType(ExprType.COMPLEX, null, null); diff --git a/processing/src/main/java/org/apache/druid/math/expr/Function.java b/processing/src/main/java/org/apache/druid/math/expr/Function.java index 03eb95820fa..b936a4adda7 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Function.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Function.java @@ -1950,7 +1950,7 @@ public interface Function extends NamedFunction castTo = ExpressionType.fromString(StringUtils.toUpperCase(y.asString())); } catch (IllegalArgumentException e) { - throw validationFailed("invalid type %s", y.asString()); + throw validationFailed("Invalid type [%s]", y.asString()); } return x.castTo(castTo); } diff --git a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java index 
2fb3591ddfc..1a16d99d183 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java +++ b/processing/src/main/java/org/apache/druid/math/expr/vector/VectorProcessors.java @@ -116,8 +116,12 @@ public class VectorProcessors final double[] doubles = new double[maxVectorSize]; final boolean[] nulls; if (constant == null) { - nulls = new boolean[maxVectorSize]; - Arrays.fill(nulls, NullHandling.sqlCompatible()); + if (NullHandling.sqlCompatible()) { + nulls = new boolean[maxVectorSize]; + Arrays.fill(nulls, true); + } else { + nulls = null; + } } else { nulls = null; Arrays.fill(doubles, constant); @@ -149,8 +153,12 @@ public class VectorProcessors final long[] longs = new long[maxVectorSize]; final boolean[] nulls; if (constant == null) { - nulls = new boolean[maxVectorSize]; - Arrays.fill(nulls, NullHandling.sqlCompatible()); + if (NullHandling.sqlCompatible()) { + nulls = new boolean[maxVectorSize]; + Arrays.fill(nulls, true); + } else { + nulls = null; + } } else { nulls = null; Arrays.fill(longs, constant); @@ -721,7 +729,7 @@ public class VectorProcessors if (leftNull) { if (rightNull) { output[i] = 0L; - outputNulls[i] = NullHandling.sqlCompatible(); + outputNulls[i] = true; return; } final boolean bool = Evals.asBoolean(rightInput[i]); @@ -770,7 +778,7 @@ public class VectorProcessors if (leftNull) { if (rightNull) { output[i] = 0; - outputNulls[i] = NullHandling.sqlCompatible(); + outputNulls[i] = true; return; } final boolean bool = Evals.asBoolean(rightInput[i]); @@ -884,7 +892,7 @@ public class VectorProcessors if (leftNull) { if (rightNull) { output[i] = 0L; - outputNulls[i] = NullHandling.sqlCompatible(); + outputNulls[i] = true; return; } final boolean bool = Evals.asBoolean(rightInput[i]); @@ -933,7 +941,7 @@ public class VectorProcessors if (leftNull) { if (rightNull) { output[i] = 0L; - outputNulls[i] = NullHandling.sqlCompatible(); + outputNulls[i] = true; return; } final boolean bool = 
Evals.asBoolean(rightInput[i]); @@ -980,7 +988,7 @@ public class VectorProcessors final boolean rightNull = rightInput[i] == null; if (leftNull) { if (rightNull) { - outputNulls[i] = NullHandling.sqlCompatible(); + outputNulls[i] = true; return; } final boolean bool = Evals.asBoolean((String) rightInput[i]); diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java index d1984554810..7e62207111b 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java +++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java @@ -22,7 +22,6 @@ package org.apache.druid.query.expression; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Preconditions; import org.apache.druid.guice.annotations.Json; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -30,7 +29,6 @@ import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.math.expr.ExprType; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.NamedFunction; -import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.nested.NestedPathFinder; import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.nested.StructuredData; @@ -46,10 +44,6 @@ import java.util.stream.Collectors; public class NestedDataExpressions { - public static final ExpressionType TYPE = Preconditions.checkNotNull( - ExpressionType.fromColumnType(ColumnType.NESTED_DATA) - ); - public static class JsonObjectExprMacro implements ExprMacroTable.ExprMacro { public static final String NAME = "json_object"; @@ -88,7 +82,7 @@ public class NestedDataExpressions theMap.put(field.asString(), unwrap(value)); } - return ExprEval.ofComplex(TYPE, theMap); + 
return ExprEval.ofComplex(ExpressionType.NESTED_DATA, theMap); } @Override @@ -102,7 +96,7 @@ public class NestedDataExpressions @Override public ExpressionType getOutputType(InputBindingInspector inspector) { - return TYPE; + return ExpressionType.NESTED_DATA; } } return new StructExpr(args); @@ -213,12 +207,12 @@ public class NestedDataExpressions { ExprEval arg = args.get(0).eval(bindings); if (arg.value() == null) { - return ExprEval.ofComplex(TYPE, null); + return ExprEval.ofComplex(ExpressionType.NESTED_DATA, null); } if (arg.type().is(ExprType.STRING)) { try { return ExprEval.ofComplex( - TYPE, + ExpressionType.NESTED_DATA, jsonMapper.readValue(arg.asString(), Object.class) ); } @@ -244,7 +238,7 @@ public class NestedDataExpressions @Override public ExpressionType getOutputType(InputBindingInspector inspector) { - return TYPE; + return ExpressionType.NESTED_DATA; } } return new ParseJsonExpr(args); @@ -288,19 +282,19 @@ public class NestedDataExpressions if (arg.type().is(ExprType.STRING) && arg.value() != null) { try { return ExprEval.ofComplex( - TYPE, + ExpressionType.NESTED_DATA, jsonMapper.readValue(arg.asString(), Object.class) ); } catch (JsonProcessingException e) { return ExprEval.ofComplex( - TYPE, + ExpressionType.NESTED_DATA, null ); } } return ExprEval.ofComplex( - TYPE, + ExpressionType.NESTED_DATA, null ); } @@ -316,7 +310,7 @@ public class NestedDataExpressions @Override public ExpressionType getOutputType(InputBindingInspector inspector) { - return TYPE; + return ExpressionType.NESTED_DATA; } } return new ParseJsonExpr(args); @@ -440,7 +434,7 @@ public class NestedDataExpressions { ExprEval input = args.get(0).eval(bindings); return ExprEval.ofComplex( - TYPE, + ExpressionType.NESTED_DATA, NestedPathFinder.find(unwrap(input), parts) ); } @@ -457,7 +451,7 @@ public class NestedDataExpressions public ExpressionType getOutputType(InputBindingInspector inspector) { // call all the output JSON typed - return TYPE; + return 
ExpressionType.NESTED_DATA; } } return new JsonQueryExpr(args); diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java index b59149863a3..1b95cb977bb 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/DoubleVectorValueMatcher.java @@ -19,7 +19,6 @@ package org.apache.druid.query.filter.vector; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.filter.DruidDoublePredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; @@ -30,7 +29,6 @@ import javax.annotation.Nullable; public class DoubleVectorValueMatcher implements VectorValueMatcherFactory { private final VectorValueSelector selector; - private final boolean canHaveNulls = !NullHandling.replaceWithDefault(); public DoubleVectorValueMatcher(final VectorValueSelector selector) { @@ -40,7 +38,7 @@ public class DoubleVectorValueMatcher implements VectorValueMatcherFactory @Override public VectorValueMatcher makeMatcher(@Nullable final String value) { - if (value == null && canHaveNulls) { + if (value == null) { return makeNullValueMatcher(selector); } @@ -62,7 +60,7 @@ public class DoubleVectorValueMatcher implements VectorValueMatcherFactory final double[] vector = selector.getDoubleVector(); final int[] selection = match.getSelection(); final boolean[] nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; for (int i = 0; i < mask.getSelectionSize(); i++) { @@ -97,7 +95,7 @@ public class DoubleVectorValueMatcher implements VectorValueMatcherFactory final double[] vector = selector.getDoubleVector(); final int[] selection = match.getSelection(); final boolean[] 
nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java index a91b9d504f5..cd62e406ab3 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/FloatVectorValueMatcher.java @@ -19,7 +19,6 @@ package org.apache.druid.query.filter.vector; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.filter.DruidFloatPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; @@ -30,7 +29,6 @@ import javax.annotation.Nullable; public class FloatVectorValueMatcher implements VectorValueMatcherFactory { private final VectorValueSelector selector; - private final boolean canHaveNulls = !NullHandling.replaceWithDefault(); public FloatVectorValueMatcher(final VectorValueSelector selector) { @@ -40,7 +38,7 @@ public class FloatVectorValueMatcher implements VectorValueMatcherFactory @Override public VectorValueMatcher makeMatcher(@Nullable final String value) { - if (value == null && canHaveNulls) { + if (value == null) { return makeNullValueMatcher(selector); } @@ -62,7 +60,7 @@ public class FloatVectorValueMatcher implements VectorValueMatcherFactory final float[] vector = selector.getFloatVector(); final int[] selection = match.getSelection(); final boolean[] nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; @@ -98,7 +96,7 @@ public class FloatVectorValueMatcher implements VectorValueMatcherFactory final float[] vector = selector.getFloatVector(); final int[] selection = 
match.getSelection(); final boolean[] nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; diff --git a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java index a6eb520e241..55084a67c27 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java +++ b/processing/src/main/java/org/apache/druid/query/filter/vector/LongVectorValueMatcher.java @@ -19,7 +19,6 @@ package org.apache.druid.query.filter.vector; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.filter.DruidLongPredicate; import org.apache.druid.query.filter.DruidPredicateFactory; import org.apache.druid.segment.DimensionHandlerUtils; @@ -30,7 +29,6 @@ import javax.annotation.Nullable; public class LongVectorValueMatcher implements VectorValueMatcherFactory { private final VectorValueSelector selector; - private final boolean canHaveNulls = !NullHandling.replaceWithDefault(); public LongVectorValueMatcher(final VectorValueSelector selector) { @@ -40,7 +38,7 @@ public class LongVectorValueMatcher implements VectorValueMatcherFactory @Override public VectorValueMatcher makeMatcher(@Nullable final String value) { - if (value == null && canHaveNulls) { + if (value == null) { return makeNullValueMatcher(selector); } @@ -62,7 +60,7 @@ public class LongVectorValueMatcher implements VectorValueMatcherFactory final long[] vector = selector.getLongVector(); final int[] selection = match.getSelection(); final boolean[] nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; @@ -98,7 +96,7 @@ public class LongVectorValueMatcher implements VectorValueMatcherFactory final long[] vector = selector.getLongVector(); final int[] 
selection = match.getSelection(); final boolean[] nulls = selector.getNullVector(); - final boolean hasNulls = canHaveNulls && nulls != null; + final boolean hasNulls = nulls != null; int numRows = 0; diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java index ee9331d3189..38d8c51a3be 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnIndexer.java @@ -21,6 +21,7 @@ package org.apache.druid.segment; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; @@ -406,6 +407,7 @@ public class AutoTypeColumnIndexer implements DimensionIndexer() { @Override @@ -459,11 +461,12 @@ public class AutoTypeColumnIndexer implements DimensionIndexer processValue(ExprEval eval) { - final ColumnType columnType = ExpressionType.toColumnType(eval.type()); + final ExpressionType columnType = eval.type(); int sizeEstimate; switch (columnType.getType()) { case LONG: @@ -609,4 +612,19 @@ public class AutoTypeColumnIndexer implements DimensionIndexer mergedFields) - { - return mergedFields.size() == 1 - && mergedFields.get(NestedPathFinder.JSON_PATH_ROOT) != null - && mergedFields.get(NestedPathFinder.JSON_PATH_ROOT).getSingleType() != null; - } - @Override public ColumnValueSelector convertSortedSegmentRowValuesToMergedRowValues( int segmentIndex, @@ -290,7 +307,7 @@ public class AutoTypeColumnMerger implements DimensionMergerV9 @Override public boolean hasOnlyNulls() { - return false; + return hasOnlyNulls; } @Override @@ -301,6 +318,7 @@ public class AutoTypeColumnMerger implements DimensionMergerV9 final 
NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.serializerBuilder() .withLogicalType(logicalType) .withHasNulls(serializer.hasNulls()) + .isVariantType(isVariantType) .withByteOrder(ByteOrder.nativeOrder()) .withBitmapSerdeFactory(indexSpec.getBitmapSerdeFactory()) .withSerializer(serializer) diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java index dff680d8ae6..aff3e673d5f 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java @@ -30,7 +30,7 @@ import org.apache.druid.segment.nested.ScalarDoubleColumnSerializer; import org.apache.druid.segment.nested.ScalarLongColumnSerializer; import org.apache.druid.segment.nested.ScalarStringColumnSerializer; import org.apache.druid.segment.nested.StructuredData; -import org.apache.druid.segment.nested.VariantArrayColumnSerializer; +import org.apache.druid.segment.nested.VariantColumnSerializer; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; /** @@ -50,7 +50,7 @@ import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; * @see AutoTypeColumnIndexer * @see AutoTypeColumnMerger * @see NestedCommonFormatColumnSerializer - * @see VariantArrayColumnSerializer + * @see VariantColumnSerializer * @see ScalarDoubleColumnSerializer * @see ScalarLongColumnSerializer * @see NestedDataColumnSerializer diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java index e142a3bd49c..e6d5b418786 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnBuilder.java @@ -117,7 +117,7 @@ public class ColumnBuilder return this; } - public 
ColumnBuilder setStandardTypeColumnSupplier(Supplier columnSupplier) + public ColumnBuilder setNestedCommonFormatColumnSupplier(Supplier columnSupplier) { checkColumnSupplierNotSet(); this.columnSupplier = columnSupplier; diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java index 500a7deab64..14e044b8304 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnType.java @@ -23,9 +23,11 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import javax.annotation.Nullable; +import java.util.Objects; /** * Native Druid types. @@ -68,24 +70,28 @@ public class ColumnType extends BaseTypeSignature // currently, arrays only come from expressions or aggregators /** * An array of Strings. Values will be represented as Object[] + * * @see ValueType#ARRAY * @see ValueType#STRING */ public static final ColumnType STRING_ARRAY = ofArray(STRING); /** * An array of Longs. Values will be represented as Object[] or long[] + * * @see ValueType#ARRAY * @see ValueType#LONG */ public static final ColumnType LONG_ARRAY = ofArray(LONG); /** * An array of Doubles. Values will be represented as Object[] or double[]. + * * @see ValueType#ARRAY * @see ValueType#DOUBLE */ public static final ColumnType DOUBLE_ARRAY = ofArray(DOUBLE); /** * An array of Floats. Values will be represented as Object[] or float[]. 
+ * * @see ValueType#ARRAY * @see ValueType#FLOAT */ @@ -128,4 +134,80 @@ public class ColumnType extends BaseTypeSignature { return ColumnTypeFactory.getInstance().ofComplex(complexTypeName); } + + public static ColumnType leastRestrictiveType(@Nullable ColumnType type, @Nullable ColumnType other) + { + if (type == null) { + return other; + } + if (other == null) { + return type; + } + if (type.is(ValueType.COMPLEX) && other.is(ValueType.COMPLEX)) { + if (type.getComplexTypeName() == null) { + return other; + } + if (other.getComplexTypeName() == null) { + return type; + } + if (!Objects.equals(type, other)) { + throw new IAE("Cannot implicitly cast %s to %s", type, other); + } + return type; + } + // if either is nested data, use nested data, otherwise error + if (type.is(ValueType.COMPLEX) || other.is(ValueType.COMPLEX)) { + if (ColumnType.NESTED_DATA.equals(type) || ColumnType.NESTED_DATA.equals(other)) { + return ColumnType.NESTED_DATA; + } + throw new IAE("Cannot implicitly cast %s to %s", type, other); + } + + // arrays convert based on least restrictive element type + if (type.isArray()) { + if (other.equals(type.getElementType())) { + return type; + } + final ColumnType commonElementType; + if (other.isArray()) { + commonElementType = leastRestrictiveType( + (ColumnType) type.getElementType(), + (ColumnType) other.getElementType() + ); + return ColumnType.ofArray(commonElementType); + } else { + commonElementType = leastRestrictiveType( + (ColumnType) type.getElementType(), + other + ); + } + return ColumnType.ofArray(commonElementType); + } + if (other.isArray()) { + if (type.equals(type.getElementType())) { + return type; + } + final ColumnType commonElementType; + + commonElementType = leastRestrictiveType( + type, + (ColumnType) other.getElementType() + ); + return ColumnType.ofArray(commonElementType); + } + // if either argument is a string, type becomes a string + if (Types.is(type, ValueType.STRING) || Types.is(other, ValueType.STRING)) { + return 
ColumnType.STRING; + } + + // all numbers win over longs + // floats vs doubles would be handled here, but we currently only support doubles... + if (Types.is(type, ValueType.LONG) && Types.isNullOr(other, ValueType.LONG)) { + return ColumnType.LONG; + } + if (Types.is(type, ValueType.FLOAT) && Types.isNullOr(other, ValueType.FLOAT)) { + return ColumnType.FLOAT; + } + return ColumnType.DOUBLE; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/RowSignature.java b/processing/src/main/java/org/apache/druid/segment/column/RowSignature.java index b1649e6dbbe..770d3632bd2 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/RowSignature.java +++ b/processing/src/main/java/org/apache/druid/segment/column/RowSignature.java @@ -137,6 +137,15 @@ public class RowSignature implements ColumnInspector return Optional.ofNullable(columnTypes.get(getColumnName(columnNumber))); } + /** + * Returns true if the column is a numeric type ({@link ColumnType#isNumeric()}), otherwise false if the column + * is not a numeric type or is not present in the row signature. + */ + public boolean isNumeric(final String columnName) + { + return getColumnType(columnName).map(ColumnType::isNumeric).orElse(false); + } + /** * Returns a list of column names in the order they appear in this signature. 
*/ diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java index 7af2273f509..66bbd71c5dd 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java @@ -681,7 +681,6 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn} for a column which uses a {@link FrontCodedIndexed} to store its value * dictionary, which 'delta encodes' strings (instead of {@link org.apache.druid.segment.data.GenericIndexed} like * {@link StringDictionaryEncodedColumn}). - * + *

* This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than the dictionary * difference. + *

+ * Implements {@link NestedCommonFormatColumn} so it can be used as a reader for single value string specializations + * of {@link org.apache.druid.segment.AutoTypeColumnIndexer}. */ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncodedColumn, NestedCommonFormatColumn diff --git a/processing/src/main/java/org/apache/druid/segment/data/ComparableList.java b/processing/src/main/java/org/apache/druid/segment/data/ComparableList.java index 442d4b5a3a7..9d2944a8d31 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/ComparableList.java +++ b/processing/src/main/java/org/apache/druid/segment/data/ComparableList.java @@ -29,7 +29,6 @@ import java.util.List; public class ComparableList implements Comparable { - private final List delegate; public ComparableList(List input) diff --git a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java index 73ae5b6b0b8..af9a4a75ac6 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/BoundFilter.java @@ -23,6 +23,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.primitives.Doubles; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.BitmapResultFactory; @@ -106,20 +107,27 @@ public class BoundFilter implements Filter } final NumericRangeIndex rangeIndex = indexSupplier.as(NumericRangeIndex.class); if (rangeIndex != null) { - final Number lower = boundDimFilter.hasLowerBound() ? Double.parseDouble(boundDimFilter.getLower()) : null; - final Number upper = boundDimFilter.hasUpperBound() ? 
Double.parseDouble(boundDimFilter.getUpper()) : null; - final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( - lower, - boundDimFilter.isLowerStrict(), - upper, - boundDimFilter.isUpperStrict() - ); - if (rangeBitmaps != null) { - // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set - if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) { - return rangeBitmaps; - } else { - return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); + final Number lower = boundDimFilter.hasLowerBound() ? Doubles.tryParse(boundDimFilter.getLower()) : null; + final Number upper = boundDimFilter.hasUpperBound() ? Doubles.tryParse(boundDimFilter.getUpper()) : null; + // valid number bounds are required to use the range index, otherwise we need to fall back to the predicate + // index to get consistent behavior with the value matcher. in a better world this might be a much earlier + // validation error, but.. 
the bound filter doesn't live in that world + final boolean lowerValid = !(boundDimFilter.hasLowerBound() && lower == null); + final boolean upperValid = !(boundDimFilter.hasUpperBound() && upper == null); + if (lowerValid && upperValid) { + final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange( + lower, + boundDimFilter.isLowerStrict(), + upper, + boundDimFilter.isUpperStrict() + ); + if (rangeBitmaps != null) { + // preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set + if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) { + return rangeBitmaps; + } else { + return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps); + } } } } @@ -127,7 +135,6 @@ public class BoundFilter implements Filter // fall back to predicate based index if it is available return Filters.makePredicateIndex(boundDimFilter.getDimension(), selector, getPredicateFactory()); - } @Nullable @@ -136,11 +143,15 @@ public class BoundFilter implements Filter BitmapColumnIndex rangeIndex ) { + + + final BitmapColumnIndex nullBitmap; final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class); if (nulls == null) { return null; } - final BitmapColumnIndex nullBitmap = nulls.forNull(); + nullBitmap = nulls.forNull(); + return new BitmapColumnIndex() { @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 0e65a3b2b1c..a8f16e47f95 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -123,6 +123,12 @@ public class ExpressionFilter implements Filter ExpressionVectorSelectors.makeVectorObjectSelector(factory, theExpr) ).makeMatcher(predicateFactory); default: + if (ExpressionType.NESTED_DATA.equals(outputType)) { + return 
VectorValueMatcherColumnProcessorFactory.instance().makeObjectProcessor( + ColumnCapabilitiesImpl.createDefault().setType(ExpressionType.toColumnType(outputType)).setHasNulls(true), + ExpressionVectorSelectors.makeVectorObjectSelector(factory, theExpr) + ).makeMatcher(predicateFactory); + } throw new UOE("Vectorized expression matchers not implemented for type: [%s]", outputType); } } @@ -303,6 +309,12 @@ public class ExpressionFilter implements Filter return Evals::asBoolean; } + @Override + public Predicate makeObjectPredicate() + { + return Evals::objectAsBoolean; + } + // The hashcode and equals are to make SubclassesMustOverrideEqualsAndHashCodeTest stop complaining.. // DruidPredicateFactory currently doesn't really need equals or hashcode since 'toString' method that is actually // called when testing equality of DimensionPredicateFilter, so it's the truly required method, but that seems diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index fb051f74b71..0b496d02d3b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -89,39 +89,39 @@ import java.util.concurrent.ConcurrentHashMap; /** * Implementation of {@link NestedDataComplexColumn} which uses a {@link CompressedVariableSizedBlobColumn} for the - * 'raw' {@link StructuredData} values and provides selectors for nested 'literal' field columns. + * 'raw' {@link StructuredData} values and provides selectors for nested field columns specified by ordered lists of + * {@link NestedPathPart}. + *

+ * The list of available nested paths is stored in {@link #fields}, and their associated types stored in + * {@link #fieldInfo} which can be accessed by the index of the field in {@link #fields}. + *

+ * In the case that the nested column has only a single field, and that field is the 'root' path, specified by + * {@link #rootFieldPath}, the selectors created for the complex column itself will use the 'root' path selectors + * instead. */ public abstract class CompressedNestedDataComplexColumn> extends NestedDataComplexColumn implements NestedCommonFormatColumn { + private static final ObjectStrategy STRATEGY = NestedDataComplexTypeSerde.INSTANCE.getObjectStrategy(); public static final IntTypeStrategy INT_TYPE_STRATEGY = new IntTypeStrategy(); private final ColumnConfig columnConfig; private final Closer closer; private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; - private CompressedVariableSizedBlobColumn compressedRawColumn; private final ImmutableBitmap nullValues; - private final GenericIndexed fields; private final FieldTypeInfo fieldInfo; - private final Supplier stringDictionarySupplier; private final Supplier> longDictionarySupplier; private final Supplier> doubleDictionarySupplier; private final Supplier arrayDictionarySupplier; - private final SmooshedFileMapper fileMapper; - private final String rootFieldPath; - - private final ConcurrentHashMap columns = new ConcurrentHashMap<>(); - - private static final ObjectStrategy STRATEGY = NestedDataComplexTypeSerde.INSTANCE.getObjectStrategy(); - private final ColumnType logicalType; - private final String columnName; private final BitmapSerdeFactory bitmapSerdeFactory; private final ByteOrder byteOrder; + private final ConcurrentHashMap columns = new ConcurrentHashMap<>(); + private CompressedVariableSizedBlobColumn compressedRawColumn; public CompressedNestedDataComplexColumn( String columnName, @@ -651,7 +651,8 @@ public abstract class CompressedNestedDataComplexColumn * When processing the 'raw' value column is complete, the {@link #writeTo(int, FileSmoosher)} method will sort the * local ids and write them out to a local sorted dictionary, iterate over {@link 
#intermediateValueWriter} swapping * the unsorted local ids with the sorted ids and writing to the compressed id column writer - * {@link #encodedValueSerializer} building the bitmap indexes along the way. + * {@link #encodedValueSerializer}, building the bitmap indexes along the way. + * + * @see ScalarDoubleFieldColumnWriter - single type double columns + * @see ScalarLongFieldColumnWriter - single type long columns + * @see ScalarStringFieldColumnWriter - single type string columns + * @see VariantArrayFieldColumnWriter - single type array columns of double, long, or string + * @see VariantFieldColumnWriter - mixed type columns of any combination */ public abstract class GlobalDictionaryEncodedFieldColumnWriter { @@ -98,7 +105,8 @@ public abstract class GlobalDictionaryEncodedFieldColumnWriter } /** - * Perform any value conversion needed before storing the value in the + * Perform any value conversion needed before looking up the global id in the value dictionary (such as null handling + * stuff or array processing to add the elements to the dictionary before adding the int[] to the dictionary) */ T processValue(int row, Object value) { @@ -152,6 +160,9 @@ public abstract class GlobalDictionaryEncodedFieldColumnWriter cursorPosition++; } + /** + * Backfill intermediate column with null values + */ private void fillNull(int row) throws IOException { final T value = processValue(row, null); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java index 4d20437c502..97ac990e419 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumn.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnFormat; import 
org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; import java.util.SortedMap; @@ -39,7 +40,15 @@ import java.util.TreeMap; /** * Base implementation for columns created with {@link AutoTypeColumnSchema} and handled with * {@link NestedCommonFormatColumnHandler} to allow ease of merge via - * {@link AutoTypeColumnMerger} + * {@link AutoTypeColumnMerger} by providing a common implementation. All columns are read with + * {@link NestedCommonFormatColumnPartSerde} + * + * @see ScalarDoubleColumn + * @see ScalarLongColumn + * @see ScalarStringDictionaryEncodedColumn + * @see org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn + * @see VariantColumn + * @see CompressedNestedDataComplexColumn */ public interface NestedCommonFormatColumn extends BaseColumn { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java index 7ccc0d31c4d..bc4f4ec9462 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java @@ -32,6 +32,25 @@ import java.nio.ByteOrder; import java.nio.channels.WritableByteChannel; import java.util.SortedMap; +/** + * Basic serializer implementation for the {@link NestedCommonFormatColumn} family of columns. The + * {@link org.apache.druid.segment.AutoTypeColumnIndexer} catalogs the types and fields present in the data it processes + * using a {@link StructuredDataProcessor}. 
When persisting and merging segments, the + * {@link org.apache.druid.segment.AutoTypeColumnMerger} will choose the most appropriate serializer based on the data + * which was processed as follows: + * + * @see ScalarDoubleColumnSerializer - single type double columns + * @see ScalarLongColumnSerializer - single type long columns + * @see ScalarStringColumnSerializer - single type string columns + * @see VariantColumnSerializer - single type array columns of string, long, double, and mixed type columns + * + * @see NestedDataColumnSerializer - nested columns + * + * @see NestedDataColumnSerializerV4 - legacy nested column format created by + * {@link org.apache.druid.segment.NestedDataColumnIndexer} and + * {@link org.apache.druid.segment.NestedDataColumnMerger} + * + */ public abstract class NestedCommonFormatColumnSerializer implements GenericColumnSerializer { public static final byte V0 = 0x00; @@ -95,6 +114,9 @@ public abstract class NestedCommonFormatColumnSerializer implements GenericColum return buffer; } + /** + * Nested field columns are stored in separate internal files, named by appending the field name to the base file name + */ public static String getInternalFileName(String fileNameBase, String field) { return StringUtils.format("%s.%s", fileNameBase, field); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 3b8590c0178..020298b1b35 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -59,6 +59,24 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; +/** + * Serializer for {@link NestedCommonFormatColumn} which can store nested data. 
The serializer stores several components + * including: + * - a field list and associated type info + * - value dictionaries for string, long, double, and array values (where the arrays are stored as int[] that point to + * the string, long, and double values) + * - raw data is stored with a {@link CompressedVariableSizedBlobColumnSerializer} as blobs of SMILE encoded data + * - a null value bitmap to track which 'raw' rows are null + * + * For each nested field, a {@link GlobalDictionaryEncodedFieldColumnWriter} will write a sub-column to specialize + * fast reading and filtering of that path. + * + * @see ScalarDoubleFieldColumnWriter - single type double field + * @see ScalarLongFieldColumnWriter - single type long field + * @see ScalarStringFieldColumnWriter - single type string field + * @see VariantArrayFieldColumnWriter - single type array of string, long, and double field + * @see VariantFieldColumnWriter - mixed type field + */ public class NestedDataColumnSerializer extends NestedCommonFormatColumnSerializer { private static final Logger log = new Logger(NestedDataColumnSerializer.class); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java index a6d5843d238..47850afef50 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV5.java @@ -39,7 +39,7 @@ import java.util.List; * Nested data column with optimized support for simple arrays. Not actually v5 in the segment since columns are now * serialized using {@link org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde} instead of the generic * complex type system. - * + *

* Not really stored in a segment as V5 since instead of V5 we migrated to {@link NestedCommonFormatColumn} which * specializes physical format based on the types of data encountered during processing, and so versions are now * {@link NestedCommonFormatColumnSerializer#V0} for all associated specializations. diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java index 2ee1a5808e9..18e4c81f858 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java @@ -39,14 +39,14 @@ import java.util.Set; /** * Describes the basic shape for any 'nested data' ({@link StructuredData}) {@link ComplexColumn} implementation along - * with basic facilities for caching any columns created and methods for retrieving selectors for nested literal field - * columns. + * with basic facilities for caching any columns created and methods for retrieving selectors for nested field columns. + *

+ * {@link org.apache.druid.segment.virtual.NestedFieldVirtualColumn} allows query time use of the nested fields. */ public abstract class NestedDataComplexColumn implements ComplexColumn { /** - * Make a {@link DimensionSelector} for a nested literal field column associated with this nested - * complex column specified by a sequence of {@link NestedPathPart}. + * Make a {@link DimensionSelector} for a nested field column */ public abstract DimensionSelector makeDimensionSelector( List path, @@ -55,8 +55,7 @@ public abstract class NestedDataComplexColumn implements ComplexColumn ); /** - * Make a {@link ColumnValueSelector} for a nested literal field column associated with this nested - * complex column specified by a sequence of {@link NestedPathPart}. + * Make a {@link ColumnValueSelector} for a nested field column */ public abstract ColumnValueSelector makeColumnValueSelector( List path, @@ -64,8 +63,7 @@ public abstract class NestedDataComplexColumn implements ComplexColumn ); /** - * Make a {@link SingleValueDimensionVectorSelector} for a nested literal field column associated with this nested - * complex column specified by a sequence of {@link NestedPathPart}. + * Make a {@link SingleValueDimensionVectorSelector} for a nested field column */ public abstract SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( List path, @@ -73,8 +71,7 @@ public abstract class NestedDataComplexColumn implements ComplexColumn ); /** - * Make a {@link VectorObjectSelector} for a nested literal field column associated with this nested - * complex column located at the 'path' represented as a sequence of {@link NestedPathPart}. 
+ * Make a {@link VectorObjectSelector} for a nested field column */ public abstract VectorObjectSelector makeVectorObjectSelector( List path, @@ -82,29 +79,40 @@ public abstract class NestedDataComplexColumn implements ComplexColumn ); /** - * Make a {@link VectorValueSelector} for a nested literal field column associated with this nested - * complex column located at the 'path' represented as a sequence of {@link NestedPathPart}. + * Make a {@link VectorValueSelector} for a nested field column */ public abstract VectorValueSelector makeVectorValueSelector( List path, ReadableVectorOffset readableOffset ); + /** + * Get list of fields represented as a sequence of {@link NestedPathPart} + */ public abstract List> getNestedFields(); + /** + * Get all {@link ColumnType} for the nested field column + */ @Nullable public abstract Set getColumnTypes(List path); + /** + * Get a {@link ColumnHolder} for a nested field column to retrieve metadata, the column itself, or indexes. + */ @Nullable public abstract ColumnHolder getColumnHolder(List path); /** - * Make a {@link ColumnIndexSupplier} for a nested literal field column associated with this nested - * complex column located at the 'path' represented as a sequence of {@link NestedPathPart}. 
+ * Make a {@link ColumnIndexSupplier} for a nested field column */ @Nullable public abstract ColumnIndexSupplier getColumnIndexSupplier(List path); + /** + * Shortcut to check if a nested field column is {@link ColumnType#isNumeric()}, useful when broadly choosing the + * type of vector selector to be used when dealing with the path + */ public abstract boolean isNumeric(List path); @Override diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumn.java index 9a006059357..689ad57cf7a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumn.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.nested; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DoubleColumnSelector; @@ -37,6 +38,9 @@ import org.roaringbitmap.PeekableIntIterator; import javax.annotation.Nullable; +/** + * {@link NestedCommonFormatColumn} for {@link ColumnType#DOUBLE} + */ public class ScalarDoubleColumn implements NestedCommonFormatColumn { private final FixedIndexed doubleDictionary; @@ -91,6 +95,9 @@ public class ScalarDoubleColumn implements NestedCommonFormatColumn @Override public boolean isNull() { + if (NullHandling.replaceWithDefault()) { + return false; + } final int i = offset.getOffset(); if (i < offsetMark) { // offset was reset, reset iterator state @@ -134,6 +141,9 @@ public class ScalarDoubleColumn implements NestedCommonFormatColumn @Override public boolean[] getNullVector() { + if (NullHandling.replaceWithDefault()) { + return null; + } computeVectorsIfNeeded(); return nullVector; } diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index b9cc9c39a55..23c21b876bc 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -21,6 +21,7 @@ package org.apache.druid.segment.nested; import com.google.common.base.Strings; import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; import com.google.common.primitives.Doubles; import it.unimi.dsi.fastutil.doubles.DoubleArraySet; import it.unimi.dsi.fastutil.doubles.DoubleIterator; @@ -29,6 +30,7 @@ import it.unimi.dsi.fastutil.ints.IntIntPair; import it.unimi.dsi.fastutil.ints.IntIterator; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; @@ -138,8 +140,6 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier> doubleDictionarySupplier; private final Supplier valueColumnSupplier; @@ -184,7 +184,12 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier T as(Class clazz) { if (clazz.equals(NullValueIndex.class)) { - final BitmapColumnIndex nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); + final BitmapColumnIndex nullIndex; + if (NullHandling.replaceWithDefault()) { + nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); + } else { + nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); + } return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || 
clazz.equals(DictionaryEncodedValueIndex.class)) { @@ -217,20 +222,27 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier dictionary = doubleDictionarySupplier.get(); + int defaultValueIndex = dictionary.indexOf(NullHandling.defaultDoubleValue()); + return new SimpleBitmapColumnIndex() { - final FixedIndexed dictionary = doubleDictionarySupplier.get(); - @Override public double estimateSelectivity(int totalRows) { if (doubleValue == null) { - if (inputNull) { + if (inputNull && NullHandling.sqlCompatible()) { return (double) getBitmap(0).size() / totalRows; } else { return 0.0; } } + if (NullHandling.replaceWithDefault() && doubleValue.equals(NullHandling.defaultDoubleValue())) { + if (defaultValueIndex >= 0) { + return ((double) getBitmap(0).size() + (double) getBitmap(defaultValueIndex).size()) / totalRows; + } + return (double) getBitmap(0).size() / totalRows; + } final int id = dictionary.indexOf(doubleValue); if (id < 0) { return 0.0; @@ -242,12 +254,24 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier T computeBitmapResult(BitmapResultFactory bitmapResultFactory) { if (doubleValue == null) { - if (inputNull) { + if (inputNull && NullHandling.sqlCompatible()) { return bitmapResultFactory.wrapDimensionValue(getBitmap(0)); } else { + // input was not null but not a double... 
no match return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); } } + if (NullHandling.replaceWithDefault() && doubleValue.equals(NullHandling.defaultDoubleValue())) { + if (defaultValueIndex >= 0) { + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of( + getBitmap(0), + getBitmap(defaultValueIndex) + ) + ); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(0)); + } final int id = dictionary.indexOf(doubleValue); if (id < 0) { return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); @@ -274,6 +298,10 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier longDictionary; @@ -92,6 +96,9 @@ public class ScalarLongColumn implements NestedCommonFormatColumn @Override public boolean isNull() { + if (NullHandling.replaceWithDefault()) { + return false; + } final int i = offset.getOffset(); if (i < offsetMark) { // offset was reset, reset iterator state @@ -135,6 +142,9 @@ public class ScalarLongColumn implements NestedCommonFormatColumn @Override public boolean[] getNullVector() { + if (NullHandling.replaceWithDefault()) { + return null; + } computeVectorsIfNeeded(); return nullVector; } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 10d07bcf308..39d6057aefc 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.nested; import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; import it.unimi.dsi.fastutil.ints.IntIntPair; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.longs.LongArraySet; @@ -27,6 +28,7 @@ import 
it.unimi.dsi.fastutil.longs.LongIterator; import it.unimi.dsi.fastutil.longs.LongSet; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; @@ -137,7 +139,6 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier> longDictionarySupplier; private final Supplier valueColumnSupplier; @@ -183,7 +184,12 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier T as(Class clazz) { if (clazz.equals(NullValueIndex.class)) { - final BitmapColumnIndex nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); + final BitmapColumnIndex nullIndex; + if (NullHandling.replaceWithDefault()) { + nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); + } else { + nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); + } return (T) (NullValueIndex) () -> nullIndex; } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { @@ -211,6 +217,9 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier dictionary = longDictionarySupplier.get(); + int defaultValueIndex = dictionary.indexOf(NullHandling.defaultLongValue()); + @Override public BitmapColumnIndex forValue(@Nullable String value) { @@ -218,18 +227,22 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier dictionary = longDictionarySupplier.get(); - @Override public double estimateSelectivity(int totalRows) { if (longValue == null) { - if (inputNull) { + if (inputNull && NullHandling.sqlCompatible()) { return (double) getBitmap(0).size() / totalRows; } else { return 0.0; } } + if (NullHandling.replaceWithDefault() && longValue.equals(NullHandling.defaultLongValue())) { + if (defaultValueIndex >= 0) { + return ((double) 
getBitmap(0).size() + (double) getBitmap(defaultValueIndex).size()) / totalRows; + } + return (double) getBitmap(0).size() / totalRows; + } final int id = dictionary.indexOf(longValue); if (id < 0) { return 0.0; @@ -241,12 +254,23 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier T computeBitmapResult(BitmapResultFactory bitmapResultFactory) { if (longValue == null) { - if (inputNull) { + if (inputNull && NullHandling.sqlCompatible()) { return bitmapResultFactory.wrapDimensionValue(getBitmap(0)); } else { return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); } } + if (NullHandling.replaceWithDefault() && longValue.equals(NullHandling.defaultLongValue())) { + if (defaultValueIndex >= 0) { + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of( + getBitmap(0), + getBitmap(defaultValueIndex) + ) + ); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(0)); + } final int id = dictionary.indexOf(longValue); if (id < 0) { return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); @@ -273,6 +297,9 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier> implements DictionaryEncodedColumn, NestedCommonFormatColumn { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringFieldColumnWriter.java index bf4c5f0bca3..bfd1de44a4b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarStringFieldColumnWriter.java @@ -28,7 +28,7 @@ import java.io.IOException; import java.nio.channels.WritableByteChannel; /** - * Field writer for string type nested columns of {@link NestedDataColumnSerializerV4} and + * Nested field writer for string type columns of {@link NestedDataColumnSerializerV4} and * {@link 
NestedDataColumnSerializer} */ public final class ScalarStringFieldColumnWriter extends GlobalDictionaryEncodedFieldColumnWriter diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ValueDictionary.java b/processing/src/main/java/org/apache/druid/segment/nested/ValueDictionary.java index b8ffb0711a7..9ebc1a88fbb 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ValueDictionary.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ValueDictionary.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.nested; import com.google.common.base.Preconditions; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.AutoTypeColumnIndexer; import org.apache.druid.segment.ComparatorDimensionDictionary; @@ -83,6 +84,12 @@ public class ValueDictionary this.stringArrays = new TreeSet<>(ColumnType.STRING_ARRAY.getNullableStrategy()); this.longArrays = new TreeSet<>(ColumnType.LONG_ARRAY.getNullableStrategy()); this.doubleArrays = new TreeSet<>(ColumnType.DOUBLE_ARRAY.getNullableStrategy()); + + // always add default values in default value mode. they don't cost much even if they aren't used + if (NullHandling.replaceWithDefault()) { + longDictionary.add(NullHandling.defaultLongValue()); + doubleDictionary.add(NullHandling.defaultDoubleValue()); + } } public int addLongValue(@Nullable Long value) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumn.java deleted file mode 100644 index f2ec4c2b395..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumn.java +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.nested; - -import com.google.common.primitives.Doubles; -import com.google.common.primitives.Floats; -import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.common.guava.GuavaUtils; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.StringEncodingStrategies; -import org.apache.druid.segment.data.ColumnarInts; -import org.apache.druid.segment.data.FixedIndexed; -import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; -import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.data.ReadableOffset; -import org.apache.druid.segment.vector.ReadableVectorInspector; -import org.apache.druid.segment.vector.ReadableVectorOffset; -import org.apache.druid.segment.vector.VectorObjectSelector; -import org.roaringbitmap.PeekableIntIterator; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Iterator; - 
-public class VariantArrayColumn> implements NestedCommonFormatColumn -{ - private final TStringDictionary stringDictionary; - private final FixedIndexed longDictionary; - private final FixedIndexed doubleDictionary; - private final FrontCodedIntArrayIndexed arrayDictionary; - private final ColumnarInts encodedValueColumn; - private final ImmutableBitmap nullValueBitmap; - private final ColumnType logicalType; - private final int adjustLongId; - private final int adjustDoubleId; - private final int adjustArrayId; - - public VariantArrayColumn( - TStringDictionary stringDictionary, - FixedIndexed longDictionary, - FixedIndexed doubleDictionary, - FrontCodedIntArrayIndexed arrayDictionary, - ColumnarInts encodedValueColumn, - ImmutableBitmap nullValueBitmap, - ColumnType logicalType - ) - { - this.stringDictionary = stringDictionary; - this.longDictionary = longDictionary; - this.doubleDictionary = doubleDictionary; - this.arrayDictionary = arrayDictionary; - this.encodedValueColumn = encodedValueColumn; - this.nullValueBitmap = nullValueBitmap; - this.logicalType = logicalType; - this.adjustLongId = stringDictionary.size(); - this.adjustDoubleId = adjustLongId + longDictionary.size(); - this.adjustArrayId = adjustDoubleId + doubleDictionary.size(); - } - - @Override - public ColumnType getLogicalType() - { - return logicalType; - } - - @Override - public Indexed getStringDictionary() - { - return new StringEncodingStrategies.Utf8ToStringIndexed(stringDictionary); - } - - @Override - public Indexed getLongDictionary() - { - return longDictionary; - } - - @Override - public Indexed getDoubleDictionary() - { - return doubleDictionary; - } - - @Override - public Indexed getArrayDictionary() - { - Iterable arrays = () -> { - - return new Iterator() - { - final Iterator delegate = arrayDictionary.iterator(); - - @Override - public boolean hasNext() - { - return delegate.hasNext(); - } - - @Override - public Object[] next() - { - final int[] next = delegate.next(); - final 
Object[] nextArray = new Object[next.length]; - for (int i = 0; i < nextArray.length; i++) { - nextArray[i] = lookupId(next[i]); - } - return nextArray; - } - - @Nullable - private Object lookupId(int globalId) - { - if (globalId == 0) { - return null; - } - final int adjustLongId = stringDictionary.size(); - final int adjustDoubleId = stringDictionary.size() + longDictionary.size(); - if (globalId < adjustLongId) { - return StringUtils.fromUtf8Nullable(stringDictionary.get(globalId)); - } else if (globalId < adjustDoubleId) { - return longDictionary.get(globalId - adjustLongId); - } else if (globalId < adjustDoubleId + doubleDictionary.size()) { - return doubleDictionary.get(globalId - adjustDoubleId); - } - throw new IAE("Unknown globalId [%s]", globalId); - } - }; - }; - return new Indexed() - { - @Override - public int size() - { - return arrayDictionary.size(); - } - - @Nullable - @Override - public Object[] get(int index) - { - throw new UnsupportedOperationException("get not supported"); - } - - @Override - public int indexOf(@Nullable Object[] value) - { - throw new UnsupportedOperationException("indexOf not supported"); - } - - @Override - public Iterator iterator() - { - return arrays.iterator(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - // meh - } - }; - } - - @Override - public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) - { - return new ColumnValueSelector() - { - - private PeekableIntIterator nullIterator = nullValueBitmap.peekableIterator(); - private int nullMark = -1; - private int offsetMark = -1; - - @Nullable - @Override - public Object getObject() - { - final int id = encodedValueColumn.get(offset.getOffset()); - if (id < adjustArrayId) { - return lookupScalarValue(id); - } else { - int[] arr = arrayDictionary.get(id - adjustArrayId); - if (arr == null) { - return null; - } - final Object[] array = new Object[arr.length]; - for (int i = 0; i < arr.length; i++) { - array[i] 
= lookupScalarValue(arr[i]); - } - return array; - } - } - - @Override - public float getFloat() - { - final int id = encodedValueColumn.get(offset.getOffset()); - if (id == 0) { - // zero - return 0f; - } else if (id < adjustLongId) { - // try to convert string to float - Float f = Floats.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); - return f == null ? 0f : f; - } else if (id < adjustDoubleId) { - return longDictionary.get(id - adjustLongId).floatValue(); - } else { - return doubleDictionary.get(id - adjustDoubleId).floatValue(); - } - } - - @Override - public double getDouble() - { - final int id = encodedValueColumn.get(offset.getOffset()); - if (id == 0) { - // zero - return 0.0; - } else if (id < adjustLongId) { - // try to convert string to double - Double d = Doubles.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); - return d == null ? 0.0 : d; - } else if (id < adjustDoubleId) { - return longDictionary.get(id - adjustLongId).doubleValue(); - } else { - return doubleDictionary.get(id - adjustDoubleId); - } - } - - @Override - public long getLong() - { - final int id = encodedValueColumn.get(offset.getOffset()); - if (id == 0) { - // zero - return 0L; - } else if (id < adjustLongId) { - // try to convert string to long - Long l = GuavaUtils.tryParseLong(StringUtils.fromUtf8(stringDictionary.get(id))); - return l == null ? 
0L : l; - } else if (id < adjustDoubleId) { - return longDictionary.get(id - adjustLongId); - } else { - return doubleDictionary.get(id - adjustDoubleId).longValue(); - } - } - - @Override - public boolean isNull() - { - final int i = offset.getOffset(); - if (i < offsetMark) { - // offset was reset, reset iterator state - nullMark = -1; - nullIterator = nullValueBitmap.peekableIterator(); - } - offsetMark = i; - if (nullMark < i) { - nullIterator.advanceIfNeeded(offsetMark); - if (nullIterator.hasNext()) { - nullMark = nullIterator.next(); - } - } - if (nullMark == offsetMark) { - return true; - } - return DimensionHandlerUtils.isNumericNull(getObject()); - } - - @Override - public Class classOfObject() - { - return Object.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("encodedValueColumn", encodedValueColumn); - } - }; - } - - @Override - public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) - { - return new VectorObjectSelector() - { - private final int[] vector = new int[offset.getMaxVectorSize()]; - private final Object[] objects = new Object[offset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - - public Object[] getObjectVector() - { - if (id == offset.getId()) { - return objects; - } - - if (offset.isContiguous()) { - encodedValueColumn.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); - } else { - encodedValueColumn.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); - } - for (int i = 0; i < offset.getCurrentVectorSize(); i++) { - final int globalId = vector[i]; - if (globalId < adjustArrayId) { - objects[i] = lookupScalarValue(globalId); - } else { - int[] arr = arrayDictionary.get(globalId - adjustArrayId); - if (arr == null) { - objects[i] = null; - } else { - final Object[] array = new Object[arr.length]; - for (int j = 0; j < arr.length; j++) { - array[j] = lookupScalarValue(arr[j]); - 
} - objects[i] = array; - } - } - } - id = offset.getId(); - - return objects; - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - }; - } - - @Override - public void close() throws IOException - { - encodedValueColumn.close(); - } - - private Object lookupScalarValue(int globalId) - { - if (globalId < adjustLongId) { - return StringUtils.fromUtf8Nullable(stringDictionary.get(globalId)); - } else if (globalId < adjustDoubleId) { - return longDictionary.get(globalId - adjustLongId); - } else if (globalId < adjustArrayId) { - return doubleDictionary.get(globalId - adjustDoubleId); - } - throw new IllegalArgumentException("not a scalar in the dictionary"); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayFieldColumnWriter.java index 147f2b04474..3166548abc1 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayFieldColumnWriter.java @@ -20,6 +20,7 @@ package org.apache.druid.segment.nested; import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; @@ -27,6 +28,10 @@ import org.apache.druid.segment.writeout.SegmentWriteOutMedium; import java.io.IOException; import java.nio.channels.WritableByteChannel; +/** + * Nested field writer for array type columns of {@link NestedDataColumnSerializer}. This is currently only used + * for single type array columns, but can handle any type of array. 
{@link VariantFieldColumnWriter} is used for mixed + */ public class VariantArrayFieldColumnWriter extends GlobalDictionaryEncodedFieldColumnWriter { @@ -44,6 +49,7 @@ public class VariantArrayFieldColumnWriter extends GlobalDictionaryEncodedFieldC @Override int[] processValue(int row, Object value) { + // replace Object[] with int[] composed of the global ids if (value instanceof Object[]) { Object[] array = (Object[]) value; final int[] globalIds = new int[array.length]; @@ -67,6 +73,10 @@ public class VariantArrayFieldColumnWriter extends GlobalDictionaryEncodedFieldC } return globalIds; } + if (value != null) { + // this writer is only used for arrays so all values will have been coerced to Object[] at this point + throw new ISE("Value is not an array, got [%s] instead", value.getClass()); + } return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java new file mode 100644 index 00000000000..dfc713f41ef --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java @@ -0,0 +1,783 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Predicate; +import com.google.common.base.Predicates; +import com.google.common.primitives.Doubles; +import com.google.common.primitives.Floats; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.common.guava.GuavaUtils; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.AbstractDimensionSelector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.data.ColumnarInts; +import org.apache.druid.segment.data.FixedIndexed; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; +import org.apache.druid.segment.data.Indexed; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.data.SingleIndexedInt; +import org.apache.druid.segment.filter.BooleanValueMatcher; +import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import 
org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.roaringbitmap.PeekableIntIterator; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.BitSet; +import java.util.Iterator; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * {@link NestedCommonFormatColumn} for single type array columns, and mixed type columns. If {@link #variantTypes} + * is non-null, the column is composed of all of the types defined there, otherwise all rows are consistently + * {@link #logicalType}. If mixed type, logical type is set by {@link ColumnType#leastRestrictiveType}. + */ +public class VariantColumn> + implements DictionaryEncodedColumn, NestedCommonFormatColumn +{ + private final TStringDictionary stringDictionary; + private final FixedIndexed longDictionary; + private final FixedIndexed doubleDictionary; + private final FrontCodedIntArrayIndexed arrayDictionary; + private final ColumnarInts encodedValueColumn; + private final ImmutableBitmap nullValueBitmap; + private final ColumnType logicalType; + private final ExpressionType logicalExpressionType; + @Nullable + private final FieldTypeInfo.TypeSet variantTypes; + private final int adjustLongId; + private final int adjustDoubleId; + private final int adjustArrayId; + + public VariantColumn( + TStringDictionary stringDictionary, + FixedIndexed longDictionary, + FixedIndexed doubleDictionary, + FrontCodedIntArrayIndexed arrayDictionary, + ColumnarInts encodedValueColumn, + ImmutableBitmap nullValueBitmap, + ColumnType logicalType, + @Nullable Byte variantTypeSetByte + ) + { + this.stringDictionary = stringDictionary; + this.longDictionary = longDictionary; + this.doubleDictionary = doubleDictionary; + this.arrayDictionary = arrayDictionary; + this.encodedValueColumn = encodedValueColumn; + this.nullValueBitmap = nullValueBitmap; + this.logicalType = logicalType; + 
this.logicalExpressionType = ExpressionType.fromColumnTypeStrict(logicalType); + this.variantTypes = variantTypeSetByte == null ? null : new FieldTypeInfo.TypeSet(variantTypeSetByte); + this.adjustLongId = stringDictionary.size(); + this.adjustDoubleId = adjustLongId + longDictionary.size(); + this.adjustArrayId = adjustDoubleId + doubleDictionary.size(); + } + + @Override + public ColumnType getLogicalType() + { + return logicalType; + } + + @Override + public Indexed getStringDictionary() + { + return new StringEncodingStrategies.Utf8ToStringIndexed(stringDictionary); + } + + @Override + public Indexed getLongDictionary() + { + return longDictionary; + } + + @Override + public Indexed getDoubleDictionary() + { + return doubleDictionary; + } + + @Override + public Indexed getArrayDictionary() + { + Iterable arrays = () -> new Iterator() + { + final Iterator delegate = arrayDictionary.iterator(); + + @Override + public boolean hasNext() + { + return delegate.hasNext(); + } + + @Override + public Object[] next() + { + final int[] next = delegate.next(); + final Object[] nextArray = new Object[next.length]; + for (int i = 0; i < nextArray.length; i++) { + nextArray[i] = lookupId(next[i]); + } + return nextArray; + } + + @Nullable + private Object lookupId(int id) + { + if (id == 0) { + return null; + } + final int adjustLongId = stringDictionary.size(); + final int adjustDoubleId = stringDictionary.size() + longDictionary.size(); + if (id < adjustLongId) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(id)); + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId); + } else if (id < adjustDoubleId + doubleDictionary.size()) { + return doubleDictionary.get(id - adjustDoubleId); + } + throw new IAE("Unknown id [%s]", id); + } + }; + return new Indexed() + { + @Override + public int size() + { + return arrayDictionary.size(); + } + + @Nullable + @Override + public Object[] get(int index) + { + throw new 
UnsupportedOperationException("get not supported"); + } + + @Override + public int indexOf(@Nullable Object[] value) + { + throw new UnsupportedOperationException("indexOf not supported"); + } + + @Override + public Iterator iterator() + { + return arrays.iterator(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + // meh + } + }; + } + + @Override + public SortedMap getFieldTypeInfo() + { + if (variantTypes != null) { + FieldTypeInfo.MutableTypeSet rootOnlyType = new FieldTypeInfo.MutableTypeSet(variantTypes.getByteValue()); + SortedMap fields = new TreeMap<>(); + fields.put(NestedPathFinder.JSON_PATH_ROOT, rootOnlyType); + return fields; + } + FieldTypeInfo.MutableTypeSet rootOnlyType = new FieldTypeInfo.MutableTypeSet().add(getLogicalType()); + SortedMap fields = new TreeMap<>(); + fields.put(NestedPathFinder.JSON_PATH_ROOT, rootOnlyType); + return fields; + } + + @Override + public int length() + { + return encodedValueColumn.size(); + } + + @Override + public boolean hasMultipleValues() + { + return false; + } + + @Override + public int getSingleValueRow(int rowNum) + { + return encodedValueColumn.get(rowNum); + } + + @Override + public IndexedInts getMultiValueRow(int rowNum) + { + throw new IllegalStateException("Multi-value row not supported"); + } + + @Nullable + @Override + public String lookupName(int id) + { + if (id < stringDictionary.size()) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(id)); + } else if (id < stringDictionary.size() + longDictionary.size()) { + return String.valueOf(longDictionary.get(id - adjustLongId)); + } else if (id < stringDictionary.size() + longDictionary.size() + doubleDictionary.size()) { + return String.valueOf(doubleDictionary.get(id - adjustDoubleId)); + } + return null; + } + + @Override + public int lookupId(String val) + { + if (val == null) { + return 0; + } + int candidate = stringDictionary.indexOf(StringUtils.toUtf8ByteBuffer(val)); + if (candidate >= 0) { + 
return candidate; + } + candidate = longDictionary.indexOf(GuavaUtils.tryParseLong(val)); + if (candidate >= 0) { + candidate += adjustLongId; + return candidate; + } + candidate = doubleDictionary.indexOf(Doubles.tryParse(val)); + if (candidate >= 0) { + candidate += adjustDoubleId; + return candidate; + } + + // not in here, we can't really do anything cool here + return -1; + } + + @Override + public int getCardinality() + { + if (logicalType.isArray() && variantTypes == null) { + return arrayDictionary.size(); + } + // this probably isn't correct if we expose this as a multi-value dimension instead of an array, which would leave + // the array dictionary out of this computation + return stringDictionary.size() + longDictionary.size() + doubleDictionary.size() + arrayDictionary.size(); + } + + @Override + public DimensionSelector makeDimensionSelector( + ReadableOffset offset, + @Nullable ExtractionFn extractionFn + ) + { + if (logicalType.isArray()) { + throw new IAE("Dimension selector is currently unsupported for [%s]", logicalType); + } + // copy everywhere all the time + class StringDimensionSelector extends AbstractDimensionSelector + implements SingleValueHistoricalDimensionSelector, IdLookup + { + private final SingleIndexedInt row = new SingleIndexedInt(); + + @Override + public IndexedInts getRow() + { + row.setValue(getRowValue()); + return row; + } + + public int getRowValue() + { + return encodedValueColumn.get(offset.getOffset()); + } + + @Override + public float getFloat() + { + final int id = getRowValue(); + if (id == 0) { + // zero + return 0f; + } else if (id < adjustLongId) { + // try to convert string to float + Float f = Floats.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); + return f == null ? 
0f : f; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId).floatValue(); + } else { + return doubleDictionary.get(id - adjustDoubleId).floatValue(); + } + } + + @Override + public double getDouble() + { + final int id = getRowValue(); + if (id == 0) { + // zero + return 0.0; + } else if (id < adjustLongId) { + // try to convert string to double + Double d = Doubles.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); + return d == null ? 0.0 : d; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId).doubleValue(); + } else { + return doubleDictionary.get(id - adjustDoubleId); + } + } + + @Override + public long getLong() + { + final int id = getRowValue(); + if (id == 0) { + // zero + return 0L; + } else if (id < adjustLongId) { + // try to convert string to long + Long l = GuavaUtils.tryParseLong(StringUtils.fromUtf8(stringDictionary.get(id))); + return l == null ? 0L : l; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId); + } else { + return doubleDictionary.get(id - adjustDoubleId).longValue(); + } + } + + @Override + public boolean isNull() + { + if (getRowValue() == 0) { + return true; + } + return DimensionHandlerUtils.isNumericNull(getObject()); + } + + @Override + public IndexedInts getRow(int offset) + { + row.setValue(getRowValue(offset)); + return row; + } + + @Override + public int getRowValue(int offset) + { + return encodedValueColumn.get(offset); + } + + @Override + public ValueMatcher makeValueMatcher(final @Nullable String value) + { + if (extractionFn == null) { + final int valueId = lookupId(value); + if (valueId >= 0) { + return new ValueMatcher() + { + @Override + public boolean matches() + { + return getRowValue() == valueId; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("column", VariantColumn.this); + } + }; + } else { + return BooleanValueMatcher.of(false); + } + } else { + // Employ 
caching BitSet optimization + return makeValueMatcher(Predicates.equalTo(value)); + } + } + + @Override + public ValueMatcher makeValueMatcher(final Predicate predicate) + { + final BitSet checkedIds = new BitSet(getCardinality()); + final BitSet matchingIds = new BitSet(getCardinality()); + + // Lazy matcher; only check an id if matches() is called. + return new ValueMatcher() + { + @Override + public boolean matches() + { + final int id = getRowValue(); + + if (checkedIds.get(id)) { + return matchingIds.get(id); + } else { + final boolean matches = predicate.apply(lookupName(id)); + checkedIds.set(id); + if (matches) { + matchingIds.set(id); + } + return matches; + } + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("column", VariantColumn.this); + } + }; + } + + @Override + public Object getObject() + { + return VariantColumn.this.lookupName(getRowValue()); + } + + @Override + public Class classOfObject() + { + return String.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("column", encodedValueColumn); + inspector.visit("offset", offset); + inspector.visit("extractionFn", extractionFn); + } + + @Override + public int getValueCardinality() + { + return getCardinality(); + } + + @Override + public String lookupName(int id) + { + final String value = VariantColumn.this.lookupName(id); + return extractionFn == null ? value : extractionFn.apply(value); + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return extractionFn == null ? 
this : null; + } + + @Override + public int lookupId(String name) + { + if (extractionFn == null) { + return VariantColumn.this.lookupId(name); + } + throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); + } + } + + return new StringDimensionSelector(); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + return new ColumnValueSelector() + { + private PeekableIntIterator nullIterator = nullValueBitmap.peekableIterator(); + private int nullMark = -1; + private int offsetMark = -1; + + @Nullable + @Override + public Object getObject() + { + final int id = encodedValueColumn.get(offset.getOffset()); + if (id < adjustArrayId) { + return lookupScalarValue(id); + } else { + int[] arr = arrayDictionary.get(id - adjustArrayId); + if (arr == null) { + return null; + } + final Object[] array = new Object[arr.length]; + for (int i = 0; i < arr.length; i++) { + array[i] = lookupScalarValue(arr[i]); + } + return array; + } + } + + @Override + public float getFloat() + { + final int id = encodedValueColumn.get(offset.getOffset()); + if (id == 0) { + // zero + return 0f; + } else if (id < adjustLongId) { + // try to convert string to float + Float f = Floats.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); + return f == null ? 0f : f; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId).floatValue(); + } else { + return doubleDictionary.get(id - adjustDoubleId).floatValue(); + } + } + + @Override + public double getDouble() + { + final int id = encodedValueColumn.get(offset.getOffset()); + if (id == 0) { + // zero + return 0.0; + } else if (id < adjustLongId) { + // try to convert string to double + Double d = Doubles.tryParse(StringUtils.fromUtf8(stringDictionary.get(id))); + return d == null ? 
0.0 : d; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId).doubleValue(); + } else { + return doubleDictionary.get(id - adjustDoubleId); + } + } + + @Override + public long getLong() + { + final int id = encodedValueColumn.get(offset.getOffset()); + if (id == 0) { + // zero + return 0L; + } else if (id < adjustLongId) { + // try to convert string to long + Long l = GuavaUtils.tryParseLong(StringUtils.fromUtf8(stringDictionary.get(id))); + return l == null ? 0L : l; + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId); + } else { + return doubleDictionary.get(id - adjustDoubleId).longValue(); + } + } + + @Override + public boolean isNull() + { + final int i = offset.getOffset(); + if (i < offsetMark) { + // offset was reset, reset iterator state + nullMark = -1; + nullIterator = nullValueBitmap.peekableIterator(); + } + offsetMark = i; + if (nullMark < i) { + nullIterator.advanceIfNeeded(offsetMark); + if (nullIterator.hasNext()) { + nullMark = nullIterator.next(); + } + } + if (nullMark == offsetMark) { + return true; + } + return DimensionHandlerUtils.isNumericNull(getObject()); + } + + @Override + public Class classOfObject() + { + return Object.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("encodedValueColumn", encodedValueColumn); + } + }; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset) + { + final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector + { + public StringVectorSelector() + { + super(encodedValueColumn, offset); + } + + @Override + public int getValueCardinality() + { + return getCardinality(); + } + + @Nullable + @Override + public String lookupName(final int id) + { + return VariantColumn.this.lookupName(id); + } + + @Override + public int lookupId(@Nullable String name) + { + return 
VariantColumn.this.lookupId(name); + } + } + + return new StringVectorSelector(); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset) + { + throw new UnsupportedOperationException(); + } + + @Override + public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) + { + return new VectorObjectSelector() + { + private final int[] vector = new int[offset.getMaxVectorSize()]; + private final Object[] objects = new Object[offset.getMaxVectorSize()]; + private int offsetId = ReadableVectorInspector.NULL_ID; + + @Override + + public Object[] getObjectVector() + { + if (offsetId == offset.getId()) { + return objects; + } + + if (offset.isContiguous()) { + encodedValueColumn.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + encodedValueColumn.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + final int dictionaryId = vector[i]; + if (dictionaryId < adjustArrayId) { + objects[i] = lookupScalarValueStrict(dictionaryId); + } else { + int[] arr = arrayDictionary.get(dictionaryId - adjustArrayId); + if (arr == null) { + objects[i] = null; + } else { + final Object[] array = new Object[arr.length]; + for (int j = 0; j < arr.length; j++) { + array[j] = lookupScalarValue(arr[j]); + } + objects[i] = ExprEval.ofType(logicalExpressionType, array).asArray(); + } + } + } + offsetId = offset.getId(); + + return objects; + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + }; + } + + @Override + public void close() throws IOException + { + encodedValueColumn.close(); + } + + /** + * Lookup value from appropriate scalar value dictionary, coercing the value to {@link #logicalType}, particularly + * useful for the vector query engine which 
prefers all the types are consistent + *

+ * This method should NEVER be used when values must round trip to be able to be looked up from the array value + * dictionary since it might coerce element values to a different type + */ + private Object lookupScalarValueStrict(int id) + { + if (id == 0) { + return null; + } + if (variantTypes == null) { + return lookupScalarValue(id); + } else { + ExprEval eval = ExprEval.ofType(logicalExpressionType, lookupScalarValue(id)); + return eval.value(); + } + } + + private Object lookupScalarValue(int id) + { + if (id < adjustLongId) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(id)); + } else if (id < adjustDoubleId) { + return longDictionary.get(id - adjustLongId); + } else if (id < adjustArrayId) { + return doubleDictionary.get(id - adjustDoubleId); + } + throw new IllegalArgumentException("not a scalar in the dictionary"); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java similarity index 89% rename from processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnAndIndexSupplier.java rename to processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java index 28b808b09f8..69246edf667 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnAndIndexSupplier.java @@ -50,9 +50,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; -public class VariantArrayColumnAndIndexSupplier implements Supplier, ColumnIndexSupplier +public class VariantColumnAndIndexSupplier implements Supplier, ColumnIndexSupplier { - public static VariantArrayColumnAndIndexSupplier read( + public static VariantColumnAndIndexSupplier read( ColumnType logicalType, ByteOrder byteOrder, BitmapSerdeFactory 
bitmapSerdeFactory, @@ -65,6 +65,17 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier stringDictionary; private final Supplier frontCodedStringDictionarySupplier; @@ -198,8 +212,9 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier valueIndexes; private final ImmutableBitmap nullValueBitmap; - public VariantArrayColumnAndIndexSupplier( + public VariantColumnAndIndexSupplier( ColumnType logicalType, + @Nullable Byte variantTypeSetByte, GenericIndexed stringDictionary, Supplier frontCodedStringDictionarySupplier, Supplier> longDictionarySupplier, @@ -213,6 +228,7 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier( + return new VariantColumn<>( frontCodedStringDictionarySupplier.get(), longDictionarySupplier.get(), doubleDictionarySupplier.get(), arrayDictionarySupplier.get(), encodedValueColumnSupplier.get(), nullValueBitmap, - logicalType + logicalType, + variantTypeSetByte ); } - return new VariantArrayColumn<>( + return new VariantColumn<>( stringDictionary.singleThreaded(), longDictionarySupplier.get(), doubleDictionarySupplier.get(), arrayDictionarySupplier.get(), encodedValueColumnSupplier.get(), nullValueBitmap, - logicalType + logicalType, + variantTypeSetByte ); } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnSerializer.java similarity index 85% rename from processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnSerializer.java rename to processing/src/main/java/org/apache/druid/segment/nested/VariantColumnSerializer.java index 702af36453e..72d5ed0ab3e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantArrayColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumnSerializer.java @@ -45,14 +45,20 @@ import org.apache.druid.segment.data.GenericIndexedWriter; import 
org.apache.druid.segment.data.SingleValueColumnarIntsSerializer; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.WritableByteChannel; -public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerializer +/** + * Serializer for a {@link NestedCommonFormatColumn} for single type arrays and mixed type columns, but not columns + * with nested data. If {@link #variantTypeSetByte} is set then the column has mixed types, which is added to the base + * metadata stored in the column file. + */ +public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer { - private static final Logger log = new Logger(VariantArrayColumnSerializer.class); + private static final Logger log = new Logger(VariantColumnSerializer.class); private final String name; private final SegmentWriteOutMedium segmentWriteOutMedium; @@ -65,27 +71,28 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial private FixedIndexedWriter doubleDictionaryWriter; private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter; private FixedIndexedIntWriter arrayElementDictionaryWriter; - private int rowCount = 0; private boolean closedForWrite = false; - private boolean dictionarySerialized = false; - private SingleValueColumnarIntsSerializer encodedValueSerializer; private GenericIndexedWriter bitmapIndexWriter; private GenericIndexedWriter arrayElementIndexWriter; private MutableBitmap[] bitmaps; private ByteBuffer columnNameBytes = null; private final Int2ObjectRBTreeMap arrayElements = new Int2ObjectRBTreeMap<>(); + @Nullable + private final Byte variantTypeSetByte; - public VariantArrayColumnSerializer( + public VariantColumnSerializer( String name, + @Nullable Byte variantTypeSetByte, IndexSpec indexSpec, SegmentWriteOutMedium segmentWriteOutMedium, Closer closer ) { this.name = name; + 
this.variantTypeSetByte = variantTypeSetByte; this.segmentWriteOutMedium = segmentWriteOutMedium; this.indexSpec = indexSpec; this.closer = closer; @@ -254,10 +261,9 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial } ExprEval eval = ExprEval.bestEffortOf(StructuredData.unwrap(selector.getObject())); - int[] globalIds = null; if (eval.isArray()) { Object[] array = eval.asArray(); - globalIds = new int[array.length]; + int[] globalIds = new int[array.length]; for (int i = 0; i < array.length; i++) { if (array[i] == null) { globalIds[i] = 0; @@ -276,10 +282,35 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial (id) -> indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap() ).add(rowCount); } + final int dictId = dictionaryIdLookup.lookupArray(globalIds); + encodedValueSerializer.addValue(dictId); + bitmaps[dictId].add(rowCount); + } else { + final Object o = eval.value(); + final int dictId; + if (o == null) { + dictId = 0; + } else if (o instanceof String) { + dictId = dictionaryIdLookup.lookupString((String) o); + } else if (o instanceof Long) { + dictId = dictionaryIdLookup.lookupLong((Long) o); + } else if (o instanceof Double) { + dictId = dictionaryIdLookup.lookupDouble((Double) o); + } else { + dictId = -1; + } + Preconditions.checkArgument(dictId >= 0, "unknown global id [%s] for value [%s]", dictId, o); + if (dictId != 0) { + // treat as single element array + arrayElements.computeIfAbsent( + dictId, + (id) -> indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap() + ).add(rowCount); + } + encodedValueSerializer.addValue(dictId); + bitmaps[dictId].add(rowCount); } - final int dictId = globalIds == null ? 
0 : dictionaryIdLookup.lookupArray(globalIds); - encodedValueSerializer.addValue(dictId); - bitmaps[dictId].add(rowCount); + rowCount++; } @@ -311,6 +342,9 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial long size = 1 + columnNameBytes.capacity(); // the value dictionaries, raw column, and null index are all stored in separate files + if (variantTypeSetByte != null) { + size += 1; + } return size; } @@ -324,6 +358,9 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial Preconditions.checkArgument(dictionaryWriter.isSorted(), "Dictionary not sorted?!?"); writeV0Header(channel, columnNameBytes); + if (variantTypeSetByte != null) { + channel.write(ByteBuffer.wrap(new byte[]{variantTypeSetByte})); + } writeInternal(smoosher, dictionaryWriter, STRING_DICTIONARY_FILE_NAME); writeInternal(smoosher, longDictionaryWriter, LONG_DICTIONARY_FILE_NAME); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantFieldColumnWriter.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantFieldColumnWriter.java index e0cd92dd6b9..01651379d4f 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantFieldColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantFieldColumnWriter.java @@ -28,7 +28,8 @@ import java.io.IOException; import java.nio.channels.WritableByteChannel; /** - * Literal field writer for mixed type nested columns of {@link NestedDataColumnSerializerV4} + * Nested field writer for mixed type scalar or array columns of {@link NestedDataColumnSerializerV4} and + * {@link NestedDataColumnSerializer}. 
*/ public final class VariantFieldColumnWriter extends GlobalDictionaryEncodedFieldColumnWriter { @@ -47,6 +48,7 @@ public final class VariantFieldColumnWriter extends GlobalDictionaryEncodedField @Override Object processValue(int row, Object value) { + // replace arrays represented as Object[] with int[] composed of the global ids if (value instanceof Object[]) { Object[] array = (Object[]) value; final int[] globalIds = new int[array.length]; @@ -66,6 +68,7 @@ public final class VariantFieldColumnWriter extends GlobalDictionaryEncodedField } return globalIds; } + // non-arrays can be passed directly through return super.processValue(row, value); } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index 13366f43bf7..8cf05426b9e 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -33,13 +33,25 @@ import org.apache.druid.segment.nested.NestedDataColumnSupplier; import org.apache.druid.segment.nested.ScalarDoubleColumnAndIndexSupplier; import org.apache.druid.segment.nested.ScalarLongColumnAndIndexSupplier; import org.apache.druid.segment.nested.ScalarStringColumnAndIndexSupplier; -import org.apache.druid.segment.nested.VariantArrayColumnAndIndexSupplier; +import org.apache.druid.segment.nested.VariantColumnAndIndexSupplier; import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +/** + * {@link ColumnPartSerde} shared by all {@link NestedCommonFormatColumn}. The {@link #logicalType} defines the native + * Druid type of the column to use for things like {@link org.apache.druid.segment.column.ColumnCapabilities} and + * {@link #hasNulls} if any null values are present. 
If {@link #isVariantType} is set, the column internally is + * composed of a mix of types, this is currently only used by {@link VariantColumnAndIndexSupplier}. + * + * @see ScalarDoubleColumnAndIndexSupplier + * @see ScalarLongColumnAndIndexSupplier + * @see ScalarStringColumnAndIndexSupplier + * @see VariantColumnAndIndexSupplier + * @see NestedDataColumnSupplier + */ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde { public static SerializerBuilder serializerBuilder() @@ -62,15 +74,17 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde public static NestedCommonFormatColumnPartSerde createDeserializer( @JsonProperty("logicalType") ColumnType logicalType, @JsonProperty("hasNulls") boolean hasNulls, + @JsonProperty("isVariantType") boolean isVariantType, @JsonProperty("byteOrder") ByteOrder byteOrder, @JsonProperty("bitmapSerdeFactory") BitmapSerdeFactory bitmapSerdeFactory ) { - return new NestedCommonFormatColumnPartSerde(logicalType, hasNulls, byteOrder, bitmapSerdeFactory, null); + return new NestedCommonFormatColumnPartSerde(logicalType, hasNulls, isVariantType, byteOrder, bitmapSerdeFactory, null); } private final ColumnType logicalType; private final boolean hasNulls; + private final boolean isVariantType; private final ByteOrder byteOrder; private final BitmapSerdeFactory bitmapSerdeFactory; @@ -81,6 +95,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde private NestedCommonFormatColumnPartSerde( ColumnType logicalType, boolean hasNulls, + boolean isVariant, ByteOrder byteOrder, BitmapSerdeFactory bitmapSerdeFactory, @Nullable Serializer serializer @@ -88,6 +103,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde { this.logicalType = logicalType; this.hasNulls = hasNulls; + this.isVariantType = isVariant; this.byteOrder = byteOrder; this.bitmapSerdeFactory = bitmapSerdeFactory; this.serializer = serializer; @@ -104,6 +120,26 @@ public class 
NestedCommonFormatColumnPartSerde implements ColumnPartSerde @Override public Deserializer getDeserializer() { + if (isVariantType || logicalType.isArray()) { + return ((buffer, builder, columnConfig) -> { + VariantColumnAndIndexSupplier supplier = VariantColumnAndIndexSupplier.read( + logicalType, + byteOrder, + bitmapSerdeFactory, + buffer, + builder, + columnConfig + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); + }); + } if (logicalType.is(ValueType.STRING)) { return ((buffer, builder, columnConfig) -> { ScalarStringColumnAndIndexSupplier supplier = ScalarStringColumnAndIndexSupplier.read( @@ -118,7 +154,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde capabilitiesBuilder.setDictionaryValuesSorted(true); capabilitiesBuilder.setDictionaryValuesUnique(true); builder.setType(logicalType); - builder.setStandardTypeColumnSupplier(supplier); + builder.setNestedCommonFormatColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); builder.setFilterable(true); @@ -138,7 +174,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde capabilitiesBuilder.setDictionaryValuesSorted(true); capabilitiesBuilder.setDictionaryValuesUnique(true); builder.setType(logicalType); - builder.setStandardTypeColumnSupplier(supplier); + builder.setNestedCommonFormatColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new 
NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); builder.setFilterable(true); @@ -158,32 +194,12 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde capabilitiesBuilder.setDictionaryValuesSorted(true); capabilitiesBuilder.setDictionaryValuesUnique(true); builder.setType(logicalType); - builder.setStandardTypeColumnSupplier(supplier); + builder.setNestedCommonFormatColumnSupplier(supplier); builder.setIndexSupplier(supplier, true, false); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); builder.setFilterable(true); }); } - if (logicalType.isArray()) { - return ((buffer, builder, columnConfig) -> { - VariantArrayColumnAndIndexSupplier supplier = VariantArrayColumnAndIndexSupplier.read( - logicalType, - byteOrder, - bitmapSerdeFactory, - buffer, - builder, - columnConfig - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - builder.setType(logicalType); - builder.setStandardTypeColumnSupplier(supplier); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); - }); - } return (buffer, builder, columnConfig) -> { NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( hasNulls, @@ -200,7 +216,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde ColumnType simpleType = supplier.getLogicalType(); ColumnType logicalType = simpleType == null ? 
ColumnType.NESTED_DATA : simpleType; builder.setType(logicalType); - builder.setStandardTypeColumnSupplier(supplier); + builder.setNestedCommonFormatColumnSupplier(supplier); builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls)); builder.setFilterable(true); }; @@ -218,6 +234,12 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde return hasNulls; } + @JsonProperty("isVariantType") + public boolean isVariantType() + { + return isVariantType; + } + @JsonProperty public ByteOrder getByteOrder() { @@ -234,6 +256,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde { private ColumnType logicalType; private boolean hasNulls; + private boolean isVariantType; private ByteOrder byteOrder = ByteOrder.nativeOrder(); BitmapSerdeFactory bitmapSerdeFactory; @Nullable @@ -245,6 +268,12 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde return this; } + public SerializerBuilder isVariantType(boolean isVariant) + { + this.isVariantType = isVariant; + return this; + } + public SerializerBuilder withSerializer(final Serializer serializer) { this.serializer = serializer; @@ -271,7 +300,7 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde public NestedCommonFormatColumnPartSerde build() { - return new NestedCommonFormatColumnPartSerde(logicalType, hasNulls, byteOrder, bitmapSerdeFactory, serializer); + return new NestedCommonFormatColumnPartSerde(logicalType, hasNulls, isVariantType, byteOrder, bitmapSerdeFactory, serializer); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 617e1befe77..c7ef30e883b 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -26,10 
+26,11 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.primitives.Doubles; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Numbers; +import org.apache.druid.math.expr.Evals; +import org.apache.druid.math.expr.ExprEval; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.extraction.ExtractionFn; @@ -51,11 +52,13 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.column.ValueTypes; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn; +import org.apache.druid.segment.nested.NestedCommonFormatColumn; import org.apache.druid.segment.nested.NestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.NestedFieldDictionaryEncodedColumn; @@ -63,8 +66,10 @@ import org.apache.druid.segment.nested.NestedPathArrayElement; import org.apache.druid.segment.nested.NestedPathFinder; import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.nested.StructuredData; -import org.apache.druid.segment.nested.VariantArrayColumn; +import org.apache.druid.segment.nested.VariantColumn; +import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier; import 
org.apache.druid.segment.vector.BaseDoubleVectorValueSelector; +import org.apache.druid.segment.vector.BaseFloatVectorValueSelector; import org.apache.druid.segment.vector.BaseLongVectorValueSelector; import org.apache.druid.segment.vector.NilVectorSelector; import org.apache.druid.segment.vector.ReadableVectorInspector; @@ -78,6 +83,7 @@ import java.nio.ByteBuffer; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.Set; /** * Optimized virtual column that can make direct selectors into a {@link NestedDataComplexColumn} or any associated @@ -294,8 +300,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn ); } - if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantArrayColumn) { - final VariantArrayColumn arrayColumn = (VariantArrayColumn) theColumn; + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) { + final VariantColumn arrayColumn = (VariantColumn) theColumn; ColumnValueSelector arraySelector = arrayColumn.makeColumnValueSelector(offset); final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex(); if (elementNumber < 0) { @@ -365,7 +371,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn if (parts.isEmpty()) { // dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat) // so we want to wrap their selector in a "best effort" casting selector to implement them - if (theColumn instanceof DictionaryEncodedColumn) { + if (theColumn instanceof DictionaryEncodedColumn && !(theColumn instanceof VariantColumn)) { final DictionaryEncodedColumn column = (DictionaryEncodedColumn) theColumn; return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, null)); } @@ -374,8 +380,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn return theColumn.makeColumnValueSelector(offset); } - if 
(parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantArrayColumn) { - final VariantArrayColumn arrayColumn = (VariantArrayColumn) theColumn; + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) { + final VariantColumn arrayColumn = (VariantColumn) theColumn; ColumnValueSelector arraySelector = arrayColumn.makeColumnValueSelector(offset); final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex(); if (elementNumber < 0) { @@ -532,8 +538,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn return column.makeVectorObjectSelector(offset); } - if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && column instanceof VariantArrayColumn) { - final VariantArrayColumn arrayColumn = (VariantArrayColumn) column; + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && column instanceof VariantColumn) { + final VariantColumn arrayColumn = (VariantColumn) column; VectorObjectSelector arraySelector = arrayColumn.makeVectorObjectSelector(offset); final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex(); if (elementNumber < 0) { @@ -601,131 +607,360 @@ public class NestedFieldVirtualColumn implements VirtualColumn if (!(theColumn instanceof NestedDataComplexColumn)) { if (parts.isEmpty()) { - ColumnCapabilities capabilities = holder.getCapabilities(); if (theColumn instanceof DictionaryEncodedColumn) { - return ExpressionVectorSelectors.castObjectSelectorToNumeric( - offset, - this.columnName, - theColumn.makeVectorObjectSelector(offset), - capabilities.toColumnType(), - expectedType - ); + final VectorObjectSelector delegate = theColumn.makeVectorObjectSelector(offset); + if (expectedType != null && expectedType.is(ValueType.LONG)) { + return new BaseLongVectorValueSelector(offset) + { + private int currentOffsetId = ReadableVectorInspector.NULL_ID; + private final long[] longs = new 
long[delegate.getMaxVectorSize()]; + @Nullable + private boolean[] nulls = null; + + @Override + public long[] getLongVector() + { + computeLongs(); + return longs; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeLongs(); + return nulls; + } + + private void computeLongs() + { + if (currentOffsetId != offset.getId()) { + currentOffsetId = offset.getId(); + final Object[] values = delegate.getObjectVector(); + for (int i = 0; i < values.length; i++) { + Number n = ExprEval.computeNumber(Evals.asString(values[i])); + if (n != null) { + longs[i] = n.longValue(); + if (nulls != null) { + nulls[i] = false; + } + } else { + if (nulls == null) { + nulls = new boolean[offset.getMaxVectorSize()]; + } + nulls[i] = true; + } + } + } + } + }; + } else if (expectedType != null && expectedType.is(ValueType.FLOAT)) { + return new BaseFloatVectorValueSelector(offset) + { + private int currentOffsetId = ReadableVectorInspector.NULL_ID; + private final float[] floats = new float[delegate.getMaxVectorSize()]; + @Nullable + private boolean[] nulls = null; + + @Override + public float[] getFloatVector() + { + computeFloats(); + return floats; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeFloats(); + return nulls; + } + + private void computeFloats() + { + if (currentOffsetId != offset.getId()) { + currentOffsetId = offset.getId(); + final Object[] values = delegate.getObjectVector(); + for (int i = 0; i < values.length; i++) { + Number n = ExprEval.computeNumber(Evals.asString(values[i])); + if (n != null) { + floats[i] = n.floatValue(); + if (nulls != null) { + nulls[i] = false; + } + } else { + if (nulls == null) { + nulls = new boolean[offset.getMaxVectorSize()]; + } + nulls[i] = true; + } + } + } + } + }; + } else { + return new BaseDoubleVectorValueSelector(offset) + { + private int currentOffsetId = ReadableVectorInspector.NULL_ID; + private final double[] doubles = new double[delegate.getMaxVectorSize()]; + @Nullable 
+ private boolean[] nulls = null; + @Override + public double[] getDoubleVector() + { + computeDoubles(); + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + computeDoubles(); + return nulls; + } + + private void computeDoubles() + { + if (currentOffsetId != offset.getId()) { + currentOffsetId = offset.getId(); + final Object[] values = delegate.getObjectVector(); + for (int i = 0; i < values.length; i++) { + Number n = ExprEval.computeNumber(Evals.asString(values[i])); + if (n != null) { + doubles[i] = n.doubleValue(); + if (nulls != null) { + nulls[i] = false; + } + } else { + if (nulls == null) { + nulls = new boolean[offset.getMaxVectorSize()]; + } + nulls[i] = true; + } + } + } + } + }; + } } return theColumn.makeVectorValueSelector(offset); } - if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantArrayColumn) { - final VariantArrayColumn arrayColumn = (VariantArrayColumn) theColumn; + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) { + final VariantColumn arrayColumn = (VariantColumn) theColumn; VectorObjectSelector arraySelector = arrayColumn.makeVectorObjectSelector(offset); final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex(); if (elementNumber < 0) { throw new IAE("Cannot make array element selector, negative array index not supported"); } - return new VectorValueSelector() - { - private final long[] longs = new long[offset.getMaxVectorSize()]; - private final double[] doubles = new double[offset.getMaxVectorSize()]; - private final float[] floats = new float[offset.getMaxVectorSize()]; - private final boolean[] nulls = new boolean[offset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - private void computeNumbers() + if (expectedType != null && expectedType.is(ValueType.LONG)) { + return new BaseLongVectorValueSelector(offset) { - if (offset.getId() != id) 
{ - final Object[] maybeArrays = arraySelector.getObjectVector(); - for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { - Object maybeArray = maybeArrays[i]; - if (maybeArray instanceof Object[]) { - Object[] anArray = (Object[]) maybeArray; - if (elementNumber < anArray.length) { - if (anArray[elementNumber] instanceof Number) { - Number n = (Number) anArray[elementNumber]; - longs[i] = n.longValue(); - doubles[i] = n.doubleValue(); - floats[i] = n.floatValue(); - nulls[i] = false; - } else { - Double d = anArray[elementNumber] instanceof String - ? Doubles.tryParse((String) anArray[elementNumber]) - : null; - if (d != null) { - longs[i] = d.longValue(); - doubles[i] = d; - floats[i] = d.floatValue(); + private final long[] longs = new long[offset.getMaxVectorSize()]; + private final boolean[] nulls = new boolean[offset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; + + private void computeNumbers() + { + if (offset.getId() != id) { + final Object[] maybeArrays = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = maybeArrays[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + if (anArray[elementNumber] instanceof Number) { + Number n = (Number) anArray[elementNumber]; + longs[i] = n.longValue(); nulls[i] = false; } else { - nullElement(i); + Double d = anArray[elementNumber] instanceof String + ? Doubles.tryParse((String) anArray[elementNumber]) + : null; + if (d != null) { + longs[i] = d.longValue(); + nulls[i] = false; + } else { + longs[i] = 0L; + nulls[i] = true; + } } + } else { + nullElement(i); } } else { + // not an array? nullElement(i); } - } else { - // not an array? 
- nullElement(i); } + id = offset.getId(); } - id = offset.getId(); } - } - private void nullElement(int i) - { - longs[i] = 0L; - doubles[i] = 0L; - floats[i] = 0L; - nulls[i] = true; - } - - @Override - public long[] getLongVector() - { - if (offset.getId() != id) { - computeNumbers(); + private void nullElement(int i) + { + longs[i] = 0L; + nulls[i] = true; } - return longs; - } - @Override - public float[] getFloatVector() - { - if (offset.getId() != id) { - computeNumbers(); + @Override + public long[] getLongVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return longs; } - return floats; - } - @Override - public double[] getDoubleVector() - { - if (offset.getId() != id) { - computeNumbers(); + @Nullable + @Override + public boolean[] getNullVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return nulls; } - return doubles; - } - - @Nullable - @Override - public boolean[] getNullVector() + }; + } else if (expectedType != null && expectedType.is(ValueType.FLOAT)) { + return new BaseFloatVectorValueSelector(offset) { - if (offset.getId() != id) { - computeNumbers(); + private final float[] floats = new float[offset.getMaxVectorSize()]; + private final boolean[] nulls = new boolean[offset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; + + private void computeNumbers() + { + if (offset.getId() != id) { + final Object[] maybeArrays = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = maybeArrays[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + if (anArray[elementNumber] instanceof Number) { + Number n = (Number) anArray[elementNumber]; + floats[i] = n.floatValue(); + nulls[i] = false; + } else { + Double d = anArray[elementNumber] instanceof String + ? 
Doubles.tryParse((String) anArray[elementNumber]) + : null; + if (d != null) { + floats[i] = d.floatValue(); + nulls[i] = false; + } else { + nullElement(i); + } + } + } else { + nullElement(i); + } + } else { + // not an array? + nullElement(i); + } + } + id = offset.getId(); + } } - return nulls; - } - @Override - public int getMaxVectorSize() - { - return arraySelector.getMaxVectorSize(); - } + private void nullElement(int i) + { + floats[i] = 0f; + nulls[i] = true; + } - @Override - public int getCurrentVectorSize() + @Override + public float[] getFloatVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return floats; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return nulls; + } + }; + } else { + return new BaseDoubleVectorValueSelector(offset) { - return arraySelector.getCurrentVectorSize(); - } - }; + private final double[] doubles = new double[offset.getMaxVectorSize()]; + private final boolean[] nulls = new boolean[offset.getMaxVectorSize()]; + private int id = ReadableVectorInspector.NULL_ID; + + private void computeNumbers() + { + if (offset.getId() != id) { + final Object[] maybeArrays = arraySelector.getObjectVector(); + for (int i = 0; i < arraySelector.getCurrentVectorSize(); i++) { + Object maybeArray = maybeArrays[i]; + if (maybeArray instanceof Object[]) { + Object[] anArray = (Object[]) maybeArray; + if (elementNumber < anArray.length) { + if (anArray[elementNumber] instanceof Number) { + Number n = (Number) anArray[elementNumber]; + doubles[i] = n.doubleValue(); + nulls[i] = false; + } else { + Double d = anArray[elementNumber] instanceof String + ? Doubles.tryParse((String) anArray[elementNumber]) + : null; + if (d != null) { + doubles[i] = d; + nulls[i] = false; + } else { + nullElement(i); + } + } + } else { + nullElement(i); + } + } else { + // not an array? 
+ nullElement(i); + } + } + id = offset.getId(); + } + } + + private void nullElement(int i) + { + doubles[i] = 0.0; + nulls[i] = true; + } + + @Override + public double[] getDoubleVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + if (offset.getId() != id) { + computeNumbers(); + } + return nulls; + } + }; + } } return NilVectorSelector.create(offset); } @@ -776,7 +1011,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn nullVector = new boolean[objectSelector.getMaxVectorSize()]; } longVector[i] = 0L; - nullVector[i] = NullHandling.sqlCompatible(); + nullVector[i] = true; } else { Long l; if (v instanceof Number) { @@ -794,7 +1029,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn nullVector = new boolean[objectSelector.getMaxVectorSize()]; } longVector[i] = 0L; - nullVector[i] = NullHandling.sqlCompatible(); + nullVector[i] = true; } } } @@ -839,7 +1074,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn nullVector = new boolean[objectSelector.getMaxVectorSize()]; } doubleVector[i] = 0.0; - nullVector[i] = NullHandling.sqlCompatible(); + nullVector[i] = true; } else { Double d; if (v instanceof Number) { @@ -857,7 +1092,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn nullVector = new boolean[objectSelector.getMaxVectorSize()]; } doubleVector[i] = 0.0; - nullVector[i] = NullHandling.sqlCompatible(); + nullVector[i] = true; } } } @@ -879,13 +1114,41 @@ public class NestedFieldVirtualColumn implements VirtualColumn } BaseColumn theColumn = holder.getColumn(); if (theColumn instanceof CompressedNestedDataComplexColumn) { - return ((CompressedNestedDataComplexColumn) theColumn).getColumnIndexSupplier(parts); + final CompressedNestedDataComplexColumn nestedColumn = (CompressedNestedDataComplexColumn) theColumn; + final ColumnIndexSupplier nestedColumnPathIndexSupplier = 
nestedColumn.getColumnIndexSupplier(parts); + if (expectedType != null) { + final Set types = nestedColumn.getColumnTypes(parts); + // if the expected output type is numeric but not all of the input types are numeric, we might have additional + // null values than what the null value bitmap is tracking, wrap it + if (expectedType.isNumeric() && types.stream().anyMatch(t -> !t.isNumeric())) { + return NoIndexesColumnIndexSupplier.getInstance(); + } + } + return nestedColumnPathIndexSupplier; } if (parts.isEmpty()) { - return holder.getIndexSupplier(); + final ColumnIndexSupplier baseIndexSupplier = holder.getIndexSupplier(); + if (expectedType != null) { + if (theColumn instanceof NumericColumn) { + return baseIndexSupplier; + } + if (theColumn instanceof NestedCommonFormatColumn) { + final NestedCommonFormatColumn commonFormat = (NestedCommonFormatColumn) theColumn; + if (expectedType.isNumeric() && !commonFormat.getLogicalType().isNumeric()) { + return NoIndexesColumnIndexSupplier.getInstance(); + } + } else { + return expectedType.isNumeric() ? NoIndexesColumnIndexSupplier.getInstance() : baseIndexSupplier; + } + } + return baseIndexSupplier; } - if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && !hasNegativeArrayIndex && theColumn instanceof VariantArrayColumn) { - return holder.getIndexSupplier(); + if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) { + // cannot use the array column index supplier directly, in the future array columns should expose a function + // with a signature like 'getArrayElementIndexSupplier(int index)' to allow getting indexes for specific elements + // if we want to support this stuff. 
Right now VariantArrayColumn doesn't actually retain enough information about + // what positions the values are in to support doing anything cool here, so we just return 'no indexes' + return NoIndexesColumnIndexSupplier.getInstance(); } return null; } @@ -904,7 +1167,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn // from here return ColumnCapabilitiesImpl.createDefault() .setType(expectedType != null ? expectedType : ColumnType.STRING) - .setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible())); + .setHasNulls(true); } @Nullable @@ -934,8 +1197,7 @@ public class NestedFieldVirtualColumn implements VirtualColumn .setDictionaryValuesSorted(useDictionary) .setDictionaryValuesUnique(useDictionary) .setHasBitmapIndexes(useDictionary) - .setHasNulls(expectedType == null || (expectedType.isNumeric() - && NullHandling.sqlCompatible())); + .setHasNulls(true); } // column is not nested, use underlying column capabilities, adjusted for expectedType as necessary if (parts.isEmpty()) { @@ -1011,7 +1273,10 @@ public class NestedFieldVirtualColumn implements VirtualColumn */ public static class RawFieldLiteralColumnValueSelector extends RawFieldColumnSelector { - public RawFieldLiteralColumnValueSelector(ColumnValueSelector baseSelector, List parts) + public RawFieldLiteralColumnValueSelector( + ColumnValueSelector baseSelector, + List parts + ) { super(baseSelector, parts); } @@ -1337,7 +1602,8 @@ public class NestedFieldVirtualColumn implements VirtualColumn if (row.size() != 1) { return true; } - return Doubles.tryParse(lookupName(row.get(0))) == null; + final String s = lookupName(row.get(0)); + return s == null || Doubles.tryParse(s) == null; } @Override diff --git a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java index a2ddb78dc3a..61799c45af1 100644 --- 
a/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/EvalTest.java @@ -27,6 +27,7 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.column.TypeStrategiesTest; +import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.BeforeClass; @@ -272,7 +273,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofStringArray(new String[]{"foo", "bar"}).castTo(ExpressionType.STRING) ); - Assert.assertEquals("invalid type cannot cast ARRAY to STRING", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [STRING]", t.getMessage()); } @Test @@ -282,7 +283,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofStringArray(new String[]{"foo", "bar"}).castTo(ExpressionType.LONG) ); - Assert.assertEquals("invalid type cannot cast ARRAY to LONG", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [LONG]", t.getMessage()); } @Test @@ -292,7 +293,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofStringArray(new String[]{"foo", "bar"}).castTo(ExpressionType.DOUBLE) ); - Assert.assertEquals("invalid type cannot cast ARRAY to DOUBLE", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [DOUBLE]", t.getMessage()); } @Test @@ -302,7 +303,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofLongArray(new Long[]{1L, 2L}).castTo(ExpressionType.STRING) ); - Assert.assertEquals("invalid type cannot cast ARRAY to STRING", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [STRING]", t.getMessage()); } @Test @@ -312,7 +313,7 @@ public class 
EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofLongArray(new Long[]{1L, 2L}).castTo(ExpressionType.LONG) ); - Assert.assertEquals("invalid type cannot cast ARRAY to LONG", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [LONG]", t.getMessage()); } @Test @@ -322,7 +323,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofLongArray(new Long[]{1L, 2L}).castTo(ExpressionType.DOUBLE) ); - Assert.assertEquals("invalid type cannot cast ARRAY to DOUBLE", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [DOUBLE]", t.getMessage()); } @Test @@ -332,7 +333,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofDoubleArray(new Double[]{1.1, 2.2}).castTo(ExpressionType.STRING) ); - Assert.assertEquals("invalid type cannot cast ARRAY to STRING", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [STRING]", t.getMessage()); } @Test @@ -342,7 +343,7 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofDoubleArray(new Double[]{1.1, 2.2}).castTo(ExpressionType.LONG) ); - Assert.assertEquals("invalid type cannot cast ARRAY to LONG", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [LONG]", t.getMessage()); } @Test @@ -352,7 +353,285 @@ public class EvalTest extends InitializedNullHandlingTest IAE.class, () -> ExprEval.ofDoubleArray(new Double[]{1.1, 2.2}).castTo(ExpressionType.DOUBLE) ); - Assert.assertEquals("invalid type cannot cast ARRAY to DOUBLE", t.getMessage()); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [DOUBLE]", t.getMessage()); + } + + @Test + public void testNestedDataCast() + { + ExprEval cast; + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, "hello").castTo(ExpressionType.STRING); + Assert.assertEquals("hello", cast.value()); + Assert.assertEquals("hello", ExprEval.ofComplex(ExpressionType.NESTED_DATA, 
"hello").asString()); + Assert.assertEquals(ExpressionType.STRING, cast.type()); + + cast = ExprEval.of("hello").castTo(ExpressionType.NESTED_DATA); + Assert.assertEquals("hello", cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, 123L).castTo(ExpressionType.STRING); + Assert.assertEquals("123", cast.value()); + Assert.assertEquals(ExpressionType.STRING, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, 123L).castTo(ExpressionType.LONG); + Assert.assertEquals(123L, cast.value()); + Assert.assertEquals(123L, ExprEval.ofComplex(ExpressionType.NESTED_DATA, 123L).asLong()); + Assert.assertEquals(ExpressionType.LONG, cast.type()); + + cast = ExprEval.of(123L).castTo(ExpressionType.NESTED_DATA); + Assert.assertEquals(123L, cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, 123L).castTo(ExpressionType.DOUBLE); + Assert.assertEquals(123.0, cast.value()); + Assert.assertEquals(123.0, ExprEval.ofComplex(ExpressionType.NESTED_DATA, 123L).asDouble(), 0.0); + Assert.assertEquals(ExpressionType.DOUBLE, cast.type()); + + cast = ExprEval.of(12.3).castTo(ExpressionType.NESTED_DATA); + Assert.assertEquals(12.3, cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableList.of("a", "b", "c")).castTo(ExpressionType.STRING_ARRAY); + Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, (Object[]) cast.value()); + Assert.assertArrayEquals( + new Object[]{"a", "b", "c"}, + ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableList.of("a", "b", "c")).asArray() + ); + Assert.assertEquals(ExpressionType.STRING_ARRAY, cast.type()); + + cast = ExprEval.ofArray(ExpressionType.STRING_ARRAY, new Object[]{"a", "b", "c"}).castTo(ExpressionType.NESTED_DATA); + Assert.assertArrayEquals(new Object[]{"a", "b", "c"}, 
(Object[]) cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableList.of(1L, 2L, 3L)).castTo(ExpressionType.LONG_ARRAY); + Assert.assertArrayEquals(new Object[]{1L, 2L, 3L}, (Object[]) cast.value()); + Assert.assertArrayEquals( + new Object[]{1L, 2L, 3L}, + ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableList.of(1L, 2L, 3L)).asArray() + ); + Assert.assertEquals(ExpressionType.LONG_ARRAY, cast.type()); + + cast = ExprEval.ofArray(ExpressionType.LONG_ARRAY, new Object[]{1L, 2L, 3L}).castTo(ExpressionType.NESTED_DATA); + Assert.assertArrayEquals(new Object[]{1L, 2L, 3L}, (Object[]) cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + cast = ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableList.of(1L, 2L, 3L)).castTo(ExpressionType.DOUBLE_ARRAY); + Assert.assertArrayEquals(new Object[]{1.0, 2.0, 3.0}, (Object[]) cast.value()); + Assert.assertEquals(ExpressionType.DOUBLE_ARRAY, cast.type()); + + cast = ExprEval.ofArray(ExpressionType.DOUBLE_ARRAY, new Object[]{1.1, 2.2, 3.3}).castTo(ExpressionType.NESTED_DATA); + Assert.assertArrayEquals(new Object[]{1.1, 2.2, 3.3}, (Object[]) cast.value()); + Assert.assertEquals(ExpressionType.NESTED_DATA, cast.type()); + + ExpressionType nestedArray = ExpressionTypeFactory.getInstance().ofArray(ExpressionType.NESTED_DATA); + cast = ExprEval.ofComplex( + ExpressionType.NESTED_DATA, + ImmutableList.of(ImmutableMap.of("x", 1, "y", 2), ImmutableMap.of("x", 3, "y", 4)) + ).castTo(nestedArray); + Assert.assertArrayEquals( + new Object[]{ + ImmutableMap.of("x", 1, "y", 2), + ImmutableMap.of("x", 3, "y", 4) + }, + (Object[]) cast.value() + ); + Assert.assertEquals(nestedArray, cast.type()); + Assert.assertArrayEquals( + new Object[]{ + ImmutableMap.of("x", 1, "y", 2), + ImmutableMap.of("x", 3, "y", 4) + }, + ExprEval.ofComplex( + ExpressionType.NESTED_DATA, + ImmutableList.of( + ImmutableMap.of("x", 1, "y", 
2), + ImmutableMap.of("x", 3, "y", 4) + ) + ).asArray() + ); + + Assert.assertThrows(IAE.class, () -> ExprEval.ofLong(1234L).castTo(nestedArray)); + Assert.assertThrows(IAE.class, () -> ExprEval.of("hello").castTo(nestedArray)); + Assert.assertThrows(IAE.class, () -> ExprEval.ofDouble(1.234).castTo(nestedArray)); + Assert.assertThrows(IAE.class, () -> ExprEval.ofComplex(ExpressionType.NESTED_DATA, 1234L).castTo(nestedArray)); + } + + @Test + public void testNestedAsOtherStuff() + { + ExprEval eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, StructuredData.wrap(true)); + Assert.assertTrue(eval.asBoolean()); + Assert.assertFalse(eval.isNumericNull()); + Assert.assertEquals(1, eval.asInt()); + Assert.assertEquals(1L, eval.asLong()); + Assert.assertEquals(1.0, eval.asDouble(), 0.0); + + Assert.assertTrue(ExprEval.ofComplex(ExpressionType.NESTED_DATA, true).asBoolean()); + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, false); + Assert.assertFalse(eval.asBoolean()); + Assert.assertFalse(eval.isNumericNull()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0L, eval.asLong()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, "true"); + Assert.assertTrue(eval.asBoolean()); + Assert.assertFalse(eval.isNumericNull()); + Assert.assertEquals(1L, eval.asLong()); + Assert.assertEquals(1, eval.asInt()); + Assert.assertEquals(1.0, eval.asDouble(), 0.0); + + Assert.assertTrue(ExprEval.ofComplex(ExpressionType.NESTED_DATA, StructuredData.wrap("true")).asBoolean()); + Assert.assertTrue(ExprEval.ofComplex(ExpressionType.NESTED_DATA, "TRUE").asBoolean()); + Assert.assertTrue(ExprEval.ofComplex(ExpressionType.NESTED_DATA, "True").asBoolean()); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, StructuredData.wrap(1L)); + Assert.assertTrue(eval.asBoolean()); + Assert.assertFalse(eval.isNumericNull()); + Assert.assertEquals(1L, eval.asLong()); + Assert.assertEquals(1, eval.asInt()); + 
Assert.assertEquals(1.0, eval.asDouble(), 0.0); + + Assert.assertTrue(ExprEval.ofComplex(ExpressionType.NESTED_DATA, 1L).asBoolean()); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, StructuredData.wrap(1.23)); + Assert.assertTrue(eval.asBoolean()); + Assert.assertFalse(eval.isNumericNull()); + Assert.assertEquals(1L, eval.asLong()); + Assert.assertEquals(1, eval.asInt()); + Assert.assertEquals(1.23, eval.asDouble(), 0.0); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, "hello"); + Assert.assertFalse(eval.asBoolean()); + Assert.assertTrue(eval.isNumericNull()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0L, eval.asLong()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, Arrays.asList("1", "2", "3")); + Assert.assertFalse(eval.asBoolean()); + Assert.assertTrue(eval.isNumericNull()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0L, eval.asLong()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + Assert.assertArrayEquals(new Object[]{"1", "2", "3"}, eval.asArray()); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, Arrays.asList(1L, 2L, 3L)); + Assert.assertFalse(eval.asBoolean()); + Assert.assertTrue(eval.isNumericNull()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0L, eval.asLong()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + Assert.assertArrayEquals(new Object[]{1L, 2L, 3L}, eval.asArray()); + + eval = ExprEval.ofComplex(ExpressionType.NESTED_DATA, Arrays.asList(1.1, 2.2, 3.3)); + Assert.assertFalse(eval.asBoolean()); + Assert.assertTrue(eval.isNumericNull()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0L, eval.asLong()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + Assert.assertArrayEquals(new Object[]{1.1, 2.2, 3.3}, eval.asArray()); + + eval = ExprEval.ofComplex( + ExpressionType.NESTED_DATA, + ImmutableList.of( + ImmutableMap.of("x", 1, "y", 2), + ImmutableMap.of("x", 3, "y", 4) + ) 
+ ); + Assert.assertFalse(eval.asBoolean()); + Assert.assertEquals(0, eval.asLong()); + Assert.assertEquals(0, eval.asInt()); + Assert.assertEquals(0.0, eval.asDouble(), 0.0); + } + + @Test + public void testNonNestedComplexCastThrows() + { + ExpressionType someComplex = ExpressionTypeFactory.getInstance().ofComplex("tester"); + Throwable t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.STRING) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [STRING]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.LONG) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [LONG]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.DOUBLE) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [DOUBLE]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.STRING_ARRAY) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [ARRAY]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.LONG_ARRAY) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [ARRAY]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.DOUBLE_ARRAY) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [ARRAY]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofType(someComplex, "hello").castTo(ExpressionType.NESTED_DATA) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [COMPLEX]", t.getMessage()); + + t = 
Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.of("hello").castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [STRING] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.of(123L).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [LONG] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.of(1.23).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [DOUBLE] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofStringArray(new Object[]{"a", "b", "c"}).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofLongArray(new Object[]{1L, 2L, 3L}).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofDoubleArray(new Object[]{1.1, 2.2, 3.3}).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [ARRAY] to [COMPLEX]", t.getMessage()); + + t = Assert.assertThrows( + IllegalArgumentException.class, + () -> ExprEval.ofComplex(ExpressionType.NESTED_DATA, ImmutableMap.of("x", 1L)).castTo(someComplex) + ); + Assert.assertEquals("Invalid type, cannot cast [COMPLEX] to [COMPLEX]", t.getMessage()); } @Test @@ -813,16 +1092,11 @@ public class EvalTest extends InitializedNullHandlingTest Assert.assertEquals(type, eval.type()); Assert.assertEquals(pair, eval.value()); - // json type isn't defined in druid-core - ExpressionType json = ExpressionType.fromString("COMPLEX"); - eval = ExprEval.ofType(json, ImmutableMap.of("x", 1L, "y", 2L)); - Assert.assertEquals(json, eval.type()); + // json type best efforts its way to other 
types + eval = ExprEval.ofType(ExpressionType.NESTED_DATA, ImmutableMap.of("x", 1L, "y", 2L)); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); Assert.assertEquals(ImmutableMap.of("x", 1L, "y", 2L), eval.value()); - eval = ExprEval.ofType(json, "hello"); - Assert.assertEquals(json, eval.type()); - Assert.assertEquals("hello", eval.value()); - ExpressionType stringyComplexThing = ExpressionType.fromString("COMPLEX"); eval = ExprEval.ofType(stringyComplexThing, "notbase64"); Assert.assertEquals(stringyComplexThing, eval.type()); diff --git a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java index 11fdc85e563..d6304e94c39 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/FunctionTest.java @@ -19,22 +19,23 @@ package org.apache.druid.math.expr; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.column.TypeStrategiesTest; import org.apache.druid.segment.column.TypeStrategy; +import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import javax.annotation.Nullable; import java.math.BigDecimal; @@ -44,10 +45,10 @@ import java.util.Set; public class FunctionTest extends InitializedNullHandlingTest { - @Rule - public ExpectedException 
expectedException = ExpectedException.none(); + private Expr.ObjectBinding bestEffortBindings; + private Expr.ObjectBinding typedBindings; + private Expr.ObjectBinding[] allBindings; - private Expr.ObjectBinding bindings; @BeforeClass public static void setupClass() @@ -56,33 +57,66 @@ public class FunctionTest extends InitializedNullHandlingTest TypeStrategiesTest.NULLABLE_TEST_PAIR_TYPE.getComplexTypeName(), new TypeStrategiesTest.NullableLongPairTypeStrategy() ); + NestedDataModule.registerHandlersAndSerde(); } @Before public void setup() { - ImmutableMap.Builder builder = - ImmutableMap.builder() - .put("x", "foo") - .put("y", 2) - .put("z", 3.1) - .put("d", 34.56D) - .put("maxLong", Long.MAX_VALUE) - .put("minLong", Long.MIN_VALUE) - .put("f", 12.34F) - .put("nan", Double.NaN) - .put("inf", Double.POSITIVE_INFINITY) - .put("-inf", Double.NEGATIVE_INFINITY) - .put("o", 0) - .put("od", 0D) - .put("of", 0F) - .put("a", new String[] {"foo", "bar", "baz", "foobar"}) - .put("b", new Long[] {1L, 2L, 3L, 4L, 5L}) - .put("c", new Double[] {3.1, 4.2, 5.3}) - .put("someComplex", new TypeStrategiesTest.NullableLongPair(1L, 2L)) - .put("str1", "v1") - .put("str2", "v2"); - bindings = InputBindings.forMap(builder.build()); + ImmutableMap.Builder inputTypesBuilder = ImmutableMap.builder(); + inputTypesBuilder.put("x", ExpressionType.STRING) + .put("y", ExpressionType.LONG) + .put("z", ExpressionType.DOUBLE) + .put("d", ExpressionType.DOUBLE) + .put("maxLong", ExpressionType.LONG) + .put("minLong", ExpressionType.LONG) + .put("f", ExpressionType.DOUBLE) + .put("nan", ExpressionType.DOUBLE) + .put("inf", ExpressionType.DOUBLE) + .put("-inf", ExpressionType.DOUBLE) + .put("o", ExpressionType.LONG) + .put("od", ExpressionType.DOUBLE) + .put("of", ExpressionType.DOUBLE) + .put("a", ExpressionType.STRING_ARRAY) + .put("b", ExpressionType.LONG_ARRAY) + .put("c", ExpressionType.DOUBLE_ARRAY) + .put("someComplex", 
ExpressionType.fromColumnType(TypeStrategiesTest.NULLABLE_TEST_PAIR_TYPE)) + .put("str1", ExpressionType.STRING) + .put("str2", ExpressionType.STRING) + .put("nestedArray", ExpressionType.NESTED_DATA); + + final StructuredData nestedArray = StructuredData.wrap( + ImmutableList.of( + ImmutableMap.of("x", 2L, "y", 3.3), + ImmutableMap.of("x", 4L, "y", 6.6) + ) + ); + ImmutableMap.Builder builder = ImmutableMap.builder(); + builder.put("x", "foo") + .put("y", 2) + .put("z", 3.1) + .put("d", 34.56D) + .put("maxLong", Long.MAX_VALUE) + .put("minLong", Long.MIN_VALUE) + .put("f", 12.34F) + .put("nan", Double.NaN) + .put("inf", Double.POSITIVE_INFINITY) + .put("-inf", Double.NEGATIVE_INFINITY) + .put("o", 0) + .put("od", 0D) + .put("of", 0F) + .put("a", new String[]{"foo", "bar", "baz", "foobar"}) + .put("b", new Long[]{1L, 2L, 3L, 4L, 5L}) + .put("c", new Double[]{3.1, 4.2, 5.3}) + .put("someComplex", new TypeStrategiesTest.NullableLongPair(1L, 2L)) + .put("str1", "v1") + .put("str2", "v2") + .put("nestedArray", nestedArray); + bestEffortBindings = InputBindings.forMap(builder.build()); + typedBindings = InputBindings.forMap( + builder.build(), InputBindings.inspectorFromTypeMap(inputTypesBuilder.build()) + ); + allBindings = new Expr.ObjectBinding[]{bestEffortBindings, typedBindings}; } @Test @@ -243,6 +277,8 @@ public class FunctionTest extends InitializedNullHandlingTest { assertExpr("array_length([1,2,3])", 3L); assertExpr("array_length(a)", 4L); + // nested types only work with typed bindings right now, and pretty limited support for stuff + assertExpr("array_length(nestedArray)", 2L, typedBindings); } @Test @@ -251,6 +287,8 @@ public class FunctionTest extends InitializedNullHandlingTest assertExpr("array_offset([1, 2, 3], 2)", 3L); assertArrayExpr("array_offset([1, 2, 3], 3)", null); assertExpr("array_offset(a, 2)", "baz"); + // nested types only work with typed bindings right now, and pretty limited support for stuff + assertExpr("array_offset(nestedArray, 1)", 
ImmutableMap.of("x", 4L, "y", 6.6), typedBindings); } @Test @@ -259,6 +297,8 @@ public class FunctionTest extends InitializedNullHandlingTest assertExpr("array_ordinal([1, 2, 3], 3)", 3L); assertArrayExpr("array_ordinal([1, 2, 3], 4)", null); assertExpr("array_ordinal(a, 3)", "baz"); + // nested types only work with typed bindings right now, and pretty limited support for stuff + assertExpr("array_ordinal(nestedArray, 2)", ImmutableMap.of("x", 4L, "y", 6.6), typedBindings); } @Test @@ -671,7 +711,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { //x = "foo" Parser.parse("human_readable_binary_byte_format(x)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); // for sqlCompatible, function above returns null and goes here // but for non-sqlCompatible, it must not go to here @@ -687,7 +727,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { //x = "foo" Parser.parse("human_readable_binary_byte_format(1024, x)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); //must not go to here Assert.assertTrue(false); @@ -702,7 +742,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { //of = 0F Parser.parse("human_readable_binary_byte_format(1024, of)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); //must not go to here Assert.assertTrue(false); @@ -717,7 +757,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { //of = 0F Parser.parse("human_readable_binary_byte_format(1024, nonexist)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); //must not go to here Assert.assertTrue(false); @@ -735,7 +775,7 @@ public class FunctionTest extends InitializedNullHandlingTest { try { Parser.parse("human_readable_binary_byte_format(1024, maxLong)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); Assert.assertTrue(false); } catch (ExpressionValidationException e) { @@ -747,7 +787,7 @@ public class 
FunctionTest extends InitializedNullHandlingTest try { Parser.parse("human_readable_binary_byte_format(1024, minLong)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); Assert.assertTrue(false); } catch (ExpressionValidationException e) { @@ -759,7 +799,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { Parser.parse("human_readable_binary_byte_format(1024, -1)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); Assert.assertTrue(false); } catch (ExpressionValidationException e) { @@ -771,7 +811,7 @@ public class FunctionTest extends InitializedNullHandlingTest try { Parser.parse("human_readable_binary_byte_format(1024, 4)", ExprMacroTable.nil()) - .eval(bindings); + .eval(bestEffortBindings); Assert.assertTrue(false); } catch (ExpressionValidationException e) { @@ -785,10 +825,15 @@ public class FunctionTest extends InitializedNullHandlingTest @Test public void testSizeFormatInvalidArgumentSize() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[human_readable_binary_byte_format] requires 1 or 2 arguments"); - Parser.parse("human_readable_binary_byte_format(1024, 2, 3)", ExprMacroTable.nil()) - .eval(bindings); + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> Parser.parse( + "human_readable_binary_byte_format(1024, 2, 3)", + ExprMacroTable.nil() + ).eval(bestEffortBindings) + ); + + Assert.assertEquals("Function[human_readable_binary_byte_format] requires 1 or 2 arguments", t.getMessage()); } @Test @@ -915,50 +960,49 @@ public class FunctionTest extends InitializedNullHandlingTest @Test public void testComplexDecodeBaseWrongArgCount() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[complex_decode_base64] requires 2 arguments"); - assertExpr( - "complex_decode_base64(string)", - null + Throwable t = Assert.assertThrows( + 
ExpressionValidationException.class, + () -> assertExpr("complex_decode_base64(string)", null) ); + Assert.assertEquals("Function[complex_decode_base64] requires 2 arguments", t.getMessage()); } @Test public void testComplexDecodeBaseArg0Null() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage( - "Function[complex_decode_base64] first argument must be constant STRING expression containing a valid complex type name but got NULL instead" + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertExpr("complex_decode_base64(null, string)", null) ); - assertExpr( - "complex_decode_base64(null, string)", - null + Assert.assertEquals( + "Function[complex_decode_base64] first argument must be constant STRING expression containing a valid complex type name but got NULL instead", + t.getMessage() ); } @Test public void testComplexDecodeBaseArg0BadType() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage( - "Function[complex_decode_base64] first argument must be constant STRING expression containing a valid complex type name but got '1' instead" + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertExpr("complex_decode_base64(1, string)", null) ); - assertExpr( - "complex_decode_base64(1, string)", - null + Assert.assertEquals( + "Function[complex_decode_base64] first argument must be constant STRING expression containing a valid complex type name but got '1' instead", + t.getMessage() ); } @Test public void testComplexDecodeBaseArg0Unknown() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage( - "Function[complex_decode_base64] first argument must be a valid COMPLEX type name, got unknown COMPLEX type [COMPLEX]" + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertExpr("complex_decode_base64('unknown', string)", null) ); - 
assertExpr( - "complex_decode_base64('unknown', string)", - null + Assert.assertEquals( + "Function[complex_decode_base64] first argument must be a valid COMPLEX type name, got unknown COMPLEX type [COMPLEX]", + t.getMessage() ); } @@ -972,33 +1016,53 @@ public class FunctionTest extends InitializedNullHandlingTest @Test public void testMVToArrayWithConstantLiteral() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[mv_to_array] argument 1 should be an identifier expression. Use array() instead"); - assertArrayExpr("mv_to_array('1')", null); + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertArrayExpr("mv_to_array('1')", null) + ); + Assert.assertEquals( + "Function[mv_to_array] argument 1 should be an identifier expression. Use array() instead", + t.getMessage() + ); } @Test public void testMVToArrayWithFunction() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[mv_to_array] argument (repeat [hello, 2]) should be an identifier expression. Use array() instead"); - assertArrayExpr("mv_to_array(repeat('hello', 2))", null); + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertArrayExpr("mv_to_array(repeat('hello', 2))", null) + ); + Assert.assertEquals( + "Function[mv_to_array] argument (repeat [hello, 2]) should be an identifier expression. 
Use array() instead", + t.getMessage() + ); } @Test public void testMVToArrayWithMoreArgs() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[mv_to_array] requires 1 argument"); - assertArrayExpr("mv_to_array(x,y)", null); + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertArrayExpr("mv_to_array(x,y)", null) + ); + Assert.assertEquals( + "Function[mv_to_array] requires 1 argument", + t.getMessage() + ); } @Test public void testMVToArrayWithNoArgs() { - expectedException.expect(ExpressionValidationException.class); - expectedException.expectMessage("Function[mv_to_array] requires 1 argument"); - assertArrayExpr("mv_to_array()", null); + Throwable t = Assert.assertThrows( + ExpressionValidationException.class, + () -> assertArrayExpr("mv_to_array()", null) + ); + Assert.assertEquals( + "Function[mv_to_array] requires 1 argument", + t.getMessage() + ); } @Test @@ -1010,28 +1074,54 @@ public class FunctionTest extends InitializedNullHandlingTest @Test public void testMultiplyOnString() { - expectedException.expect(IAE.class); - expectedException.expectMessage("operator '*' in expression (\"str1\" * \"str2\") is not supported on type STRING."); - assertExpr("str1 * str2", null); + Throwable t = Assert.assertThrows( + IAE.class, + () -> assertExpr("str1 * str2", null) + ); + Assert.assertEquals( + "operator '*' in expression (\"str1\" * \"str2\") is not supported on type STRING.", + t.getMessage() + ); } @Test public void testMinusOnString() { - expectedException.expect(IAE.class); - expectedException.expectMessage("operator '-' in expression (\"str1\" - \"str2\") is not supported on type STRING."); - assertExpr("str1 - str2", null); + Throwable t = Assert.assertThrows( + IAE.class, + () -> assertExpr("str1 - str2", null) + ); + Assert.assertEquals( + "operator '-' in expression (\"str1\" - \"str2\") is not supported on type STRING.", + t.getMessage() + ); } @Test public 
void testDivOnString() { - expectedException.expect(IAE.class); - expectedException.expectMessage("operator '/' in expression (\"str1\" / \"str2\") is not supported on type STRING."); - assertExpr("str1 / str2", null); + Throwable t = Assert.assertThrows( + IAE.class, + () -> assertExpr("str1 / str2", null) + ); + Assert.assertEquals( + "operator '/' in expression (\"str1\" / \"str2\") is not supported on type STRING.", + t.getMessage() + ); } private void assertExpr(final String expression, @Nullable final Object expectedResult) + { + for (Expr.ObjectBinding toUse : allBindings) { + assertExpr(expression, expectedResult, toUse); + } + } + + private void assertExpr( + final String expression, + @Nullable final Object expectedResult, + Expr.ObjectBinding bindings + ) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); Assert.assertEquals(expression, expectedResult, expr.eval(bindings).value()); @@ -1050,6 +1140,18 @@ public class FunctionTest extends InitializedNullHandlingTest } private void assertArrayExpr(final String expression, @Nullable final Object[] expectedResult) + { + + for (Expr.ObjectBinding toUse : allBindings) { + assertArrayExpr(expression, expectedResult, toUse); + } + } + + private void assertArrayExpr( + final String expression, + @Nullable final Object[] expectedResult, + Expr.ObjectBinding bindings + ) { final Expr expr = Parser.parse(expression, ExprMacroTable.nil()); Assert.assertArrayEquals(expression, expectedResult, expr.eval(bindings).asArray()); diff --git a/processing/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java b/processing/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java index a8724db8a01..ad4dccd41a7 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/VectorExprSanityTest.java @@ -260,6 +260,18 @@ public class VectorExprSanityTest extends InitializedNullHandlingTest 
testExpression("concat(s1,'-',s2,'-',l1,'-',d1)", types); } + @Test + public void testConstants() + { + testExpression("null", types); + testExpression("1", types); + testExpression("1.1", types); + testExpression("NaN", types); + testExpression("Infinity", types); + testExpression("-Infinity", types); + testExpression("'hello'", types); + } + static void testFunctions(Map types, String[] templates, String[] args) { for (String template : templates) { diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java index 236159e2571..564ff92198e 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java @@ -54,11 +54,13 @@ import org.apache.druid.query.QueryRunnerFactory; import org.apache.druid.query.QueryRunnerTestHelper; import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.context.ResponseContext; +import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerFactory; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; import org.apache.druid.query.groupby.TestGroupByBuffers; +import org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2; import org.apache.druid.query.scan.ScanQueryConfig; import org.apache.druid.query.scan.ScanQueryEngine; import org.apache.druid.query.scan.ScanQueryQueryToolChest; @@ -100,6 +102,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * This class provides general utility to test any druid aggregation implementation given raw data, @@ -785,13 +788,30 @@ public class AggregationTestHelper 
implements Closeable ); String resultStr = mapper.writer().writeValueAsString(yielder); - List resultRows = Lists.transform( + List resultRows = Lists.transform( readQueryResultArrayFromString(resultStr), toolChest.makePreComputeManipulatorFn( queryPlus.getQuery(), MetricManipulatorFns.deserializing() ) ); + + // coerce stuff so merge happens right after serde + if (queryPlus.getQuery() instanceof GroupByQuery) { + List comparable = + resultRows.stream() + .peek(row -> { + GroupByQuery query = (GroupByQuery) queryPlus.getQuery(); + GroupByQueryEngineV2.convertRowTypesToOutputTypes( + query.getDimensions(), + row, + query.getResultRowDimensionStart() + ); + }) + .collect(Collectors.toList()); + + return Sequences.simple(comparable); + } return Sequences.simple(resultRows); } catch (Exception ex) { diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index 235579b19fb..afb6c9298c8 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -66,9 +66,9 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Expr.ObjectBinding inputBindings = InputBindings.forInputSuppliers( new ImmutableMap.Builder() - .put("nest", InputBindings.inputSupplier(NestedDataExpressions.TYPE, () -> NEST)) - .put("nestWrapped", InputBindings.inputSupplier(NestedDataExpressions.TYPE, () -> new StructuredData(NEST))) - .put("nester", InputBindings.inputSupplier(NestedDataExpressions.TYPE, () -> NESTER)) + .put("nest", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> NEST)) + .put("nestWrapped", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> new StructuredData(NEST))) + .put("nester", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> NESTER)) 
.put("string", InputBindings.inputSupplier(ExpressionType.STRING, () -> "abcdef")) .put("long", InputBindings.inputSupplier(ExpressionType.LONG, () -> 1234L)) .put("double", InputBindings.inputSupplier(ExpressionType.DOUBLE, () -> 1.234)) @@ -209,17 +209,17 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Expr expr = Parser.parse("json_query(nest, '$.x')", MACRO_TABLE); ExprEval eval = expr.eval(inputBindings); Assert.assertEquals(100L, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("json_query(nester, '$.x')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(NESTER.get("x"), eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("json_query(nester, '$.x[1]')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals("b", eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("json_query(nester, '$.x[23]')", MACRO_TABLE); eval = expr.eval(inputBindings); @@ -236,7 +236,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest expr = Parser.parse("json_query(nester, '$.y.a')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals("hello", eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("json_query(nester, '$.y.a.b.c[12]')", MACRO_TABLE); eval = expr.eval(inputBindings); @@ -245,7 +245,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest expr = Parser.parse("json_query(long, '$')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(1234L, eval.value()); - 
Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); } @Test @@ -254,44 +254,44 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Expr expr = Parser.parse("parse_json(null)", MACRO_TABLE); ExprEval eval = expr.eval(inputBindings); Assert.assertEquals(null, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("parse_json('null')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(null, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); Assert.assertThrows(ExpressionProcessingException.class, () -> Parser.parse("parse_json('{')", MACRO_TABLE)); expr = Parser.parse("try_parse_json('{')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(null, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); Assert.assertThrows(ExpressionProcessingException.class, () -> Parser.parse("parse_json('hello world')", MACRO_TABLE)); expr = Parser.parse("try_parse_json('hello world')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(null, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("parse_json('\"hello world\"')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals("hello world", eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("parse_json('1')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(1, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + 
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("parse_json('true')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(true, eval.value()); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("parse_json('{\"foo\":1}')", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals("{\"foo\":1}", JSON_MAPPER.writeValueAsString(eval.value())); - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); } @Test @@ -309,7 +309,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Map val = (Map) eval.value(); Assert.assertEquals(NEST.get(key), ((Integer) val.get(key)).longValue()); } - Assert.assertEquals(NestedDataExpressions.TYPE, eval.type()); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); expr = Parser.parse("json_value(parse_json('{\"x\":100,\"y\":200,\"z\":300}'), '$.x')", MACRO_TABLE); eval = expr.eval(inputBindings); diff --git a/processing/src/test/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactoryTest.java index c9ee77b0e97..dce49b129f0 100644 --- a/processing/src/test/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/filter/vector/VectorValueMatcherColumnProcessorFactoryTest.java @@ -19,7 +19,6 @@ package org.apache.druid.query.filter.vector; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; @@ -63,13 +62,8 @@ public class VectorValueMatcherColumnProcessorFactoryTest extends InitializedNul 
Assert.assertEquals(VECTOR_SIZE, matcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, matcher.getCurrentVectorSize()); - // in default mode, matching null produces a boolean matcher VectorValueMatcher booleanMatcher = matcherFactory.makeMatcher((String) null); - if (NullHandling.replaceWithDefault()) { - Assert.assertTrue(booleanMatcher instanceof BooleanVectorValueMatcher); - } else { - Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); - } + Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); Assert.assertEquals(VECTOR_SIZE, booleanMatcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, booleanMatcher.getCurrentVectorSize()); EasyMock.verify(vectorValueSelector); @@ -92,13 +86,9 @@ public class VectorValueMatcherColumnProcessorFactoryTest extends InitializedNul Assert.assertEquals(VECTOR_SIZE, matcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, matcher.getCurrentVectorSize()); - // in default mode, matching null produces a boolean matcher + VectorValueMatcher booleanMatcher = matcherFactory.makeMatcher((String) null); - if (NullHandling.replaceWithDefault()) { - Assert.assertTrue(booleanMatcher instanceof BooleanVectorValueMatcher); - } else { - Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); - } + Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); Assert.assertEquals(VECTOR_SIZE, booleanMatcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, booleanMatcher.getCurrentVectorSize()); EasyMock.verify(vectorValueSelector); @@ -120,13 +110,8 @@ public class VectorValueMatcherColumnProcessorFactoryTest extends InitializedNul Assert.assertEquals(VECTOR_SIZE, matcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, matcher.getCurrentVectorSize()); - // in default mode, matching null produces a boolean matcher VectorValueMatcher booleanMatcher = matcherFactory.makeMatcher((String) null); - if (NullHandling.replaceWithDefault()) { - 
Assert.assertTrue(booleanMatcher instanceof BooleanVectorValueMatcher); - } else { - Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); - } + Assert.assertFalse(booleanMatcher instanceof BooleanVectorValueMatcher); Assert.assertEquals(VECTOR_SIZE, booleanMatcher.getMaxVectorSize()); Assert.assertEquals(CURRENT_SIZE, booleanMatcher.getCurrentVectorSize()); EasyMock.verify(vectorValueSelector); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java index c6f9a0b7761..c3a15527704 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java @@ -368,7 +368,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest } @Test - public void testGroupBySomeFieldOnStringColumnWithFilterExpectedType() + public void testGroupBySomeFieldOnStringColumnWithFilterExpectedTypeLong() { List vals = new ArrayList<>(); vals.add("100"); @@ -396,6 +396,64 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest ); } + @Test + public void testGroupBySomeFieldOnStringColumnWithFilterExpectedTypeDouble() + { + List vals = new ArrayList<>(); + vals.add("100"); + vals.add("200"); + vals.add("300"); + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.DOUBLE)) + .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.LONG)) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .setDimFilter(new InDimFilter("v0", vals, null)) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{100.0, 2L} + ), + false, + true + ); + } + + 
@Test + public void testGroupBySomeFieldOnStringColumnWithFilterExpectedTypeFloat() + { + List vals = new ArrayList<>(); + vals.add("100"); + vals.add("200"); + vals.add("300"); + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.FLOAT)) + .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.LONG)) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .setDimFilter(new InDimFilter("v0", vals, null)) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{100f, 2L} + ), + false, + true + ); + } + @Test public void testGroupBySomeFieldOnStringColumnWithFilterNil() { @@ -603,8 +661,10 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest final Object[] resultRow = results.get(i).getArray(); Assert.assertEquals(expected.get(i).length, resultRow.length); for (int j = 0; j < resultRow.length; j++) { - if (rowSignature.getColumnType(j).map(t -> t.anyOf(ValueType.DOUBLE, ValueType.FLOAT)).orElse(false)) { + if (rowSignature.getColumnType(j).map(t -> t.is(ValueType.DOUBLE)).orElse(false)) { Assert.assertEquals((Double) expected.get(i)[j], (Double) resultRow[j], 0.01); + } else if (rowSignature.getColumnType(j).map(t -> t.is(ValueType.FLOAT)).orElse(false)) { + Assert.assertEquals((Float) expected.get(i)[j], (Float) resultRow[j], 0.01); } else { Assert.assertEquals(expected.get(i)[j], resultRow[j]); } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java new file mode 100644 index 00000000000..3d992521735 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedGroupByArrayQueryTest.java @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.guice.NestedDataModule; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.NestedDataTestUtils; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2; +import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; 
+import org.apache.druid.segment.data.ComparableList; +import org.apache.druid.segment.data.ComparableStringArray; +import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +@RunWith(Parameterized.class) +public class NestedGroupByArrayQueryTest +{ + private static final Logger LOG = new Logger(NestedDataGroupByQueryTest.class); + private static final ObjectMapper JSON_MAPPER = TestHelper.makeJsonMapper(); + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private final Closer closer; + private final GroupByQueryConfig config; + private final QueryContexts.Vectorize vectorize; + private final AggregationTestHelper helper; + private final BiFunction> segmentsGenerator; + private final String segmentsName; + + public NestedGroupByArrayQueryTest( + GroupByQueryConfig config, + BiFunction> segmentGenerator, + String vectorize + ) + { + NestedDataModule.registerHandlersAndSerde(); + this.config = config; + this.vectorize = QueryContexts.Vectorize.fromString(vectorize); + this.helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + NestedDataModule.getJacksonModulesList(), + config, + tempFolder + ); + this.segmentsGenerator = segmentGenerator; + this.segmentsName = segmentGenerator.toString(); + this.closer = Closer.create(); + } + + public Map getContext() + { + return ImmutableMap.of( + QueryContexts.VECTORIZE_KEY, vectorize.toString(), + QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize.toString() 
+ ); + } + + @Parameterized.Parameters(name = "config = {0}, segments = {1}, vectorize = {2}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + final List>> segmentsGenerators = + NestedDataTestUtils.getSegmentGenerators(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); + + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { + // group by v1 doesn't support array stuff + continue; + } + for (BiFunction> generatorFn : segmentsGenerators) { + // skip force because arrays don't really support vectorize engine, but we want the coverage for once they do... + for (String vectorize : new String[]{"false", "true"}) { + constructors.add(new Object[]{config, generatorFn, vectorize}); + } + } + } + return constructors; + } + + @Before + public void setup() + { + } + + @After + public void teardown() throws IOException + { + closer.close(); + } + + @Test + public void testGroupByRootArrayString() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("arrayString", ColumnType.STRING_ARRAY)) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{null, 8L}, + new Object[]{ComparableStringArray.of("a", "b"), 8L}, + new Object[]{ComparableStringArray.of("a", "b", "c"), 4L}, + new Object[]{ComparableStringArray.of("b", "c"), 4L}, + new Object[]{ComparableStringArray.of("d", "e"), 4L} + ) + ); + } + + @Test + public void testGroupByRootArrayLong() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("arrayLong", ColumnType.LONG_ARRAY)) 
+ .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{null, 8L}, + new Object[]{asComparableList(1L, 2L, 3L), 8L}, + new Object[]{asComparableList(1L, 2L, 3L, 4L), 4L}, + new Object[]{asComparableList(1L, 4L), 4L}, + new Object[]{asComparableList(2L, 3L), 4L} + ) + ); + } + + @Test + public void testGroupByRootArrayDouble() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("arrayDouble", ColumnType.DOUBLE_ARRAY)) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{null, 8L}, + new Object[]{asComparableList(1.1, 2.2, 3.3), 8L}, + new Object[]{asComparableList(1.1, 3.3), 4L}, + new Object[]{asComparableList(2.2, 3.3, 4.0), 4L}, + new Object[]{asComparableList(3.3, 4.4, 5.5), 4L} + ) + ); + } + + @Test + public void testGroupByRootArrayStringElement() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.STRING)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayString", + "$[2]", + "v0", + ColumnType.STRING + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{null, 24L}, + new Object[]{"c", 4L} + ) + ); + } + + @Test + public void testGroupByRootArrayStringElementDouble() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.DOUBLE)) + 
.setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayString", + "$[2]", + "v0", + ColumnType.DOUBLE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultDoubleValue(), 28L} + ) + ); + } + + @Test + public void testGroupByRootArrayStringElementLong() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayString", + "$[2]", + "v0", + ColumnType.LONG + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultLongValue(), 28L} + ) + ); + } + + @Test + public void testGroupByRootArrayStringElementFloat() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.FLOAT)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayString", + "$[2]", + "v0", + ColumnType.FLOAT + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultFloatValue(), 28L} + ) + ); + } + + @Test + public void testGroupByRootArrayLongElement() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayLong", + "$[2]", + "v0", + ColumnType.LONG + ) + ) + 
.setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultLongValue(), 16L}, + new Object[]{3L, 12L} + ) + ); + } + + @Test + public void testGroupByRootArrayLongElementDouble() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.DOUBLE)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayLong", + "$[2]", + "v0", + ColumnType.DOUBLE + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultDoubleValue(), 16L}, + new Object[]{3.0, 12L} + ) + ); + } + + @Test + public void testGroupByRootArrayLongElementFloat() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.FLOAT)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayLong", + "$[2]", + "v0", + ColumnType.FLOAT + ) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{NullHandling.defaultFloatValue(), 16L}, + new Object[]{3.0f, 12L} + ) + ); + } + + @Test + public void testGroupByRootArrayLongElementString() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.STRING)) + .setVirtualColumns( + new NestedFieldVirtualColumn( + "arrayLong", + "$[2]", + "v0", + ColumnType.STRING + ) + ) + .setAggregatorSpecs(new 
CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{null, 16L}, + new Object[]{"3", 12L} + ) + ); + } + + private void runResults( + GroupByQuery groupQuery, + List expectedResults + ) + { + Supplier> runner = + () -> helper.runQueryOnSegmentsObjs(segmentsGenerator.apply(tempFolder, closer), groupQuery).toList(); + + List results = runner.get(); + verifyResults( + groupQuery, + results, + expectedResults + ); + } + + private static void verifyResults(GroupByQuery query, List results, List expected) + { + RowSignature rowSignature = query.getResultRowSignature(); + List serdeAndBack = + results.stream() + .peek( + row -> GroupByQueryEngineV2.convertRowTypesToOutputTypes( + query.getDimensions(), + row, + query.getResultRowDimensionStart() + ) + ) + .collect(Collectors.toList()); + LOG.info("results:\n%s", serdeAndBack.stream().map(ResultRow::toString).collect(Collectors.joining("\n"))); + Assert.assertEquals(expected.size(), serdeAndBack.size()); + for (int i = 0; i < expected.size(); i++) { + final Object[] resultRow = serdeAndBack.get(i).getArray(); + Assert.assertEquals(expected.get(i).length, resultRow.length); + for (int j = 0; j < resultRow.length; j++) { + if (expected.get(i)[j] == null) { + Assert.assertNull(resultRow[j]); + } else if (rowSignature.getColumnType(j).map(t -> t.is(ValueType.DOUBLE)).orElse(false)) { + Assert.assertEquals((Double) expected.get(i)[j], (Double) resultRow[j], 0.01); + } else if (rowSignature.getColumnType(j).map(t -> t.is(ValueType.FLOAT)).orElse(false)) { + Assert.assertEquals((Float) expected.get(i)[j], (Float) resultRow[j], 0.01); + } else { + Assert.assertEquals(expected.get(i)[j], resultRow[j]); + } + } + } + } + + public static ComparableList asComparableList(T... 
objects) + { + return new ComparableList<>(Arrays.asList(objects)); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 87798ebc18f..e1d1a79f727 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -188,7 +188,9 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest logResults(resultsRealtime); Assert.assertEquals(1, resultsRealtime.size()); Assert.assertEquals(resultsRealtime.size(), resultsSegments.size()); - Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); + if (NullHandling.sqlCompatible()) { + Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); + } } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java index 33c93bb818a..f4b03fe44f5 100644 --- a/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/AutoTypeColumnIndexerTest.java @@ -68,60 +68,70 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest public void testKeySizeEstimation() { AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer(); - Assert.assertEquals(0, indexer.getCardinality()); + int baseCardinality = NullHandling.sqlCompatible() ? 
0 : 2; + Assert.assertEquals(baseCardinality, indexer.getCardinality()); EncodedKeyComponent key; // new raw value, new field, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); Assert.assertEquals(228, key.getEffectiveSizeBytes()); - Assert.assertEquals(1, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); // adding same value only adds estimated size of value itself key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); Assert.assertEquals(112, key.getEffectiveSizeBytes()); - Assert.assertEquals(1, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); // new raw value, new field, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); Assert.assertEquals(94, key.getEffectiveSizeBytes()); - Assert.assertEquals(2, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); // adding same value only adds estimated size of value itself key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); Assert.assertEquals(16, key.getEffectiveSizeBytes()); - Assert.assertEquals(2, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); // new raw value, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false); Assert.assertEquals(48, key.getEffectiveSizeBytes()); - Assert.assertEquals(3, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 3, indexer.getCardinality()); // new raw value, new fields key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); Assert.assertEquals(168, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); // new raw value, re-use fields and dictionary key 
= indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); Assert.assertEquals(104, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); // new raw value, new fields key = indexer.processRowValsToUnsortedEncodedKeyComponent( ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false ); Assert.assertEquals(166, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); // new raw value key = indexer.processRowValsToUnsortedEncodedKeyComponent( ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false ); Assert.assertEquals(166, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false); if (NullHandling.replaceWithDefault()) { Assert.assertEquals(0, key.getEffectiveSizeBytes()); - Assert.assertEquals(7, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); } else { Assert.assertEquals(104, key.getEffectiveSizeBytes()); - Assert.assertEquals(7, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); + } + + key = indexer.processRowValsToUnsortedEncodedKeyComponent(0L, false); + if (NullHandling.replaceWithDefault()) { + Assert.assertEquals(16, key.getEffectiveSizeBytes()); + Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); + } else { + Assert.assertEquals(48, key.getEffectiveSizeBytes()); + Assert.assertEquals(baseCardinality + 8, indexer.getCardinality()); } } @@ -247,20 +257,43 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = 
columnSelectorFactory.makeColumnValueSelector(LONG_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultLongValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultLongValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultLongValue()), dimensionSelector.getObject()); + } + columnSelectorFactory = cursorList.get(4).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(LONG_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + 
Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultLongValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultLongValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultLongValue()), dimensionSelector.getObject()); + } } @Test @@ -320,18 +353,42 @@ public class AutoTypeColumnIndexerTest extends InitializedNullHandlingTest columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(DOUBLE_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultDoubleValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultDoubleValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultDoubleValue()), dimensionSelector.getObject()); + } columnSelectorFactory = 
cursorList.get(4).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(DOUBLE_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultDoubleValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultDoubleValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultDoubleValue()), dimensionSelector.getObject()); + } } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java index 0fbd20a9a86..0ade7993d4e 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java @@ -57,6 +57,7 @@ import java.util.Collections; import java.util.List; import java.util.UUID; import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Function; /** * Helps tests make segments. 
@@ -97,7 +98,7 @@ public class IndexBuilder public static IndexBuilder create() { - return new IndexBuilder(TestHelper.JSON_MAPPER, TestHelper.NO_CACHE_COLUMN_CONFIG); + return new IndexBuilder(TestHelper.JSON_MAPPER, TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); } public static IndexBuilder create(ColumnConfig columnConfig) @@ -107,7 +108,7 @@ public class IndexBuilder public static IndexBuilder create(ObjectMapper jsonMapper) { - return new IndexBuilder(jsonMapper, TestHelper.NO_CACHE_COLUMN_CONFIG); + return new IndexBuilder(jsonMapper, TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); } public static IndexBuilder create(ObjectMapper jsonMapper, ColumnConfig columnConfig) @@ -203,6 +204,12 @@ public class IndexBuilder return this; } + public IndexBuilder mapSchema(Function f) + { + this.schema = f.apply(this.schema); + return this; + } + public IncrementalIndex buildIncrementalIndex() { if (inputSource != null) { @@ -259,6 +266,7 @@ public class IndexBuilder } } + public QueryableIndex buildMMappedMergedIndex() { Preconditions.checkNotNull(tmpDir, "tmpDir"); diff --git a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java index b11417dc361..90d235c4a03 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java @@ -118,7 +118,7 @@ public class IndexIONullColumnsCompatibilityTest extends InitializedNullHandling @Test public void testV9LoaderThatIgnoresmptyColumns() throws IOException { - QueryableIndex queryableIndex = new V9IndexLoaderExceptEmptyColumns(TestHelper.NO_CACHE_COLUMN_CONFIG).load( + QueryableIndex queryableIndex = new V9IndexLoaderExceptEmptyColumns(TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG).load( segmentDir, TestHelper.makeJsonMapper(), false, diff --git 
a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java index 92da314dfc7..a50df711498 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java @@ -68,60 +68,70 @@ public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest public void testKeySizeEstimation() { NestedDataColumnIndexer indexer = new NestedDataColumnIndexer(); - Assert.assertEquals(0, indexer.getCardinality()); + int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2; + Assert.assertEquals(baseCardinality, indexer.getCardinality()); EncodedKeyComponent key; // new raw value, new field, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); Assert.assertEquals(228, key.getEffectiveSizeBytes()); - Assert.assertEquals(1, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); // adding same value only adds estimated size of value itself key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); Assert.assertEquals(112, key.getEffectiveSizeBytes()); - Assert.assertEquals(1, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 1, indexer.getCardinality()); // new raw value, new field, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); Assert.assertEquals(94, key.getEffectiveSizeBytes()); - Assert.assertEquals(2, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); // adding same value only adds estimated size of value itself key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); Assert.assertEquals(16, key.getEffectiveSizeBytes()); - Assert.assertEquals(2, indexer.getCardinality()); + 
Assert.assertEquals(baseCardinality + 2, indexer.getCardinality()); // new raw value, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false); Assert.assertEquals(48, key.getEffectiveSizeBytes()); - Assert.assertEquals(3, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 3, indexer.getCardinality()); // new raw value, new fields key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); Assert.assertEquals(276, key.getEffectiveSizeBytes()); - Assert.assertEquals(5, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); // new raw value, re-use fields and dictionary key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); Assert.assertEquals(56, key.getEffectiveSizeBytes()); - Assert.assertEquals(5, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); // new raw value, new fields key = indexer.processRowValsToUnsortedEncodedKeyComponent( ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false ); Assert.assertEquals(286, key.getEffectiveSizeBytes()); - Assert.assertEquals(5, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); // new raw value key = indexer.processRowValsToUnsortedEncodedKeyComponent( ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false ); Assert.assertEquals(118, key.getEffectiveSizeBytes()); - Assert.assertEquals(5, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 5, indexer.getCardinality()); key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false); if (NullHandling.replaceWithDefault()) { Assert.assertEquals(0, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); } else { Assert.assertEquals(104, key.getEffectiveSizeBytes()); - Assert.assertEquals(6, 
indexer.getCardinality()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); + } + + key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false); + if (NullHandling.replaceWithDefault()) { + Assert.assertEquals(16, key.getEffectiveSizeBytes()); + Assert.assertEquals(baseCardinality + 6, indexer.getCardinality()); + } else { + Assert.assertEquals(48, key.getEffectiveSizeBytes()); + Assert.assertEquals(baseCardinality + 7, indexer.getCardinality()); } } @@ -247,20 +257,42 @@ public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(LONG_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultLongValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultLongValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultLongValue()), dimensionSelector.getObject()); + } columnSelectorFactory = cursorList.get(4).getColumnSelectorFactory(); valueSelector = 
columnSelectorFactory.makeColumnValueSelector(LONG_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultLongValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultLongValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultLongValue()), dimensionSelector.getObject()); + } } @Test @@ -320,18 +352,42 @@ public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest columnSelectorFactory = cursorList.get(3).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(DOUBLE_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + 
Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultDoubleValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultDoubleValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultDoubleValue()), dimensionSelector.getObject()); + } columnSelectorFactory = cursorList.get(4).getColumnSelectorFactory(); valueSelector = columnSelectorFactory.makeColumnValueSelector(DOUBLE_COL); dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertNull(dimensionSelector.getObject()); + } else { + Assert.assertEquals(NullHandling.defaultDoubleValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(1, dimensionSelector.getRow().size()); + Assert.assertEquals( + String.valueOf(NullHandling.defaultDoubleValue()), + dimensionSelector.lookupName(dimensionSelector.getRow().get(0)) + ); + Assert.assertEquals(String.valueOf(NullHandling.defaultDoubleValue()), dimensionSelector.getObject()); + } } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/TestHelper.java 
b/processing/src/test/java/org/apache/druid/segment/TestHelper.java index 6dbde8ab6e3..b0136094a6d 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/TestHelper.java @@ -62,7 +62,26 @@ import java.util.stream.IntStream; public class TestHelper { public static final ObjectMapper JSON_MAPPER = makeJsonMapper(); - public static final ColumnConfig NO_CACHE_COLUMN_CONFIG = () -> 0; + public static final ColumnConfig NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG = new ColumnConfig() + { + @Override + public int columnCacheSizeBytes() + { + return 0; + } + + @Override + public double skipValueRangeIndexScale() + { + return 1.0; + } + + @Override + public double skipValuePredicateIndexScale() + { + return 1.0; + } + }; public static IndexMergerV9 getTestIndexMergerV9(SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) { @@ -71,7 +90,7 @@ public class TestHelper public static IndexIO getTestIndexIO() { - return getTestIndexIO(NO_CACHE_COLUMN_CONFIG); + return getTestIndexIO(NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); } public static IndexIO getTestIndexIO(ColumnConfig columnConfig) diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnTypeTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnTypeTest.java index fe730f21745..89f0aeb80f3 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnTypeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnTypeTest.java @@ -145,6 +145,28 @@ public class ColumnTypeTest ); } + @Test + public void testLeastRestrictiveType() + { + Assert.assertEquals(ColumnType.STRING, ColumnType.leastRestrictiveType(ColumnType.STRING, ColumnType.DOUBLE)); + Assert.assertEquals(ColumnType.STRING, ColumnType.leastRestrictiveType(ColumnType.STRING, ColumnType.LONG)); + Assert.assertEquals(ColumnType.STRING, ColumnType.leastRestrictiveType(ColumnType.STRING, ColumnType.FLOAT)); + 
Assert.assertEquals(ColumnType.DOUBLE, ColumnType.leastRestrictiveType(ColumnType.FLOAT, ColumnType.DOUBLE)); + Assert.assertEquals(ColumnType.DOUBLE, ColumnType.leastRestrictiveType(ColumnType.FLOAT, ColumnType.LONG)); + Assert.assertEquals(ColumnType.LONG, ColumnType.leastRestrictiveType(ColumnType.LONG, ColumnType.LONG)); + Assert.assertEquals(ColumnType.LONG_ARRAY, ColumnType.leastRestrictiveType(ColumnType.LONG, ColumnType.LONG_ARRAY)); + Assert.assertEquals(ColumnType.STRING_ARRAY, ColumnType.leastRestrictiveType(ColumnType.STRING_ARRAY, ColumnType.LONG_ARRAY)); + Assert.assertEquals(ColumnType.STRING_ARRAY, ColumnType.leastRestrictiveType(ColumnType.STRING_ARRAY, ColumnType.DOUBLE_ARRAY)); + Assert.assertEquals(ColumnType.DOUBLE_ARRAY, ColumnType.leastRestrictiveType(ColumnType.LONG_ARRAY, ColumnType.DOUBLE_ARRAY)); + Assert.assertEquals(ColumnType.NESTED_DATA, ColumnType.leastRestrictiveType(ColumnType.STRING_ARRAY, ColumnType.NESTED_DATA)); + Assert.assertEquals(ColumnType.NESTED_DATA, ColumnType.leastRestrictiveType(ColumnType.NESTED_DATA, ColumnType.STRING_ARRAY)); + Assert.assertEquals(ColumnType.NESTED_DATA, ColumnType.leastRestrictiveType(ColumnType.NESTED_DATA, ColumnType.UNKNOWN_COMPLEX)); + Assert.assertEquals(SOME_COMPLEX, ColumnType.leastRestrictiveType(SOME_COMPLEX, ColumnType.UNKNOWN_COMPLEX)); + Assert.assertEquals(SOME_COMPLEX, ColumnType.leastRestrictiveType(ColumnType.UNKNOWN_COMPLEX, SOME_COMPLEX)); + Assert.assertThrows(IllegalArgumentException.class, () -> ColumnType.leastRestrictiveType(ColumnType.NESTED_DATA, SOME_COMPLEX)); + Assert.assertThrows(IllegalArgumentException.class, () -> ColumnType.leastRestrictiveType(ColumnType.STRING_ARRAY, SOME_COMPLEX)); + } + static class SomeOtherTypeSignature extends BaseTypeSignature { public SomeOtherTypeSignature( diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 
017ec103f08..0298e204b64 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -64,6 +64,7 @@ import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -119,6 +120,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; public abstract class BaseFilterTest extends InitializedNullHandlingTest { @@ -310,44 +312,105 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest final Map>> finishers = ImmutableMap.>>builder() - .put( - "incremental", - input -> { - final IncrementalIndex index = input.buildIncrementalIndex(); - return Pair.of(new IncrementalIndexStorageAdapter(index), index); - } - ) - .put( - "mmapped", - input -> { - final QueryableIndex index = input.buildMMappedIndex(); - return Pair.of(new QueryableIndexStorageAdapter(index), index); - } - ) - .put( - "mmappedMerged", - input -> { - final QueryableIndex index = input.buildMMappedMergedIndex(); - return Pair.of(new QueryableIndexStorageAdapter(index), index); - } - ) - .put( - "rowBasedWithoutTypeSignature", - input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {}) - ) - .put( - "rowBasedWithTypeSignature", - input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {}) - ) - .put("frame (row-based)", input -> { - final FrameSegment segment = input.buildFrameSegment(FrameType.ROW_BASED); - return Pair.of(segment.asStorageAdapter(), segment); - }) - .put("frame 
(columnar)", input -> { - final FrameSegment segment = input.buildFrameSegment(FrameType.COLUMNAR); - return Pair.of(segment.asStorageAdapter(), segment); - }) - .build(); + .put( + "incremental", + input -> { + final IncrementalIndex index = input.buildIncrementalIndex(); + return Pair.of(new IncrementalIndexStorageAdapter(index), index); + } + ) + .put( + "mmappedAutoTypes", + input -> { + input.mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .map( + dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName()) + ) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) + ); + final QueryableIndex index = input.buildMMappedIndex(); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + ) + .put( + "mmappedAutoTypesMerged", + input -> { + input.mapSchema( + schema -> + new IncrementalIndexSchema( + schema.getMinTimestamp(), + schema.getTimestampSpec(), + schema.getGran(), + schema.getVirtualColumns(), + schema.getDimensionsSpec().withDimensions( + schema.getDimensionsSpec() + .getDimensions() + .stream() + .map( + dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName()) + ) + .collect(Collectors.toList()) + ), + schema.getMetrics(), + schema.isRollup() + ) + ); + // if 1 row per segment some of the columns have null values for the row which causes 'auto' + // typing default value coercion to be lost in default value mode, so make sure there is at + // least one number in each segment for these tests to pass correctly because the column + // is typeless and so doesn't write out zeros like regular numbers do + input.intermediaryPersistSize(3); + final QueryableIndex index = input.buildMMappedMergedIndex(); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + ) + 
.put( + "mmapped", + input -> { + final QueryableIndex index = input.buildMMappedIndex(); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + ) + .put( + "mmappedMerged", + input -> { + final QueryableIndex index = input.buildMMappedMergedIndex(); + return Pair.of(new QueryableIndexStorageAdapter(index), index); + } + ) + .put( + "rowBasedWithoutTypeSignature", + input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> { + }) + ) + .put( + "rowBasedWithTypeSignature", + input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> { + }) + ) + .put("frame (row-based)", input -> { + final FrameSegment segment = input.buildFrameSegment(FrameType.ROW_BASED); + return Pair.of(segment.asStorageAdapter(), segment); + }) + .put("frame (columnar)", input -> { + final FrameSegment segment = input.buildFrameSegment(FrameType.COLUMNAR); + return Pair.of(segment.asStorageAdapter(), segment); + }) + .build(); StringEncodingStrategy[] stringEncoding = new StringEncodingStrategy[]{ new StringEncodingStrategy.Utf8(), @@ -816,58 +879,66 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest final boolean testVectorized ) { - Assert.assertEquals( - "Cursor: " + filter, - expectedRows, - selectColumnValuesMatchingFilter(filter, "dim0") - ); - - if (testVectorized) { + try { Assert.assertEquals( - "Cursor (vectorized): " + filter, + "Cursor: " + filter, expectedRows, - selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") + selectColumnValuesMatchingFilter(filter, "dim0") ); + if (testVectorized) { + Assert.assertEquals( + "Cursor (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorCursor(filter, "dim0") + ); + + Assert.assertEquals( + "Cursor Virtual Column (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor(filter, "vdim0", "dim0") + ); + } + Assert.assertEquals( - "Cursor Virtual 
Column (vectorized): " + filter, + "Cursor with postFiltering: " + filter, expectedRows, - selectColumnValuesMatchingFilterUsingVectorVirtualColumnCursor(filter, "vdim0", "dim0") + selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") ); - } - Assert.assertEquals( - "Cursor with postFiltering: " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingPostFiltering(filter, "dim0") - ); + if (testVectorized) { + Assert.assertEquals( + "Cursor with postFiltering (vectorized): " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") + ); + } - if (testVectorized) { Assert.assertEquals( - "Cursor with postFiltering (vectorized): " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingVectorizedPostFiltering(filter, "dim0") - ); - } - - Assert.assertEquals( - "Filtered aggregator: " + filter, - expectedRows.size(), - selectCountUsingFilteredAggregator(filter) - ); - - if (testVectorized) { - Assert.assertEquals( - "Filtered aggregator (vectorized): " + filter, + "Filtered aggregator: " + filter, expectedRows.size(), - selectCountUsingVectorizedFilteredAggregator(filter) + selectCountUsingFilteredAggregator(filter) + ); + + if (testVectorized) { + Assert.assertEquals( + "Filtered aggregator (vectorized): " + filter, + expectedRows.size(), + selectCountUsingVectorizedFilteredAggregator(filter) + ); + } + + Assert.assertEquals( + "RowBasedColumnSelectorFactory: " + filter, + expectedRows, + selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(filter, "dim0") ); } - - Assert.assertEquals( - "RowBasedColumnSelectorFactory: " + filter, - expectedRows, - selectColumnValuesMatchingFilterUsingRowBasedColumnSelectorFactory(filter, "dim0") - ); + catch (ISE ise) { + // ignore failures resulting from 'auto' + if (!(testName.contains("AutoTypes") && "Unsupported type[ARRAY]".equals(ise.getMessage()))) { + throw ise; + } + } } } diff --git 
a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java index 5106df70ae6..2bc1c2c5c4f 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ColumnComparisonFilterTest.java @@ -95,6 +95,11 @@ public class ColumnComparisonFilterTest extends BaseFilterTest @Test public void testColumnsWithoutNulls() { + // columns have mixed type input and so are ingested as COMPLEX + // however the comparison filter currently nulls out complex types instead of comparing the values + if (testName.contains("AutoTypes")) { + return; + } assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim0"), DefaultDimensionSpec.of("dim1") @@ -117,6 +122,11 @@ public class ColumnComparisonFilterTest extends BaseFilterTest @Test public void testMissingColumnNotSpecifiedInDimensionList() { + // columns have mixed type input and so are ingested as COMPLEX + // however the comparison filter currently nulls out complex types instead of comparing the values + if (testName.contains("AutoTypes")) { + return; + } assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of( DefaultDimensionSpec.of("dim6"), DefaultDimensionSpec.of("dim7") diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java index 289843f6006..e46e736f9fa 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExpressionFilterTest.java @@ -162,6 +162,10 @@ public class ExpressionFilterTest extends BaseFilterTest @Test public void testOneMultiValuedStringColumn() { + // auto type columns don't support mvds, bail 
out + if (testName.contains("AutoTypes")) { + return; + } if (NullHandling.replaceWithDefault()) { assertFilterMatchesSkipVectorize(edf("dim4 == ''"), ImmutableList.of("1", "2", "6", "7", "8")); } else { @@ -248,6 +252,10 @@ public class ExpressionFilterTest extends BaseFilterTest assertFilterMatches(edf("dim2 == dim3"), ImmutableList.of("2", "5", "8")); } + // auto type columns don't support mvds, bail out + if (testName.contains("AutoTypes")) { + return; + } // String vs. multi-value string assertFilterMatchesSkipVectorize(edf("dim0 == dim4"), ImmutableList.of("3", "4", "5")); } diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java index 525431dbb60..e579f0f0792 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java @@ -188,7 +188,7 @@ public class JoinTestHelper public static IndexBuilder createFactIndexBuilder(final File tmpDir) throws IOException { - return createFactIndexBuilder(TestHelper.NO_CACHE_COLUMN_CONFIG, tmpDir, -1); + return createFactIndexBuilder(TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG, tmpDir, -1); } public static IndexBuilder createFactIndexBuilder( diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index aa01998bb25..88083536464 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -252,6 +252,7 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.createDeserializer( ColumnType.NESTED_DATA, false, + false, 
ByteOrder.nativeOrder(), RoaringBitmapSerdeFactory.getInstance() ); @@ -275,6 +276,7 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest NestedCommonFormatColumnPartSerde partSerde = NestedCommonFormatColumnPartSerde.createDeserializer( ColumnType.NESTED_DATA, false, + false, ByteOrder.nativeOrder(), RoaringBitmapSerdeFactory.getInstance() ); @@ -733,13 +735,15 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest Assert.assertNull(dimSelector.getObject()); Assert.assertNull(dimSelector.lookupName(dimSelector.getRow().get(0))); - Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(rowNumber)); - Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); + Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(rowNumber)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); + + + Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(rowNumber)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index a3f523bb9ea..95016d013fe 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -515,12 +515,14 @@ public class NestedDataColumnSupplierV4Test extends 
InitializedNullHandlingTest Assert.assertNull(dimSelector.lookupName(dimSelector.getRow().get(0))); Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(rowNumber)); - Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) .computeBitmapResult(resultFactory) .get(rowNumber)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); + + + Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(rowNumber)); Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) .get(rowNumber)); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java index 51d18676ebf..33a11225d49 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarDoubleColumnSupplierTest.java @@ -25,6 +25,7 @@ import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; @@ -46,6 +47,8 @@ import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import 
org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -235,7 +238,9 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest private void smokeTest(ScalarDoubleColumnAndIndexSupplier supplier, ScalarDoubleColumn column) { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); + NoFilterVectorOffset vectorOffset = new NoFilterVectorOffset(1, 0, data.size()); ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); + VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset); StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); @@ -255,8 +260,17 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest if (row != null) { Assert.assertEquals(row, valueSelector.getObject()); - Assert.assertEquals((double) row, valueSelector.getDouble(), 0.0); + Assert.assertEquals(row, valueSelector.getDouble(), 0.0); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(row, vectorValueSelector.getDoubleVector()[0], 0.0); + Assert.assertEquals(row.longValue(), vectorValueSelector.getLongVector()[0]); + Assert.assertEquals(row.floatValue(), vectorValueSelector.getFloatVector()[0], 0.0); + boolean[] nullVector = vectorValueSelector.getNullVector(); + if (NullHandling.sqlCompatible() && nullVector != null) { + Assert.assertFalse(nullVector[0]); + } else { + Assert.assertNull(nullVector); + } Assert.assertTrue(valueSetIndex.forValue(String.valueOf(row)).computeBitmapResult(resultFactory).get(i)); 
Assert.assertTrue(valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of(String.valueOf(row)))) @@ -275,15 +289,36 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); } else { - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertTrue(vectorValueSelector.getNullVector()[0]); + Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) + .computeBitmapResult(resultFactory) + .get(i)); + } else { + Assert.assertEquals(NullHandling.defaultDoubleValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals(NullHandling.defaultDoubleValue(), vectorValueSelector.getDoubleVector()[0], 0.0); + Assert.assertNull(vectorValueSelector.getNullVector()); + + Assert.assertFalse(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) + .computeBitmapResult(resultFactory) + .get(i)); + final String defaultString = String.valueOf(NullHandling.defaultDoubleValue()); + Assert.assertTrue(valueSetIndex.forValue(defaultString).computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(defaultString)) + .computeBitmapResult(resultFactory) + .get(i)); + } + + - Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); 
Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) - .computeBitmapResult(resultFactory) - .get(i)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(i)); Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) @@ -291,6 +326,7 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest } offset.increment(); + vectorOffset.advance(); } } } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java index a1e13cbf3ab..28d191463b9 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/ScalarLongColumnSupplierTest.java @@ -25,6 +25,7 @@ import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; @@ -46,6 +47,8 @@ import org.apache.druid.segment.column.NullValueIndex; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.VectorValueSelector; import 
org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -235,7 +238,9 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest private void smokeTest(ScalarLongColumnAndIndexSupplier supplier, ScalarLongColumn column) { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); + NoFilterVectorOffset vectorOffset = new NoFilterVectorOffset(1, 0, data.size()); ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); + VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset); StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); DruidPredicateIndex predicateIndex = supplier.as(DruidPredicateIndex.class); @@ -257,6 +262,15 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest Assert.assertEquals(row, valueSelector.getObject()); Assert.assertEquals((long) row, valueSelector.getLong()); Assert.assertFalse(valueSelector.isNull()); + Assert.assertEquals((long) row, vectorValueSelector.getLongVector()[0]); + Assert.assertEquals(row.doubleValue(), vectorValueSelector.getDoubleVector()[0], 0.0); + Assert.assertEquals(row.floatValue(), vectorValueSelector.getFloatVector()[0], 0.0); + boolean[] nullVector = vectorValueSelector.getNullVector(); + if (NullHandling.sqlCompatible() && nullVector != null) { + Assert.assertFalse(nullVector[0]); + } else { + Assert.assertNull(nullVector); + } Assert.assertTrue(valueSetIndex.forValue(String.valueOf(row)).computeBitmapResult(resultFactory).get(i)); Assert.assertTrue(valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of(String.valueOf(row)))) @@ -275,15 +289,32 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); } else { - 
Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); + if (NullHandling.sqlCompatible()) { + Assert.assertNull(valueSelector.getObject()); + Assert.assertTrue(valueSelector.isNull()); + Assert.assertTrue(vectorValueSelector.getNullVector()[0]); + Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) + .computeBitmapResult(resultFactory) + .get(i)); + } else { + Assert.assertEquals(NullHandling.defaultLongValue(), valueSelector.getObject()); + Assert.assertFalse(valueSelector.isNull()); + Assert.assertNull(vectorValueSelector.getNullVector()); + Assert.assertFalse(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); + Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) + .computeBitmapResult(resultFactory) + .get(i)); + final String defaultString = String.valueOf(NullHandling.defaultLongValue()); + Assert.assertTrue(valueSetIndex.forValue(defaultString).computeBitmapResult(resultFactory).get(i)); + Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(defaultString)) + .computeBitmapResult(resultFactory) + .get(i)); + } - Assert.assertTrue(valueSetIndex.forValue(null).computeBitmapResult(resultFactory).get(i)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); - Assert.assertTrue(predicateIndex.forPredicate(new SelectorPredicateFactory(null)) - .computeBitmapResult(resultFactory) - .get(i)); Assert.assertFalse(valueSetIndex.forValue(NO_MATCH).computeBitmapResult(resultFactory).get(i)); 
Assert.assertFalse(predicateIndex.forPredicate(new SelectorPredicateFactory(NO_MATCH)) .computeBitmapResult(resultFactory) @@ -291,6 +322,7 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest } offset.increment(); + vectorOffset.advance(); } } } diff --git a/processing/src/test/java/org/apache/druid/segment/nested/VariantArrayColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java similarity index 52% rename from processing/src/test/java/org/apache/druid/segment/nested/VariantArrayColumnSupplierTest.java rename to processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java index 951d59f456b..a841fc14927 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/VariantArrayColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/VariantColumnSupplierTest.java @@ -19,6 +19,7 @@ package org.apache.druid.segment.nested; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; @@ -30,10 +31,13 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.segment.AutoTypeColumnIndexer; import org.apache.druid.segment.AutoTypeColumnMerger; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.IndexableAdapter; import org.apache.druid.segment.SimpleAscendingOffset; @@ -41,9 +45,15 @@ 
import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DruidPredicateIndex; import org.apache.druid.segment.column.NullValueIndex; +import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; +import org.apache.druid.segment.data.CompressionFactory; +import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; +import org.apache.druid.segment.vector.NoFilterVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -54,6 +64,8 @@ import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import java.io.File; import java.io.IOException; @@ -70,16 +82,15 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicReference; -public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest +@RunWith(Parameterized.class) +public class VariantColumnSupplierTest extends InitializedNullHandlingTest { - @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); BitmapSerdeFactory bitmapSerdeFactory = RoaringBitmapSerdeFactory.getInstance(); DefaultBitmapResultFactory resultFactory = new DefaultBitmapResultFactory(bitmapSerdeFactory.getBitmapFactory()); - - List> data = Arrays.asList( + static List> LONG_ARRAY = Arrays.asList( Collections.emptyList(), Arrays.asList(1L, null, 2L), 
null, @@ -88,11 +99,62 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest Arrays.asList(null, null) ); - Closer closer = Closer.create(); + static List> DOUBLE_ARRAY = Arrays.asList( + Collections.emptyList(), + Arrays.asList(1.1, null, 2.2), + null, + Collections.singletonList(null), + Arrays.asList(3.3, 4.4), + Arrays.asList(null, null) + ); - SmooshedFileMapper fileMapper; + static List> STRING_ARRAY = Arrays.asList( + Collections.emptyList(), + Arrays.asList("a", null, "b"), + null, + Collections.singletonList(null), + Arrays.asList("c", "d"), + Arrays.asList(null, null) + ); + + static List VARIANT_NUMERIC = Arrays.asList( + 1L, + 2.2, + null, + 3.3, + 4L, + null + ); + + static List VARIANT_SCALAR = Arrays.asList( + null, + 1L, + null, + "b", + 3.3, + 4L + ); + + static List VARIANT_SCALAR_AND_ARRAY = Arrays.asList( + Collections.emptyList(), + 2L, + null, + Collections.singletonList(null), + Arrays.asList(3L, 4L), + Arrays.asList(null, "a"), + 5.5, + "b" + ); + + static List> VARIANT_ARRAY = Arrays.asList( + Collections.emptyList(), + Arrays.asList("a", null, "b"), + null, + Collections.singletonList(null), + Arrays.asList(3L, 4L), + Arrays.asList(null, 3.3) + ); - ByteBuffer baseBuffer; @BeforeClass public static void staticSetup() @@ -100,29 +162,74 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest NestedDataModule.registerHandlersAndSerde(); } + @Parameterized.Parameters(name = "data = {0}") + public static Collection constructorFeeder() + { + IndexSpec fancy = IndexSpec.builder() + .withLongEncoding(CompressionFactory.LongEncodingStrategy.AUTO) + .withStringDictionaryEncoding( + new StringEncodingStrategy.FrontCoded(16, FrontCodedIndexed.V1) + ) + .build(); + final List constructors = ImmutableList.of( + new Object[]{"ARRAY", LONG_ARRAY, IndexSpec.DEFAULT}, + new Object[]{"ARRAY", LONG_ARRAY, fancy}, + new Object[]{"ARRAY", DOUBLE_ARRAY, IndexSpec.DEFAULT}, + new Object[]{"ARRAY", 
DOUBLE_ARRAY, fancy}, + new Object[]{"ARRAY", STRING_ARRAY, IndexSpec.DEFAULT}, + new Object[]{"ARRAY", STRING_ARRAY, fancy}, + new Object[]{"DOUBLE,LONG", VARIANT_NUMERIC, IndexSpec.DEFAULT}, + new Object[]{"DOUBLE,LONG", VARIANT_NUMERIC, fancy}, + new Object[]{"DOUBLE,LONG,STRING", VARIANT_SCALAR, IndexSpec.DEFAULT}, + new Object[]{"DOUBLE,LONG,STRING", VARIANT_SCALAR, fancy}, + new Object[]{"ARRAY,ARRAY,DOUBLE,LONG,STRING", VARIANT_SCALAR_AND_ARRAY, IndexSpec.DEFAULT}, + new Object[]{"ARRAY,ARRAY,DOUBLE,LONG,STRING", VARIANT_SCALAR_AND_ARRAY, fancy}, + new Object[]{"ARRAY,ARRAY,ARRAY", VARIANT_ARRAY, IndexSpec.DEFAULT}, + new Object[]{"ARRAY,ARRAY,ARRAY", VARIANT_ARRAY, fancy} + ); + + return constructors; + } + + Closer closer = Closer.create(); + + SmooshedFileMapper fileMapper; + + ByteBuffer baseBuffer; + + FieldTypeInfo.MutableTypeSet expectedTypes; + + ColumnType expectedLogicalType = null; + + private final List data; + private final IndexSpec indexSpec; + + public VariantColumnSupplierTest( + @SuppressWarnings("unused") String name, + List data, + IndexSpec indexSpec + ) + { + this.data = data; + this.indexSpec = indexSpec; + } + @Before public void setup() throws IOException { final String fileNameBase = "test"; - fileMapper = smooshify(fileNameBase, tempFolder.newFolder(), data); + fileMapper = smooshify(fileNameBase, tempFolder.newFolder()); baseBuffer = fileMapper.mapFile(fileNameBase); } private SmooshedFileMapper smooshify( String fileNameBase, - File tmpFile, - List data + File tmpFile ) throws IOException { SegmentWriteOutMediumFactory writeOutMediumFactory = TmpFileSegmentWriteOutMediumFactory.instance(); try (final FileSmoosher smoosher = new FileSmoosher(tmpFile)) { - VariantArrayColumnSerializer serializer = new VariantArrayColumnSerializer( - fileNameBase, - IndexSpec.DEFAULT, - writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()), - closer - ); AutoTypeColumnIndexer indexer = new AutoTypeColumnIndexer(); for (Object o : 
data) { @@ -136,6 +243,18 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest SortedValueDictionary globalDictionarySortedCollector = mergable.getValueDictionary(); mergable.mergeFieldsInto(sortedFields); + expectedTypes = sortedFields.get(NestedPathFinder.JSON_PATH_ROOT); + for (ColumnType type : FieldTypeInfo.convertToSet(expectedTypes.getByteValue())) { + expectedLogicalType = ColumnType.leastRestrictiveType(expectedLogicalType, type); + } + VariantColumnSerializer serializer = new VariantColumnSerializer( + fileNameBase, + expectedTypes.getSingleType() == null ? expectedTypes.getByteValue() : null, + indexSpec, + writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()), + closer + ); + serializer.openDictionaryWriter(); serializer.serializeDictionaries( globalDictionarySortedCollector.getSortedStrings(), @@ -173,16 +292,16 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest { ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - VariantArrayColumnAndIndexSupplier supplier = VariantArrayColumnAndIndexSupplier.read( - ColumnType.LONG_ARRAY, + VariantColumnAndIndexSupplier supplier = VariantColumnAndIndexSupplier.read( + expectedLogicalType, ByteOrder.nativeOrder(), bitmapSerdeFactory, baseBuffer, bob, NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES ); - try (VariantArrayColumn column = (VariantArrayColumn) supplier.get()) { - smokeTest(supplier, column); + try (VariantColumn column = (VariantColumn) supplier.get()) { + smokeTest(supplier, column, data, expectedTypes); } } @@ -192,8 +311,8 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest // if this test ever starts being to be a flake, there might be thread safety issues ColumnBuilder bob = new ColumnBuilder(); bob.setFileMapper(fileMapper); - VariantArrayColumnAndIndexSupplier supplier = VariantArrayColumnAndIndexSupplier.read( - ColumnType.LONG_ARRAY, + 
VariantColumnAndIndexSupplier supplier = VariantColumnAndIndexSupplier.read( + expectedLogicalType, ByteOrder.nativeOrder(), bitmapSerdeFactory, baseBuffer, @@ -215,8 +334,8 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest try { threadsStartLatch.await(); for (int iter = 0; iter < 5000; iter++) { - try (VariantArrayColumn column = (VariantArrayColumn) supplier.get()) { - smokeTest(supplier, column); + try (VariantColumn column = (VariantColumn) supplier.get()) { + smokeTest(supplier, column, data, expectedTypes); } } } @@ -231,10 +350,21 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest Assert.assertEquals(expectedReason, failureReason.get()); } - private void smokeTest(VariantArrayColumnAndIndexSupplier supplier, VariantArrayColumn column) + private void smokeTest( + VariantColumnAndIndexSupplier supplier, + VariantColumn column, + List data, + FieldTypeInfo.MutableTypeSet expectedType + ) { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); + NoFilterVectorOffset vectorOffset = new NoFilterVectorOffset(1, 0, data.size()); ColumnValueSelector valueSelector = column.makeColumnValueSelector(offset); + DimensionSelector dimensionSelector = + expectedLogicalType.isPrimitive() ? column.makeDimensionSelector(offset, null) : null; + VectorObjectSelector vectorObjectSelector = column.makeVectorObjectSelector(vectorOffset); + SingleValueDimensionVectorSelector dimensionVectorSelector = + expectedLogicalType.isPrimitive() ? 
column.makeSingleValueDimensionVectorSelector(vectorOffset) : null; StringValueSetIndex valueSetIndex = supplier.as(StringValueSetIndex.class); Assert.assertNull(valueSetIndex); @@ -245,26 +375,69 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest SortedMap fields = column.getFieldTypeInfo(); Assert.assertEquals( - ImmutableMap.of(NestedPathFinder.JSON_PATH_ROOT, new FieldTypeInfo.MutableTypeSet().add(ColumnType.LONG_ARRAY)), + ImmutableMap.of(NestedPathFinder.JSON_PATH_ROOT, expectedType), fields ); + final ExpressionType expressionType = ExpressionType.fromColumnTypeStrict(expectedLogicalType); for (int i = 0; i < data.size(); i++) { - List row = data.get(i); + Object row = data.get(i); // in default value mode, even though the input row had an empty string, the selector spits out null, so we want // to take the null checking path if (row != null) { - Assert.assertArrayEquals(row.toArray(), (Object[]) valueSelector.getObject()); + if (row instanceof List) { + Assert.assertArrayEquals(((List) row).toArray(), (Object[]) valueSelector.getObject()); + if (expectedType.getSingleType() != null) { + Assert.assertArrayEquals(((List) row).toArray(), (Object[]) vectorObjectSelector.getObjectVector()[0]); + } else { + // mixed type vector object selector coerces to the most common type + Assert.assertArrayEquals(ExprEval.ofType(expressionType, row).asArray(), (Object[]) vectorObjectSelector.getObjectVector()[0]); + } + } else { + Assert.assertEquals(row, valueSelector.getObject()); + if (expectedType.getSingleType() != null) { + Assert.assertEquals( + row, + vectorObjectSelector.getObjectVector()[0] + ); + } else { + // vector object selector always coerces to the most common type + ExprEval eval = ExprEval.ofType(expressionType, row); + if (expectedLogicalType.isArray()) { + Assert.assertArrayEquals(eval.asArray(), (Object[]) vectorObjectSelector.getObjectVector()[0]); + } else { + Assert.assertEquals(eval.value(), 
vectorObjectSelector.getObjectVector()[0]); + } + } + if (dimensionSelector != null) { + Assert.assertEquals(String.valueOf(row), dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + // null is always 0 + Assert.assertTrue(dimensionSelector.idLookup().lookupId(String.valueOf(row)) > 0); + if (dimensionVectorSelector != null) { + int[] dim = dimensionVectorSelector.getRowVector(); + Assert.assertEquals(String.valueOf(row), dimensionVectorSelector.lookupName(dim[0])); + } + } + } Assert.assertFalse(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); } else { Assert.assertNull(valueSelector.getObject()); + Assert.assertNull(vectorObjectSelector.getObjectVector()[0]); + if (dimensionSelector != null) { + Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); + Assert.assertEquals(0, dimensionSelector.idLookup().lookupId(null)); + if (dimensionVectorSelector != null) { + Assert.assertNull(dimensionVectorSelector.lookupName(dimensionVectorSelector.getRowVector()[0])); + } + } Assert.assertTrue(nullValueIndex.forNull().computeBitmapResult(resultFactory).get(i)); } offset.increment(); + vectorOffset.advance(); } } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java index 0e8cfef9036..c40c4a9bacd 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionPlannerTest.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.Parser; -import org.apache.druid.query.expression.NestedDataExpressions; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.segment.ColumnInspector; import 
org.apache.druid.segment.column.ColumnCapabilities; @@ -824,7 +823,7 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest ExpressionPlan.Trait.VECTORIZABLE ) ); - Assert.assertEquals(NestedDataExpressions.TYPE, thePlan.getOutputType()); + Assert.assertEquals(ExpressionType.NESTED_DATA, thePlan.getOutputType()); ColumnCapabilities inferred = thePlan.inferColumnCapabilities( ExpressionType.toColumnType(thePlan.getOutputType()) ); diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java index 7c0f65a973e..6613dc212fd 100644 --- a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java +++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java @@ -28,6 +28,7 @@ import com.google.inject.Key; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.bitmap.RoaringBitmapFactory; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.guice.annotations.Json; import org.apache.druid.jackson.DefaultObjectMapper; @@ -168,7 +169,17 @@ public class DumpSegmentTest extends InitializedNullHandlingTest ); final byte[] fileBytes = Files.readAllBytes(outputFile.toPath()); final String output = StringUtils.fromUtf8(fileBytes); - Assert.assertEquals("{\"nest\":{\"fields\":[{\"path\":\"$.x\",\"types\":[\"LONG\"]},{\"path\":\"$.y\",\"types\":[\"DOUBLE\"]},{\"path\":\"$.z\",\"types\":[\"STRING\"]}],\"dictionaries\":{\"strings\":[{\"globalId\":0,\"value\":null},{\"globalId\":1,\"value\":\"a\"},{\"globalId\":2,\"value\":\"b\"}],\"longs\":[{\"globalId\":3,\"value\":100},{\"globalId\":4,\"value\":200},{\"globalId\":5,\"value\":400}],\"doubles\":[{\"globalId\":6,\"value\":1.1},{\"globalId\":7,\"value\":2.2},{\"globalId\":8,\"value\":3.3}],\"nullRows\":[]}}}", output); + if (NullHandling.sqlCompatible()) 
{ + Assert.assertEquals( + "{\"nest\":{\"fields\":[{\"path\":\"$.x\",\"types\":[\"LONG\"]},{\"path\":\"$.y\",\"types\":[\"DOUBLE\"]},{\"path\":\"$.z\",\"types\":[\"STRING\"]}],\"dictionaries\":{\"strings\":[{\"globalId\":0,\"value\":null},{\"globalId\":1,\"value\":\"a\"},{\"globalId\":2,\"value\":\"b\"}],\"longs\":[{\"globalId\":3,\"value\":100},{\"globalId\":4,\"value\":200},{\"globalId\":5,\"value\":400}],\"doubles\":[{\"globalId\":6,\"value\":1.1},{\"globalId\":7,\"value\":2.2},{\"globalId\":8,\"value\":3.3}],\"nullRows\":[]}}}", + output + ); + } else { + Assert.assertEquals( + "{\"nest\":{\"fields\":[{\"path\":\"$.x\",\"types\":[\"LONG\"]},{\"path\":\"$.y\",\"types\":[\"DOUBLE\"]},{\"path\":\"$.z\",\"types\":[\"STRING\"]}],\"dictionaries\":{\"strings\":[{\"globalId\":0,\"value\":null},{\"globalId\":1,\"value\":\"a\"},{\"globalId\":2,\"value\":\"b\"}],\"longs\":[{\"globalId\":3,\"value\":0},{\"globalId\":4,\"value\":100},{\"globalId\":5,\"value\":200},{\"globalId\":6,\"value\":400}],\"doubles\":[{\"globalId\":7,\"value\":0.0},{\"globalId\":8,\"value\":1.1},{\"globalId\":9,\"value\":2.2},{\"globalId\":10,\"value\":3.3}],\"nullRows\":[]}}}", + output + ); + } } @Test @@ -191,7 +202,17 @@ public class DumpSegmentTest extends InitializedNullHandlingTest ); final byte[] fileBytes = Files.readAllBytes(outputFile.toPath()); final String output = StringUtils.fromUtf8(fileBytes); - 
Assert.assertEquals("{\"bitmapSerdeFactory\":{\"type\":\"roaring\"},\"nest\":{\"$.x\":{\"types\":[\"LONG\"],\"dictionary\":[{\"localId\":0,\"globalId\":0,\"value\":null,\"rows\":[4]},{\"localId\":1,\"globalId\":3,\"value\":\"100\",\"rows\":[3]},{\"localId\":2,\"globalId\":4,\"value\":\"200\",\"rows\":[0,2]},{\"localId\":3,\"globalId\":5,\"value\":\"400\",\"rows\":[1]}],\"column\":[{\"row\":0,\"raw\":{\"x\":200,\"y\":2.2},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":1,\"raw\":{\"x\":400,\"y\":1.1,\"z\":\"a\"},\"fieldId\":3,\"fieldValue\":\"400\"},{\"row\":2,\"raw\":{\"x\":200,\"z\":\"b\"},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":3,\"raw\":{\"x\":100,\"y\":1.1,\"z\":\"a\"},\"fieldId\":1,\"fieldValue\":\"100\"},{\"row\":4,\"raw\":{\"y\":3.3,\"z\":\"b\"},\"fieldId\":0,\"fieldValue\":null}]}}}", output); + if (NullHandling.sqlCompatible()) { + Assert.assertEquals( + "{\"bitmapSerdeFactory\":{\"type\":\"roaring\"},\"nest\":{\"$.x\":{\"types\":[\"LONG\"],\"dictionary\":[{\"localId\":0,\"globalId\":0,\"value\":null,\"rows\":[4]},{\"localId\":1,\"globalId\":3,\"value\":\"100\",\"rows\":[3]},{\"localId\":2,\"globalId\":4,\"value\":\"200\",\"rows\":[0,2]},{\"localId\":3,\"globalId\":5,\"value\":\"400\",\"rows\":[1]}],\"column\":[{\"row\":0,\"raw\":{\"x\":200,\"y\":2.2},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":1,\"raw\":{\"x\":400,\"y\":1.1,\"z\":\"a\"},\"fieldId\":3,\"fieldValue\":\"400\"},{\"row\":2,\"raw\":{\"x\":200,\"z\":\"b\"},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":3,\"raw\":{\"x\":100,\"y\":1.1,\"z\":\"a\"},\"fieldId\":1,\"fieldValue\":\"100\"},{\"row\":4,\"raw\":{\"y\":3.3,\"z\":\"b\"},\"fieldId\":0,\"fieldValue\":null}]}}}", + output + ); + } else { + Assert.assertEquals( + 
"{\"bitmapSerdeFactory\":{\"type\":\"roaring\"},\"nest\":{\"$.x\":{\"types\":[\"LONG\"],\"dictionary\":[{\"localId\":0,\"globalId\":0,\"value\":null,\"rows\":[4]},{\"localId\":1,\"globalId\":4,\"value\":\"100\",\"rows\":[3]},{\"localId\":2,\"globalId\":5,\"value\":\"200\",\"rows\":[0,2]},{\"localId\":3,\"globalId\":6,\"value\":\"400\",\"rows\":[1]}],\"column\":[{\"row\":0,\"raw\":{\"x\":200,\"y\":2.2},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":1,\"raw\":{\"x\":400,\"y\":1.1,\"z\":\"a\"},\"fieldId\":3,\"fieldValue\":\"400\"},{\"row\":2,\"raw\":{\"x\":200,\"z\":\"b\"},\"fieldId\":2,\"fieldValue\":\"200\"},{\"row\":3,\"raw\":{\"x\":100,\"y\":1.1,\"z\":\"a\"},\"fieldId\":1,\"fieldValue\":\"100\"},{\"row\":4,\"raw\":{\"y\":3.3,\"z\":\"b\"},\"fieldId\":0,\"fieldValue\":null}]}}}", + output + ); + } } public static List createSegments( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java index 0ae4f6fc9d4..e5252fbe5a9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/DruidExpression.java @@ -437,6 +437,22 @@ public class DruidExpression ); } + public DruidExpression map( + final Function extractionMap, + final Function expressionMap, + final ColumnType newType + ) + { + return new DruidExpression( + nodeType, + newType, + simpleExtraction == null ? 
null : extractionMap.apply(simpleExtraction), + (args) -> expressionMap.apply(expressionGenerator.compile(args)), + arguments, + virtualColumnCreator + ); + } + public DruidExpression withArguments(List newArgs) { return new DruidExpression( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index fc1c40a5b1b..21c23bf2906 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -508,7 +508,15 @@ public class Expressions } final DimFilter equalFilter; - if (druidExpression.isSimpleExtraction()) { + final ColumnType outputType = druidExpression.getDruidType(); + final boolean isOutputNumeric = outputType != null && outputType.isNumeric(); + // if a simple extraction, we can typically use the base column directly for filtering. however, some expressions + // such as cast also appear as a simple extraction because some native layer things can handle the cast + // themselves, so we check the output type of the expression and compare it to the type of the direct column. 
a + // string column might produce additional null values when converting to a number, so we should use the virtual + // column instead for filtering to ensure that results are correct + if (druidExpression.isSimpleExtraction() && + !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) { equalFilter = new SelectorDimFilter( druidExpression.getSimpleExtraction().getColumn(), NullHandling.defaultStringValue(), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayAppendOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayAppendOperatorConversion.java index 4c51c10f2ea..9ec338ebfa2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayAppendOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayAppendOperatorConversion.java @@ -39,8 +39,8 @@ public class ArrayAppendOperatorConversion extends DirectOperatorConversion OperandTypes.family(SqlTypeFamily.STRING) ), OperandTypes.or( - OperandTypes.family(SqlTypeFamily.STRING), - OperandTypes.family(SqlTypeFamily.NUMERIC) + OperandTypes.family(SqlTypeFamily.STRING), + OperandTypes.family(SqlTypeFamily.NUMERIC) ) ) ) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java index 55d138b5f86..0ee93e8f697 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/ArrayOffsetOperatorConversion.java @@ -37,7 +37,7 @@ public class ArrayOffsetOperatorConversion extends DirectOperatorConversion OperandTypes.family(SqlTypeFamily.ARRAY), OperandTypes.family(SqlTypeFamily.STRING) ), - OperandTypes.family(SqlTypeFamily.NUMERIC) + OperandTypes.family(SqlTypeFamily.NUMERIC) ) ) 
.functionCategory(SqlFunctionCategory.STRING) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java index 7f8c1ddee88..bb0b60db569 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java @@ -108,7 +108,8 @@ public class CastOperatorConversion implements SqlOperatorConversion // Ignore casts for simple extractions (use Function.identity) since it is ok in many cases. typeCastExpression = operandExpression.map( Function.identity(), - expression -> StringUtils.format("CAST(%s, '%s')", expression, toExpressionType.asTypeString()) + expression -> StringUtils.format("CAST(%s, '%s')", expression, toExpressionType.asTypeString()), + toDruidType ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 5c7729eb60c..64014aae32c 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -109,6 +109,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ImmutableMap.builder() .put("t", "2000-01-01") .put("string", "ccc") + .put("string_sparse", "10") .put("nest", ImmutableMap.of("x", 200L, "y", 3.03, "z", "abcdef", "mixed", 1.1, "mixed2", 1L)) .put("long", 3L) .build(), @@ -705,7 +706,8 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ), ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{NullHandling.defaultStringValue(), 8L}, + new Object[]{"10", 2L}, new Object[]{"yyy", 2L}, new Object[]{"zzz", 2L} ), @@ -811,7 +813,8 @@ public class 
CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ), ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 10L}, + new Object[]{NullHandling.defaultStringValue(), 8L}, + new Object[]{"10", 2L}, new Object[]{"yyy", 2L}, new Object[]{"zzz", 2L} ), @@ -2705,7 +2708,6 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ), ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 4L}, new Object[]{"100", 2L} ), RowSignature.builder() @@ -2967,7 +2969,6 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest .build() ), ImmutableList.of( - new Object[]{NullHandling.defaultStringValue(), 4L}, new Object[]{"2.02", 2L} ), RowSignature.builder() @@ -4561,4 +4562,285 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest } ); } + + @Test + public void testGroupByPathSelectorFilterVariantNull() + { + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "JSON_VALUE(nester, '$.n.x' RETURNING BIGINT), " + + "SUM(cnt) " + + "FROM druid.nested WHERE JSON_VALUE(nester, '$.n.x' RETURNING BIGINT) IS NULL GROUP BY 1, 2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nester", "$.n.x", "v0", ColumnType.LONG), + new NestedFieldVirtualColumn("nest", "$.x", "v1", ColumnType.STRING) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v1", "d0"), + new DefaultDimensionSpec("v0", "d1", ColumnType.LONG) + ) + ) + .setDimFilter(selector("v0", null, null)) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{NullHandling.defaultStringValue(), NullHandling.defaultLongValue(), 4L}, + new Object[]{"100", NullHandling.defaultLongValue(), 1L}, + new Object[]{"200", NullHandling.defaultLongValue(), 1L} 
+ ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .add("EXPR$2", ColumnType.LONG) + .build() + ); + } + + @Test + public void testSelectPathSelectorFilterVariantNull() + { + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "JSON_VALUE(nester, '$.n.x' RETURNING BIGINT) " + + "FROM druid.nested WHERE JSON_VALUE(nester, '$.n.x' RETURNING BIGINT) IS NULL", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new NestedFieldVirtualColumn("nester", "$.n.x", "v0", ColumnType.LONG), + new NestedFieldVirtualColumn("nest", "$.x", "v1", ColumnType.STRING) + ) + .columns( + "v0", "v1" + ) + .filters(selector("v0", null, null)) + .context(QUERY_CONTEXT_DEFAULT) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + ImmutableList.of( + new Object[]{"100", null}, + new Object[]{NullHandling.defaultStringValue(), null}, + new Object[]{"200", null}, + new Object[]{NullHandling.defaultStringValue(), null}, + new Object[]{NullHandling.defaultStringValue(), null}, + new Object[]{NullHandling.defaultStringValue(), null} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByPathSelectorFilterVariantNotNull() + { + testQuery( + "SELECT " + + "JSON_VALUE(nest, '$.x'), " + + "JSON_VALUE(nester, '$.n.x' RETURNING BIGINT), " + + "SUM(cnt) " + + "FROM druid.nested WHERE JSON_VALUE(nester, '$.n.x' RETURNING BIGINT) IS NOT NULL GROUP BY 1, 2", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new NestedFieldVirtualColumn("nester", "$.n.x", "v0", ColumnType.LONG), + new NestedFieldVirtualColumn("nest", "$.x", "v1", ColumnType.STRING) + ) + 
.setDimensions( + dimensions( + new DefaultDimensionSpec("v1", "d0"), + new DefaultDimensionSpec("v0", "d1", ColumnType.LONG) + ) + ) + .setDimFilter(not(selector("v0", null, null))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"100", 1L, 1L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.STRING) + .add("EXPR$1", ColumnType.LONG) + .add("EXPR$2", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRegularLongLongMixed1FilterNotNull() + { + testQuery( + "SELECT " + + "JSON_VALUE(long, '$' RETURNING BIGINT), " + + "SUM(cnt) " + + "FROM druid.nested_mix WHERE JSON_VALUE(long, '$' RETURNING BIGINT) IS NOT NULL GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0", ColumnType.LONG) + ) + ) + .setVirtualColumns(new NestedFieldVirtualColumn("long", "$", "v0", ColumnType.LONG)) + .setDimFilter(not(selector("v0", null, null))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 2L}, + new Object[]{2L, 4L}, + new Object[]{3L, 2L}, + new Object[]{4L, 2L}, + new Object[]{5L, 4L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1SparseNotNull() + { + testQuery( + "SELECT " + + "JSON_VALUE(string_sparse, '$' RETURNING BIGINT), " + + "SUM(cnt) " + + "FROM druid.nested_mix_2 WHERE JSON_VALUE(string_sparse, '$' RETURNING BIGINT) IS NOT NULL GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED_2) + .setInterval(querySegmentSpec(Filtration.eternity())) + 
.setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "d0", ColumnType.LONG) + ) + ) + .setVirtualColumns(new NestedFieldVirtualColumn("string_sparse", "$", "v0", ColumnType.LONG)) + .setDimFilter(not(selector("v0", null, null))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{10L, 2L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } + + @Test + public void testScanStringNotNullCast() + { + skipVectorize(); + testQuery( + "SELECT " + + "CAST(string_sparse as BIGINT)" + + "FROM druid.nested_mix WHERE CAST(string_sparse as BIGINT) IS NOT NULL", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE_MIXED) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn("v0", "CAST(\"string_sparse\", 'LONG')", ColumnType.LONG) + ) + .filters(not(selector("v0", null, null))) + .columns("v0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + NullHandling.sqlCompatible() ? 
+ ImmutableList.of( + new Object[]{10L}, + new Object[]{10L} + ) : + ImmutableList.of( + new Object[]{0L}, + new Object[]{0L}, + new Object[]{10L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{10L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L}, + new Object[]{0L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .build() + ); + } + + @Test + public void testGroupByRootSingleTypeStringMixed1SparseNotNullCast2() + { + testQuery( + "SELECT " + + "CAST(string_sparse as BIGINT), " + + "SUM(cnt) " + + "FROM druid.nested_mix WHERE CAST(string_sparse as BIGINT) IS NOT NULL GROUP BY 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(DATA_SOURCE_MIXED) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + dimensions( + new DefaultDimensionSpec("string_sparse", "d0", ColumnType.LONG) + ) + ) + .setVirtualColumns(expressionVirtualColumn("v0", "CAST(\"string_sparse\", 'LONG')", ColumnType.LONG)) + .setDimFilter(not(selector("v0", null, null))) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + NullHandling.sqlCompatible() ? + ImmutableList.of( + new Object[]{10L, 2L} + ) : + ImmutableList.of( + new Object[]{0L, 12L}, + new Object[]{10L, 2L} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.LONG) + .add("EXPR$1", ColumnType.LONG) + .build() + ); + } }