diff --git a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java index c62f7f4b95b..905e0850f71 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessing.java @@ -48,13 +48,19 @@ public class ExpressionProcessing @VisibleForTesting public static void initializeForTests(@Nullable Boolean allowNestedArrays) { - INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null); + INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null, null); } @VisibleForTesting public static void initializeForStrictBooleansTests(boolean useStrict) { - INSTANCE = new ExpressionProcessingConfig(null, useStrict, null); + INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null); + } + + @VisibleForTesting + public static void initializeForHomogenizeNullMultiValueStrings() + { + INSTANCE = new ExpressionProcessingConfig(null, null, null, true); } /** @@ -62,35 +68,47 @@ public class ExpressionProcessing */ public static boolean allowNestedArrays() { - // this should only be null in a unit test context - // in production this will be injected by the expression processing module - if (INSTANCE == null) { - throw new IllegalStateException( - "Expressions module not initialized, call ExpressionProcessing.initializeForTests()" - ); - } + checkInitialized(); return INSTANCE.allowNestedArrays(); } - + /** + * All boolean expressions are {@link ExpressionType#LONG} + */ public static boolean useStrictBooleans() { - // this should only be null in a unit test context, in production this will be injected by the null handling module - if (INSTANCE == null) { - throw new IllegalStateException("ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()"); - } + checkInitialized(); return INSTANCE.isUseStrictBooleans(); } - + /** + * All {@link ExprType#ARRAY} values will be converted to {@link ExpressionType#STRING} by their column selectors + * (not within expression processing) to be treated as multi-value strings instead of native arrays. + */ public static boolean processArraysAsMultiValueStrings() + { + checkInitialized(); + return INSTANCE.processArraysAsMultiValueStrings(); + } + + /** + * All multi-value string expression input values of 'null', '[]', and '[null]' will be coerced to '[null]'. If false, + * (the default) this will only be done when single value expressions are implicitly mapped across multi-value rows, + * so that the single valued expression will always be evaluated with an input value of 'null' + */ + public static boolean isHomogenizeNullMultiValueStringArrays() + { + checkInitialized(); + return INSTANCE.isHomogenizeNullMultiValueStringArrays(); + } + + private static void checkInitialized() { // this should only be null in a unit test context, in production this will be injected by the null handling module if (INSTANCE == null) { throw new IllegalStateException( - "ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()" + "ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests() or one of its variants" ); } - return INSTANCE.processArraysAsMultiValueStrings(); } } diff --git a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java index 1009a9d0c29..b832578fe31 100644 --- a/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java +++ b/core/src/main/java/org/apache/druid/math/expr/ExpressionProcessingConfig.java @@ -29,8 +29,11 @@ public class ExpressionProcessingConfig public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays"; public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans"; // Coerce arrays to multi value strings - public static final String - PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = "druid.expressions.processArraysAsMultiValueStrings"; + public static final String PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = + "druid.expressions.processArraysAsMultiValueStrings"; + // Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier + public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS = + "druid.expressions.homogenizeNullMultiValueStringArrays"; @JsonProperty("allowNestedArrays") private final boolean allowNestedArrays; @@ -41,27 +44,27 @@ public class ExpressionProcessingConfig @JsonProperty("processArraysAsMultiValueStrings") private final boolean processArraysAsMultiValueStrings; + @JsonProperty("homogenizeNullMultiValueStringArrays") + private final boolean homogenizeNullMultiValueStringArrays; + @JsonCreator public ExpressionProcessingConfig( @JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays, @JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans, - @JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings + @JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings, + @JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays ) { - this.allowNestedArrays = allowNestedArrays == null - ? Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false")) - : allowNestedArrays; - if (useStrictBooleans == null) { - this.useStrictBooleans = Boolean.parseBoolean( - System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false") - ); - } else { - this.useStrictBooleans = useStrictBooleans; - } - this.processArraysAsMultiValueStrings - = processArraysAsMultiValueStrings == null - ? Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING, "false")) - : processArraysAsMultiValueStrings; + this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays, NESTED_ARRAYS_CONFIG_STRING); + this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans, NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING); + this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse( + processArraysAsMultiValueStrings, + PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING + ); + this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse( + homogenizeNullMultiValueStringArrays, + HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS + ); } public boolean allowNestedArrays() @@ -78,4 +81,14 @@ public class ExpressionProcessingConfig { return processArraysAsMultiValueStrings; } + + public boolean isHomogenizeNullMultiValueStringArrays() + { + return homogenizeNullMultiValueStringArrays; + } + + private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property) + { + return value != null ? value : Boolean.valueOf(System.getProperty(property, "false")); + } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index f9ff05c40ea..ec5f8bff7c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.NonnullPair; import org.apache.druid.java.util.common.Pair; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; import org.apache.druid.query.dimension.DefaultDimensionSpec; @@ -256,30 +257,43 @@ public class ExpressionSelectors final List columns = plan.getAnalysis().getRequiredBindingsList(); final Map>> suppliers = new HashMap<>(); for (String columnName : columns) { - final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName); - final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue(); + final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName); + final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue(); final Supplier supplier; - final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities); + final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities); - if (columnCapabilities == null || - columnCapabilities.isArray() || - (plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)) - ) { - // Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful. + final boolean useObjectSupplierForMultiValueStringArray = + capabilities != null + // if homogenizing null multi-value string arrays, or if a single valued function that must be applied across + // multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in + && ((!capabilities.is(ValueType.STRING)) + || (capabilities.is(ValueType.STRING) + && !ExpressionProcessing.isHomogenizeNullMultiValueStringArrays() + && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) + ) + ) + // expression has array output + && plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT); + + final boolean homogenizeNullMultiValueStringArrays = + plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays(); + + if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) { + // Unknown type, array type, or output array uses an Object selector and see if that gives anything useful supplier = supplierFromObjectSelector( columnSelectorFactory.makeColumnValueSelector(columnName), - plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) + homogenizeNullMultiValueStringArrays ); - } else if (columnCapabilities.is(ValueType.FLOAT)) { + } else if (capabilities.is(ValueType.FLOAT)) { ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(columnName); supplier = makeNullableNumericSupplier(selector, selector::getFloat); - } else if (columnCapabilities.is(ValueType.LONG)) { + } else if (capabilities.is(ValueType.LONG)) { ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(columnName); supplier = makeNullableNumericSupplier(selector, selector::getLong); - } else if (columnCapabilities.is(ValueType.DOUBLE)) { + } else if (capabilities.is(ValueType.DOUBLE)) { ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(columnName); supplier = makeNullableNumericSupplier(selector, selector::getDouble); - } else if (columnCapabilities.is(ValueType.STRING)) { + } else if (capabilities.is(ValueType.STRING)) { supplier = supplierFromDimensionSelector( columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), multiVal diff --git a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java index 80b5768dd92..c6ef667fe20 100644 --- a/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/org/apache/druid/query/MultiValuedDimensionTest.java @@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; @@ -502,6 +503,57 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi"); } + @Test + public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder() + { + try { + ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings(); + if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality."); + } + GroupByQuery query = GroupByQuery + .builder() + .setDataSource("xx") + .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000")) + .setGranularity(Granularities.ALL) + .setDimensions(new DefaultDimensionSpec("texpr", "texpr")) + .setVirtualColumns( + new ExpressionVirtualColumn( + "texpr", + "cartesian_map((x,y) -> concat(x, y), tags, othertags)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .setLimit(5) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(context) + .build(); + + Sequence result = helper.runQueryOnSegmentsObjs( + ImmutableList.of( + new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")), + new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2")) + ), + query + ); + + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u1", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u2", "count", 2L), + GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3u1", "count", 2L) + ); + + TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi"); + } + finally { + ExpressionProcessing.initializeForTests(null); + } + } + @Test public void testGroupByExpressionMultiMultiAuto() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6e429366d86..0d20bad3078 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -21,7 +21,6 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import junitparams.JUnitParamsRunner; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.java.util.common.IAE; @@ -56,7 +55,6 @@ import org.apache.druid.segment.join.JoinType; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; import org.junit.Test; -import org.junit.runner.RunWith; import java.util.Arrays; import java.util.Collections; @@ -65,7 +63,6 @@ import java.util.List; /** * Tests for array functions and array types */ -@RunWith(JUnitParamsRunner.class) public class CalciteArraysQueryTest extends BaseCalciteQueryTest { // test some query stuffs, sort of limited since no native array column types so either need to use constructor or diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index 49b959170ce..33b105bde07 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -21,9 +21,9 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import junitparams.JUnitParamsRunner; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; @@ -42,11 +42,9 @@ import org.apache.druid.sql.SqlPlanningException; import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; import org.junit.Test; -import org.junit.runner.RunWith; import java.util.List; -@RunWith(JUnitParamsRunner.class) public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest { // various queries on multi-valued string dimensions using them like strings @@ -655,6 +653,70 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest ); } + @Test + public void testMultiValueStringConcatBackwardsCompat0dot22andOlder() throws Exception + { + try { + ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings(); + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + + ImmutableList results; + if (useDefault) { + results = ImmutableList.of( + new Object[]{"", 6L}, + new Object[]{"b", 4L}, + new Object[]{"a", 2L}, + new Object[]{"c", 2L}, + new Object[]{"d", 2L} + ); + } else { + results = ImmutableList.of( + new Object[]{null, 4L}, + new Object[]{"b", 4L}, + new Object[]{"", 2L}, + new Object[]{"a", 2L}, + new Object[]{"c", 2L}, + new Object[]{"d", 2L} + ); + } + testQuery( + "SELECT MV_CONCAT(dim3, dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE3) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns(expressionVirtualColumn( + "v0", + "array_concat(\"dim3\",\"dim3\")", + ColumnType.STRING + )) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setLimitSpec(new DefaultLimitSpec( + ImmutableList.of(new OrderByColumnSpec( + "a0", + OrderByColumnSpec.Direction.DESCENDING, + StringComparators.NUMERIC + )), + Integer.MAX_VALUE + )) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + results + ); + } + finally { + ExpressionProcessing.initializeForTests(null); + } + } + @Test public void testMultiValueStringOffset() throws Exception { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java index 89948062a0a..27500323875 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteParameterQueryTest.java @@ -20,7 +20,6 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; -import junitparams.JUnitParamsRunner; import org.apache.calcite.avatica.SqlType; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; @@ -43,7 +42,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.http.SqlParameter; import org.junit.Test; -import org.junit.runner.RunWith; import java.util.ArrayList; import java.util.List; @@ -54,7 +52,6 @@ import java.util.List; * were merely chosen to produce a selection of parameter types and positions within query expressions and have been * renamed to reflect this */ -@RunWith(JUnitParamsRunner.class) public class CalciteParameterQueryTest extends BaseCalciteQueryTest { @Test