add backwards compatibility mode for multi-value string array null value coercion (#12210)

This commit is contained in:
Clint Wylie 2022-01-31 22:38:15 -08:00 committed by GitHub
parent 978b8f7dde
commit f9b406c8f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 209 additions and 56 deletions

View File

@ -48,13 +48,19 @@ public class ExpressionProcessing
/**
 * Test-only initializer: replaces the shared {@code INSTANCE} with a freshly built
 * {@link ExpressionProcessingConfig} carrying the given {@code allowNestedArrays} value. Every other constructor
 * argument is passed as {@code null}.
 *
 * @param allowNestedArrays explicit value for the nested-arrays setting, or {@code null} to supply no explicit value
 */
@VisibleForTesting
public static void initializeForTests(@Nullable Boolean allowNestedArrays)
{
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null);
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null, null);
}
/**
 * Test-only initializer: replaces the shared {@code INSTANCE} with a config whose second constructor argument
 * (strict booleans) is set explicitly. Every other constructor argument is passed as {@code null}.
 *
 * @param useStrict value to install for the strict-booleans setting
 */
@VisibleForTesting
public static void initializeForStrictBooleansTests(boolean useStrict)
{
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null);
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null);
}
/**
 * Test-only initializer: replaces the shared {@code INSTANCE} with a config whose fourth constructor argument
 * (homogenize null multi-value string arrays) is {@code true}. The first three arguments are passed as {@code null}.
 */
@VisibleForTesting
public static void initializeForHomogenizeNullMultiValueStrings()
{
INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
}
/**
@ -62,35 +68,47 @@ public class ExpressionProcessing
*/
public static boolean allowNestedArrays()
{
// this should only be null in a unit test context
// in production this will be injected by the expression processing module
if (INSTANCE == null) {
throw new IllegalStateException(
"Expressions module not initialized, call ExpressionProcessing.initializeForTests()"
);
}
// checkInitialized() throws IllegalStateException when no config has been installed in INSTANCE
checkInitialized();
// delegate to the currently installed config
return INSTANCE.allowNestedArrays();
}
/**
 * All boolean expressions are {@link ExpressionType#LONG}
 *
 * @return the value of {@code isUseStrictBooleans()} on the installed config
 * @throws IllegalStateException if no config has been installed in {@code INSTANCE}
 */
public static boolean useStrictBooleans()
{
// this should only be null in a unit test context, in production this will be injected by the null handling module
if (INSTANCE == null) {
throw new IllegalStateException("ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()");
}
checkInitialized();
return INSTANCE.isUseStrictBooleans();
}
/**
 * All {@link ExprType#ARRAY} values will be converted to {@link ExpressionType#STRING} by their column selectors
 * (not within expression processing) to be treated as multi-value strings instead of native arrays.
 *
 * NOTE(review): this links {@code ExprType} while the rest of this documentation links {@code ExpressionType} -
 * confirm the first link still resolves.
 *
 * @return the value of {@code processArraysAsMultiValueStrings()} on the installed config
 * @throws IllegalStateException if no config has been installed in {@code INSTANCE}
 */
public static boolean processArraysAsMultiValueStrings()
{
checkInitialized();
return INSTANCE.processArraysAsMultiValueStrings();
}
/**
 * All multi-value string expression input values of 'null', '[]', and '[null]' will be coerced to '[null]'. If false
 * (the default), this will only be done when single value expressions are implicitly mapped across multi-value rows,
 * so that the single valued expression will always be evaluated with an input value of 'null'.
 *
 * @return the value of {@code isHomogenizeNullMultiValueStringArrays()} on the installed config
 * @throws IllegalStateException if no config has been installed in {@code INSTANCE}
 */
public static boolean isHomogenizeNullMultiValueStringArrays()
{
checkInitialized();
return INSTANCE.isHomogenizeNullMultiValueStringArrays();
}
/**
 * Guard used by the static accessors: fails fast when no config has been installed in {@code INSTANCE}.
 *
 * @throws IllegalStateException if {@code INSTANCE} is null
 */
private static void checkInitialized()
{
// this should only be null in a unit test context, in production this will be injected by the null handling module
// NOTE(review): the comment above names the null handling module, while the exception message below names the
// ExpressionProcessing module - confirm which module actually performs the injection
if (INSTANCE == null) {
throw new IllegalStateException(
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()"
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests() or one of its variants"
);
}
return INSTANCE.processArraysAsMultiValueStrings();
}
}

View File

@ -29,8 +29,11 @@ public class ExpressionProcessingConfig
// System property names. The constructor reads the matching property whenever its corresponding argument is null.
// Property consulted for the allowNestedArrays setting
public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays";
// Property consulted for the useStrictBooleans setting (note the constant name does not mirror the property name)
public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans";
// Coerce arrays to multi value strings
public static final String
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = "druid.expressions.processArraysAsMultiValueStrings";
public static final String PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
"druid.expressions.processArraysAsMultiValueStrings";
// Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier
public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
"druid.expressions.homogenizeNullMultiValueStringArrays";
@JsonProperty("allowNestedArrays")
private final boolean allowNestedArrays;
@ -41,27 +44,27 @@ public class ExpressionProcessingConfig
@JsonProperty("processArraysAsMultiValueStrings")
private final boolean processArraysAsMultiValueStrings;
@JsonProperty("homogenizeNullMultiValueStringArrays")
private final boolean homogenizeNullMultiValueStringArrays;
/**
 * Builds the config from optional explicit values. Each argument may be {@code null}; a {@code null} argument is
 * replaced by the boolean value of the matching system property, and an unset property is read as "false".
 *
 * @param allowNestedArrays explicit value, or {@code null} to read {@link #NESTED_ARRAYS_CONFIG_STRING}
 * @param useStrictBooleans explicit value, or {@code null} to read {@link #NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING}
 * @param processArraysAsMultiValueStrings explicit value, or {@code null} to read
 *                                         {@link #PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING}
 * @param homogenizeNullMultiValueStringArrays explicit value, or {@code null} to read
 *                                             {@link #HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS}
 */
@JsonCreator
public ExpressionProcessingConfig(
@JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays,
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings,
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays
)
{
this.allowNestedArrays = allowNestedArrays == null
? Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false"))
: allowNestedArrays;
if (useStrictBooleans == null) {
this.useStrictBooleans = Boolean.parseBoolean(
System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false")
);
} else {
this.useStrictBooleans = useStrictBooleans;
}
this.processArraysAsMultiValueStrings
= processArraysAsMultiValueStrings == null
? Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING, "false"))
: processArraysAsMultiValueStrings;
// each setting: explicit argument if non-null, otherwise the system property, otherwise false
this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays, NESTED_ARRAYS_CONFIG_STRING);
this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans, NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING);
this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse(
processArraysAsMultiValueStrings,
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING
);
this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse(
homogenizeNullMultiValueStringArrays,
HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
);
}
public boolean allowNestedArrays()
@ -78,4 +81,14 @@ public class ExpressionProcessingConfig
{
return processArraysAsMultiValueStrings;
}
/**
 * @return the homogenizeNullMultiValueStringArrays setting resolved when this config was constructed
 */
public boolean isHomogenizeNullMultiValueStringArrays()
{
return homogenizeNullMultiValueStringArrays;
}
/**
 * Resolves a boolean setting. An explicitly supplied value wins; otherwise the named system property is parsed as
 * a boolean, and an unset property is treated as "false".
 *
 * @param value explicit setting, or {@code null} when none was supplied
 * @param property name of the system property to consult when {@code value} is {@code null}
 * @return the explicit value if present, else the boolean value of the system property
 */
private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property)
{
  if (value == null) {
    final String configured = System.getProperty(property, "false");
    return Boolean.parseBoolean(configured);
  }
  return value;
}
}

View File

@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.NonnullPair;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -256,30 +257,43 @@ public class ExpressionSelectors
final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>();
for (String columnName : columns) {
final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue();
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
final Supplier<Object> supplier;
final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities);
final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities);
if (columnCapabilities == null ||
columnCapabilities.isArray() ||
(plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED))
) {
// Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful.
final boolean useObjectSupplierForMultiValueStringArray =
capabilities != null
// if homogenizing null multi-value string arrays, or if a single valued function that must be applied across
// multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in
&& ((!capabilities.is(ValueType.STRING))
|| (capabilities.is(ValueType.STRING)
&& !ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
&& !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
)
)
// expression has array output
&& plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
final boolean homogenizeNullMultiValueStringArrays =
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();
if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) {
// Unknown type, array type, or output array uses an Object selector and see if that gives anything useful
supplier = supplierFromObjectSelector(
columnSelectorFactory.makeColumnValueSelector(columnName),
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
homogenizeNullMultiValueStringArrays
);
} else if (columnCapabilities.is(ValueType.FLOAT)) {
} else if (capabilities.is(ValueType.FLOAT)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getFloat);
} else if (columnCapabilities.is(ValueType.LONG)) {
} else if (capabilities.is(ValueType.LONG)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getLong);
} else if (columnCapabilities.is(ValueType.DOUBLE)) {
} else if (capabilities.is(ValueType.DOUBLE)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getDouble);
} else if (columnCapabilities.is(ValueType.STRING)) {
} else if (capabilities.is(ValueType.STRING)) {
supplier = supplierFromDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
multiVal

View File

@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -502,6 +503,57 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
}
/**
 * Runs a groupBy on an expression virtual column that applies {@code cartesian_map} over the multi-value columns
 * 'tags' and 'othertags', with ExpressionProcessing initialized through
 * {@code initializeForHomogenizeNullMultiValueStrings()}. The finally block re-initializes ExpressionProcessing
 * with {@code initializeForTests(null)}.
 */
@Test
public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder()
{
try {
// swap the static expression processing config for the homogenize-null variant
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
// under the v1 strategy the query is expected to fail with the message below rather than produce rows
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
}
GroupByQuery query = GroupByQuery
.builder()
.setDataSource("xx")
.setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
.setGranularity(Granularities.ALL)
.setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
.setVirtualColumns(
new ExpressionVirtualColumn(
"texpr",
"cartesian_map((x,y) -> concat(x, y), tags, othertags)",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
)
)
.setLimit(5)
.setAggregatorSpecs(new CountAggregatorFactory("count"))
.setContext(context)
.build();
// query both a QueryableIndexSegment and an IncrementalIndexSegment
Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
ImmutableList.of(
new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
),
query
);
// five rows expected, matching setLimit(5) above
List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3u1", "count", 2L)
);
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
}
finally {
// always put the static config back, even when the query or an assertion throws
ExpressionProcessing.initializeForTests(null);
}
}
@Test
public void testGroupByExpressionMultiMultiAuto()
{

View File

@ -21,7 +21,6 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.java.util.common.IAE;
@ -56,7 +55,6 @@ import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.Arrays;
import java.util.Collections;
@ -65,7 +63,6 @@ import java.util.List;
/**
* Tests for array functions and array types
*/
@RunWith(JUnitParamsRunner.class)
public class CalciteArraysQueryTest extends BaseCalciteQueryTest
{
// test some query stuffs, sort of limited since no native array column types so either need to use constructor or

View File

@ -21,9 +21,9 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -42,11 +42,9 @@ import org.apache.druid.sql.SqlPlanningException;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.List;
@RunWith(JUnitParamsRunner.class)
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
{
// various queries on multi-valued string dimensions using them like strings
@ -655,6 +653,70 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
);
}
/**
 * Runs {@code MV_CONCAT(dim3, dim3)} grouped and ordered by SUM(cnt) descending, with ExpressionProcessing
 * initialized through {@code initializeForHomogenizeNullMultiValueStrings()}. Checks both the native query and
 * the result rows. The finally block re-initializes ExpressionProcessing with {@code initializeForTests(null)}.
 */
@Test
public void testMultiValueStringConcatBackwardsCompat0dot22andOlder() throws Exception
{
try {
// swap the static expression processing config for the homogenize-null variant
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
// Cannot vectorize due to usage of expressions.
cannotVectorize();
// expected rows depend on useDefault
// NOTE(review): useDefault is presumably the default-value null handling flag from the base class - confirm
ImmutableList<Object[]> results;
if (useDefault) {
results = ImmutableList.of(
new Object[]{"", 6L},
new Object[]{"b", 4L},
new Object[]{"a", 2L},
new Object[]{"c", 2L},
new Object[]{"d", 2L}
);
} else {
results = ImmutableList.of(
new Object[]{null, 4L},
new Object[]{"b", 4L},
new Object[]{"", 2L},
new Object[]{"a", 2L},
new Object[]{"c", 2L},
new Object[]{"d", 2L}
);
}
testQuery(
"SELECT MV_CONCAT(dim3, dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn(
"v0",
"array_concat(\"dim3\",\"dim3\")",
ColumnType.STRING
))
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)),
Integer.MAX_VALUE
))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
results
);
}
finally {
// always put the static config back, even when the query or an assertion throws
ExpressionProcessing.initializeForTests(null);
}
}
@Test
public void testMultiValueStringOffset() throws Exception
{

View File

@ -20,7 +20,6 @@
package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList;
import junitparams.JUnitParamsRunner;
import org.apache.calcite.avatica.SqlType;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
@ -43,7 +42,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.http.SqlParameter;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.ArrayList;
import java.util.List;
@ -54,7 +52,6 @@ import java.util.List;
* were merely chosen to produce a selection of parameter types and positions within query expressions and have been
* renamed to reflect this
*/
@RunWith(JUnitParamsRunner.class)
public class CalciteParameterQueryTest extends BaseCalciteQueryTest
{
@Test