add backwards compatibility mode for multi-value string array null value coercion (#12210)

This commit is contained in:
Clint Wylie 2022-01-31 22:38:15 -08:00 committed by GitHub
parent 978b8f7dde
commit f9b406c8f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 209 additions and 56 deletions

View File

@ -48,13 +48,19 @@ public class ExpressionProcessing
@VisibleForTesting @VisibleForTesting
public static void initializeForTests(@Nullable Boolean allowNestedArrays) public static void initializeForTests(@Nullable Boolean allowNestedArrays)
{ {
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null); INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null, null);
} }
@VisibleForTesting @VisibleForTesting
public static void initializeForStrictBooleansTests(boolean useStrict) public static void initializeForStrictBooleansTests(boolean useStrict)
{ {
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null); INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null);
}
@VisibleForTesting
public static void initializeForHomogenizeNullMultiValueStrings()
{
INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
} }
/** /**
@ -62,35 +68,47 @@ public class ExpressionProcessing
*/ */
public static boolean allowNestedArrays() public static boolean allowNestedArrays()
{ {
// this should only be null in a unit test context checkInitialized();
// in production this will be injected by the expression processing module
if (INSTANCE == null) {
throw new IllegalStateException(
"Expressions module not initialized, call ExpressionProcessing.initializeForTests()"
);
}
return INSTANCE.allowNestedArrays(); return INSTANCE.allowNestedArrays();
} }
/**
* All boolean expressions are {@link ExpressionType#LONG}
*/
public static boolean useStrictBooleans() public static boolean useStrictBooleans()
{ {
// this should only be null in a unit test context, in production this will be injected by the null handling module checkInitialized();
if (INSTANCE == null) {
throw new IllegalStateException("ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()");
}
return INSTANCE.isUseStrictBooleans(); return INSTANCE.isUseStrictBooleans();
} }
/**
* All {@link ExprType#ARRAY} values will be converted to {@link ExpressionType#STRING} by their column selectors
* (not within expression processing) to be treated as multi-value strings instead of native arrays.
*/
public static boolean processArraysAsMultiValueStrings() public static boolean processArraysAsMultiValueStrings()
{
checkInitialized();
return INSTANCE.processArraysAsMultiValueStrings();
}
/**
* All multi-value string expression input values of 'null', '[]', and '[null]' will be coerced to '[null]'. If false,
* (the default) this will only be done when single value expressions are implicitly mapped across multi-value rows,
* so that the single valued expression will always be evaluated with an input value of 'null'
*/
public static boolean isHomogenizeNullMultiValueStringArrays()
{
checkInitialized();
return INSTANCE.isHomogenizeNullMultiValueStringArrays();
}
private static void checkInitialized()
{ {
// this should only be null in a unit test context, in production this will be injected by the null handling module // this should only be null in a unit test context, in production this will be injected by the null handling module
if (INSTANCE == null) { if (INSTANCE == null) {
throw new IllegalStateException( throw new IllegalStateException(
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()" "ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests() or one of its variants"
); );
} }
return INSTANCE.processArraysAsMultiValueStrings();
} }
} }

View File

@ -29,8 +29,11 @@ public class ExpressionProcessingConfig
public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays"; public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays";
public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans"; public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans";
// Coerce arrays to multi value strings // Coerce arrays to multi value strings
public static final String public static final String PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = "druid.expressions.processArraysAsMultiValueStrings"; "druid.expressions.processArraysAsMultiValueStrings";
// Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier
public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
"druid.expressions.homogenizeNullMultiValueStringArrays";
@JsonProperty("allowNestedArrays") @JsonProperty("allowNestedArrays")
private final boolean allowNestedArrays; private final boolean allowNestedArrays;
@ -41,27 +44,27 @@ public class ExpressionProcessingConfig
@JsonProperty("processArraysAsMultiValueStrings") @JsonProperty("processArraysAsMultiValueStrings")
private final boolean processArraysAsMultiValueStrings; private final boolean processArraysAsMultiValueStrings;
@JsonProperty("homogenizeNullMultiValueStringArrays")
private final boolean homogenizeNullMultiValueStringArrays;
@JsonCreator @JsonCreator
public ExpressionProcessingConfig( public ExpressionProcessingConfig(
@JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays, @JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays,
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans, @JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings @JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings,
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays
) )
{ {
this.allowNestedArrays = allowNestedArrays == null this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays, NESTED_ARRAYS_CONFIG_STRING);
? Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false")) this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans, NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING);
: allowNestedArrays; this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse(
if (useStrictBooleans == null) { processArraysAsMultiValueStrings,
this.useStrictBooleans = Boolean.parseBoolean( PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING
System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false") );
this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse(
homogenizeNullMultiValueStringArrays,
HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
); );
} else {
this.useStrictBooleans = useStrictBooleans;
}
this.processArraysAsMultiValueStrings
= processArraysAsMultiValueStrings == null
? Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING, "false"))
: processArraysAsMultiValueStrings;
} }
public boolean allowNestedArrays() public boolean allowNestedArrays()
@ -78,4 +81,14 @@ public class ExpressionProcessingConfig
{ {
return processArraysAsMultiValueStrings; return processArraysAsMultiValueStrings;
} }
public boolean isHomogenizeNullMultiValueStringArrays()
{
return homogenizeNullMultiValueStringArrays;
}
private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property)
{
return value != null ? value : Boolean.valueOf(System.getProperty(property, "false"));
}
} }

View File

@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.NonnullPair;
import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings; import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -256,30 +257,43 @@ public class ExpressionSelectors
final List<String> columns = plan.getAnalysis().getRequiredBindingsList(); final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>(); final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>();
for (String columnName : columns) { for (String columnName : columns) {
final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName); final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue(); final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
final Supplier<Object> supplier; final Supplier<Object> supplier;
final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities); final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities);
if (columnCapabilities == null || final boolean useObjectSupplierForMultiValueStringArray =
columnCapabilities.isArray() || capabilities != null
(plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)) // if homogenizing null multi-value string arrays, or if a single valued function that must be applied across
) { // multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in
// Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful. && ((!capabilities.is(ValueType.STRING))
|| (capabilities.is(ValueType.STRING)
&& !ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
&& !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
)
)
// expression has array output
&& plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
final boolean homogenizeNullMultiValueStringArrays =
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();
if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) {
// Unknown type, array type, or output array uses an Object selector and see if that gives anything useful
supplier = supplierFromObjectSelector( supplier = supplierFromObjectSelector(
columnSelectorFactory.makeColumnValueSelector(columnName), columnSelectorFactory.makeColumnValueSelector(columnName),
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) homogenizeNullMultiValueStringArrays
); );
} else if (columnCapabilities.is(ValueType.FLOAT)) { } else if (capabilities.is(ValueType.FLOAT)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getFloat); supplier = makeNullableNumericSupplier(selector, selector::getFloat);
} else if (columnCapabilities.is(ValueType.LONG)) { } else if (capabilities.is(ValueType.LONG)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getLong); supplier = makeNullableNumericSupplier(selector, selector::getLong);
} else if (columnCapabilities.is(ValueType.DOUBLE)) { } else if (capabilities.is(ValueType.DOUBLE)) {
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName); ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
supplier = makeNullableNumericSupplier(selector, selector::getDouble); supplier = makeNullableNumericSupplier(selector, selector::getDouble);
} else if (columnCapabilities.is(ValueType.STRING)) { } else if (capabilities.is(ValueType.STRING)) {
supplier = supplierFromDimensionSelector( supplier = supplierFromDimensionSelector(
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)), columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
multiVal multiVal

View File

@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -502,6 +503,57 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi"); TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
} }
@Test
public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder()
{
try {
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
}
GroupByQuery query = GroupByQuery
.builder()
.setDataSource("xx")
.setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
.setGranularity(Granularities.ALL)
.setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
.setVirtualColumns(
new ExpressionVirtualColumn(
"texpr",
"cartesian_map((x,y) -> concat(x, y), tags, othertags)",
ColumnType.STRING,
TestExprMacroTable.INSTANCE
)
)
.setLimit(5)
.setAggregatorSpecs(new CountAggregatorFactory("count"))
.setContext(context)
.build();
Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
ImmutableList.of(
new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
),
query
);
List<ResultRow> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u1", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u2", "count", 2L),
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3u1", "count", 2L)
);
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
}
finally {
ExpressionProcessing.initializeForTests(null);
}
}
@Test @Test
public void testGroupByExpressionMultiMultiAuto() public void testGroupByExpressionMultiMultiAuto()
{ {

View File

@ -21,7 +21,6 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.HumanReadableBytes; import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.IAE;
@ -56,7 +55,6 @@ import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -65,7 +63,6 @@ import java.util.List;
/** /**
* Tests for array functions and array types * Tests for array functions and array types
*/ */
@RunWith(JUnitParamsRunner.class)
public class CalciteArraysQueryTest extends BaseCalciteQueryTest public class CalciteArraysQueryTest extends BaseCalciteQueryTest
{ {
// test some query stuffs, sort of limited since no native array column types so either need to use constructor or // test some query stuffs, sort of limited since no native array column types so either need to use constructor or

View File

@ -21,9 +21,9 @@ package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import junitparams.JUnitParamsRunner;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.Druids; import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DefaultDimensionSpec;
@ -42,11 +42,9 @@ import org.apache.druid.sql.SqlPlanningException;
import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.calcite.util.CalciteTests;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.List; import java.util.List;
@RunWith(JUnitParamsRunner.class)
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
{ {
// various queries on multi-valued string dimensions using them like strings // various queries on multi-valued string dimensions using them like strings
@ -655,6 +653,70 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
); );
} }
@Test
public void testMultiValueStringConcatBackwardsCompat0dot22andOlder() throws Exception
{
try {
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
// Cannot vectorize due to usage of expressions.
cannotVectorize();
ImmutableList<Object[]> results;
if (useDefault) {
results = ImmutableList.of(
new Object[]{"", 6L},
new Object[]{"b", 4L},
new Object[]{"a", 2L},
new Object[]{"c", 2L},
new Object[]{"d", 2L}
);
} else {
results = ImmutableList.of(
new Object[]{null, 4L},
new Object[]{"b", 4L},
new Object[]{"", 2L},
new Object[]{"a", 2L},
new Object[]{"c", 2L},
new Object[]{"d", 2L}
);
}
testQuery(
"SELECT MV_CONCAT(dim3, dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE3)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(expressionVirtualColumn(
"v0",
"array_concat(\"dim3\",\"dim3\")",
ColumnType.STRING
))
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
)
)
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
.setLimitSpec(new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec(
"a0",
OrderByColumnSpec.Direction.DESCENDING,
StringComparators.NUMERIC
)),
Integer.MAX_VALUE
))
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
results
);
}
finally {
ExpressionProcessing.initializeForTests(null);
}
}
@Test @Test
public void testMultiValueStringOffset() throws Exception public void testMultiValueStringOffset() throws Exception
{ {

View File

@ -20,7 +20,6 @@
package org.apache.druid.sql.calcite; package org.apache.druid.sql.calcite;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import junitparams.JUnitParamsRunner;
import org.apache.calcite.avatica.SqlType; import org.apache.calcite.avatica.SqlType;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.DateTimes;
@ -43,7 +42,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests; import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.http.SqlParameter; import org.apache.druid.sql.http.SqlParameter;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -54,7 +52,6 @@ import java.util.List;
* were merely chosen to produce a selection of parameter types and positions within query expressions and have been * were merely chosen to produce a selection of parameter types and positions within query expressions and have been
* renamed to reflect this * renamed to reflect this
*/ */
@RunWith(JUnitParamsRunner.class)
public class CalciteParameterQueryTest extends BaseCalciteQueryTest public class CalciteParameterQueryTest extends BaseCalciteQueryTest
{ {
@Test @Test