mirror of https://github.com/apache/druid.git
add backwards compatibility mode for multi-value string array null value coercion (#12210)
This commit is contained in:
parent
978b8f7dde
commit
f9b406c8f2
|
@ -48,13 +48,19 @@ public class ExpressionProcessing
|
|||
@VisibleForTesting
|
||||
public static void initializeForTests(@Nullable Boolean allowNestedArrays)
|
||||
{
|
||||
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null);
|
||||
INSTANCE = new ExpressionProcessingConfig(allowNestedArrays, null, null, null);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public static void initializeForStrictBooleansTests(boolean useStrict)
|
||||
{
|
||||
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null);
|
||||
INSTANCE = new ExpressionProcessingConfig(null, useStrict, null, null);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public static void initializeForHomogenizeNullMultiValueStrings()
|
||||
{
|
||||
INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -62,35 +68,47 @@ public class ExpressionProcessing
|
|||
*/
|
||||
public static boolean allowNestedArrays()
|
||||
{
|
||||
// this should only be null in a unit test context
|
||||
// in production this will be injected by the expression processing module
|
||||
if (INSTANCE == null) {
|
||||
throw new IllegalStateException(
|
||||
"Expressions module not initialized, call ExpressionProcessing.initializeForTests()"
|
||||
);
|
||||
}
|
||||
checkInitialized();
|
||||
return INSTANCE.allowNestedArrays();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* All boolean expressions are {@link ExpressionType#LONG}
|
||||
*/
|
||||
public static boolean useStrictBooleans()
|
||||
{
|
||||
// this should only be null in a unit test context, in production this will be injected by the null handling module
|
||||
if (INSTANCE == null) {
|
||||
throw new IllegalStateException("ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()");
|
||||
}
|
||||
checkInitialized();
|
||||
return INSTANCE.isUseStrictBooleans();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* All {@link ExprType#ARRAY} values will be converted to {@link ExpressionType#STRING} by their column selectors
|
||||
* (not within expression processing) to be treated as multi-value strings instead of native arrays.
|
||||
*/
|
||||
public static boolean processArraysAsMultiValueStrings()
|
||||
{
|
||||
checkInitialized();
|
||||
return INSTANCE.processArraysAsMultiValueStrings();
|
||||
}
|
||||
|
||||
/**
|
||||
* All multi-value string expression input values of 'null', '[]', and '[null]' will be coerced to '[null]'. If false,
|
||||
* (the default) this will only be done when single value expressions are implicitly mapped across multi-value rows,
|
||||
* so that the single valued expression will always be evaluated with an input value of 'null'
|
||||
*/
|
||||
public static boolean isHomogenizeNullMultiValueStringArrays()
|
||||
{
|
||||
checkInitialized();
|
||||
return INSTANCE.isHomogenizeNullMultiValueStringArrays();
|
||||
}
|
||||
|
||||
private static void checkInitialized()
|
||||
{
|
||||
// this should only be null in a unit test context, in production this will be injected by the null handling module
|
||||
if (INSTANCE == null) {
|
||||
throw new IllegalStateException(
|
||||
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests()"
|
||||
"ExpressionProcessing module not initialized, call ExpressionProcessing.initializeForTests() or one of its variants"
|
||||
);
|
||||
}
|
||||
return INSTANCE.processArraysAsMultiValueStrings();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,8 +29,11 @@ public class ExpressionProcessingConfig
|
|||
public static final String NESTED_ARRAYS_CONFIG_STRING = "druid.expressions.allowNestedArrays";
|
||||
public static final String NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING = "druid.expressions.useStrictBooleans";
|
||||
// Coerce arrays to multi value strings
|
||||
public static final String
|
||||
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING = "druid.expressions.processArraysAsMultiValueStrings";
|
||||
public static final String PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING =
|
||||
"druid.expressions.processArraysAsMultiValueStrings";
|
||||
// Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier
|
||||
public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
|
||||
"druid.expressions.homogenizeNullMultiValueStringArrays";
|
||||
|
||||
@JsonProperty("allowNestedArrays")
|
||||
private final boolean allowNestedArrays;
|
||||
|
@ -41,27 +44,27 @@ public class ExpressionProcessingConfig
|
|||
@JsonProperty("processArraysAsMultiValueStrings")
|
||||
private final boolean processArraysAsMultiValueStrings;
|
||||
|
||||
@JsonProperty("homogenizeNullMultiValueStringArrays")
|
||||
private final boolean homogenizeNullMultiValueStringArrays;
|
||||
|
||||
@JsonCreator
|
||||
public ExpressionProcessingConfig(
|
||||
@JsonProperty("allowNestedArrays") @Nullable Boolean allowNestedArrays,
|
||||
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
|
||||
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings
|
||||
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings,
|
||||
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays
|
||||
)
|
||||
{
|
||||
this.allowNestedArrays = allowNestedArrays == null
|
||||
? Boolean.valueOf(System.getProperty(NESTED_ARRAYS_CONFIG_STRING, "false"))
|
||||
: allowNestedArrays;
|
||||
if (useStrictBooleans == null) {
|
||||
this.useStrictBooleans = Boolean.parseBoolean(
|
||||
System.getProperty(NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING, "false")
|
||||
this.allowNestedArrays = getWithPropertyFallbackFalse(allowNestedArrays, NESTED_ARRAYS_CONFIG_STRING);
|
||||
this.useStrictBooleans = getWithPropertyFallbackFalse(useStrictBooleans, NULL_HANDLING_LEGACY_LOGICAL_OPS_STRING);
|
||||
this.processArraysAsMultiValueStrings = getWithPropertyFallbackFalse(
|
||||
processArraysAsMultiValueStrings,
|
||||
PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING
|
||||
);
|
||||
this.homogenizeNullMultiValueStringArrays = getWithPropertyFallbackFalse(
|
||||
homogenizeNullMultiValueStringArrays,
|
||||
HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
|
||||
);
|
||||
} else {
|
||||
this.useStrictBooleans = useStrictBooleans;
|
||||
}
|
||||
this.processArraysAsMultiValueStrings
|
||||
= processArraysAsMultiValueStrings == null
|
||||
? Boolean.valueOf(System.getProperty(PROCESS_ARRAYS_AS_MULTIVALUE_STRINGS_CONFIG_STRING, "false"))
|
||||
: processArraysAsMultiValueStrings;
|
||||
}
|
||||
|
||||
public boolean allowNestedArrays()
|
||||
|
@ -78,4 +81,14 @@ public class ExpressionProcessingConfig
|
|||
{
|
||||
return processArraysAsMultiValueStrings;
|
||||
}
|
||||
|
||||
public boolean isHomogenizeNullMultiValueStringArrays()
|
||||
{
|
||||
return homogenizeNullMultiValueStringArrays;
|
||||
}
|
||||
|
||||
private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property)
|
||||
{
|
||||
return value != null ? value : Boolean.valueOf(System.getProperty(property, "false"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.NonnullPair;
|
|||
import org.apache.druid.java.util.common.Pair;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||
import org.apache.druid.math.expr.ExpressionType;
|
||||
import org.apache.druid.math.expr.InputBindings;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
|
@ -256,30 +257,43 @@ public class ExpressionSelectors
|
|||
final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
|
||||
final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>();
|
||||
for (String columnName : columns) {
|
||||
final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(columnName);
|
||||
final boolean multiVal = columnCapabilities != null && columnCapabilities.hasMultipleValues().isTrue();
|
||||
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
|
||||
final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
|
||||
final Supplier<Object> supplier;
|
||||
final ExpressionType expressionType = ExpressionType.fromColumnType(columnCapabilities);
|
||||
final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities);
|
||||
|
||||
if (columnCapabilities == null ||
|
||||
columnCapabilities.isArray() ||
|
||||
(plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT) && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED))
|
||||
) {
|
||||
// Unknown ValueType or array type. Try making an Object selector and see if that gives us anything useful.
|
||||
final boolean useObjectSupplierForMultiValueStringArray =
|
||||
capabilities != null
|
||||
// if homogenizing null multi-value string arrays, or if a single valued function that must be applied across
|
||||
// multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in
|
||||
&& ((!capabilities.is(ValueType.STRING))
|
||||
|| (capabilities.is(ValueType.STRING)
|
||||
&& !ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
|
||||
&& !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
|
||||
)
|
||||
)
|
||||
// expression has array output
|
||||
&& plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
|
||||
|
||||
final boolean homogenizeNullMultiValueStringArrays =
|
||||
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();
|
||||
|
||||
if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) {
|
||||
// Unknown type, array type, or output array uses an Object selector and see if that gives anything useful
|
||||
supplier = supplierFromObjectSelector(
|
||||
columnSelectorFactory.makeColumnValueSelector(columnName),
|
||||
plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
|
||||
homogenizeNullMultiValueStringArrays
|
||||
);
|
||||
} else if (columnCapabilities.is(ValueType.FLOAT)) {
|
||||
} else if (capabilities.is(ValueType.FLOAT)) {
|
||||
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
|
||||
supplier = makeNullableNumericSupplier(selector, selector::getFloat);
|
||||
} else if (columnCapabilities.is(ValueType.LONG)) {
|
||||
} else if (capabilities.is(ValueType.LONG)) {
|
||||
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
|
||||
supplier = makeNullableNumericSupplier(selector, selector::getLong);
|
||||
} else if (columnCapabilities.is(ValueType.DOUBLE)) {
|
||||
} else if (capabilities.is(ValueType.DOUBLE)) {
|
||||
ColumnValueSelector<?> selector = columnSelectorFactory.makeColumnValueSelector(columnName);
|
||||
supplier = makeNullableNumericSupplier(selector, selector::getDouble);
|
||||
} else if (columnCapabilities.is(ValueType.STRING)) {
|
||||
} else if (capabilities.is(ValueType.STRING)) {
|
||||
supplier = supplierFromDimensionSelector(
|
||||
columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
|
||||
multiVal
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.druid.java.util.common.DateTimes;
|
|||
import org.apache.druid.java.util.common.FileUtils;
|
||||
import org.apache.druid.java.util.common.granularity.Granularities;
|
||||
import org.apache.druid.java.util.common.guava.Sequence;
|
||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||
import org.apache.druid.query.aggregation.AggregationTestHelper;
|
||||
import org.apache.druid.query.aggregation.CountAggregatorFactory;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
|
@ -502,6 +503,57 @@ public class MultiValuedDimensionTest extends InitializedNullHandlingTest
|
|||
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByExpressionMultiMultiBackwardsCompat0dot22andOlder()
|
||||
{
|
||||
try {
|
||||
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
|
||||
if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
|
||||
expectedException.expect(RuntimeException.class);
|
||||
expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
|
||||
}
|
||||
GroupByQuery query = GroupByQuery
|
||||
.builder()
|
||||
.setDataSource("xx")
|
||||
.setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setDimensions(new DefaultDimensionSpec("texpr", "texpr"))
|
||||
.setVirtualColumns(
|
||||
new ExpressionVirtualColumn(
|
||||
"texpr",
|
||||
"cartesian_map((x,y) -> concat(x, y), tags, othertags)",
|
||||
ColumnType.STRING,
|
||||
TestExprMacroTable.INSTANCE
|
||||
)
|
||||
)
|
||||
.setLimit(5)
|
||||
.setAggregatorSpecs(new CountAggregatorFactory("count"))
|
||||
.setContext(context)
|
||||
.build();
|
||||
|
||||
Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
|
||||
ImmutableList.of(
|
||||
new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
|
||||
new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
|
||||
),
|
||||
query
|
||||
);
|
||||
|
||||
List<ResultRow> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u1", "count", 2L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t1u2", "count", 2L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u1", "count", 2L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t2u2", "count", 2L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "texpr", "t3u1", "count", 2L)
|
||||
);
|
||||
|
||||
TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-multi-multi");
|
||||
}
|
||||
finally {
|
||||
ExpressionProcessing.initializeForTests(null);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByExpressionMultiMultiAuto()
|
||||
{
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.apache.druid.sql.calcite;
|
|||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.HumanReadableBytes;
|
||||
import org.apache.druid.java.util.common.IAE;
|
||||
|
@ -56,7 +55,6 @@ import org.apache.druid.segment.join.JoinType;
|
|||
import org.apache.druid.sql.calcite.filtration.Filtration;
|
||||
import org.apache.druid.sql.calcite.util.CalciteTests;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@ -65,7 +63,6 @@ import java.util.List;
|
|||
/**
|
||||
* Tests for array functions and array types
|
||||
*/
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class CalciteArraysQueryTest extends BaseCalciteQueryTest
|
||||
{
|
||||
// test some query stuffs, sort of limited since no native array column types so either need to use constructor or
|
||||
|
|
|
@ -21,9 +21,9 @@ package org.apache.druid.sql.calcite;
|
|||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.granularity.Granularities;
|
||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||
import org.apache.druid.query.Druids;
|
||||
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import org.apache.druid.query.dimension.DefaultDimensionSpec;
|
||||
|
@ -42,11 +42,9 @@ import org.apache.druid.sql.SqlPlanningException;
|
|||
import org.apache.druid.sql.calcite.filtration.Filtration;
|
||||
import org.apache.druid.sql.calcite.util.CalciteTests;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
|
||||
{
|
||||
// various queries on multi-valued string dimensions using them like strings
|
||||
|
@ -655,6 +653,70 @@ public class CalciteMultiValueStringQueryTest extends BaseCalciteQueryTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiValueStringConcatBackwardsCompat0dot22andOlder() throws Exception
|
||||
{
|
||||
try {
|
||||
ExpressionProcessing.initializeForHomogenizeNullMultiValueStrings();
|
||||
// Cannot vectorize due to usage of expressions.
|
||||
cannotVectorize();
|
||||
|
||||
ImmutableList<Object[]> results;
|
||||
if (useDefault) {
|
||||
results = ImmutableList.of(
|
||||
new Object[]{"", 6L},
|
||||
new Object[]{"b", 4L},
|
||||
new Object[]{"a", 2L},
|
||||
new Object[]{"c", 2L},
|
||||
new Object[]{"d", 2L}
|
||||
);
|
||||
} else {
|
||||
results = ImmutableList.of(
|
||||
new Object[]{null, 4L},
|
||||
new Object[]{"b", 4L},
|
||||
new Object[]{"", 2L},
|
||||
new Object[]{"a", 2L},
|
||||
new Object[]{"c", 2L},
|
||||
new Object[]{"d", 2L}
|
||||
);
|
||||
}
|
||||
testQuery(
|
||||
"SELECT MV_CONCAT(dim3, dim3), SUM(cnt) FROM druid.numfoo GROUP BY 1 ORDER BY 2 DESC",
|
||||
ImmutableList.of(
|
||||
GroupByQuery.builder()
|
||||
.setDataSource(CalciteTests.DATASOURCE3)
|
||||
.setInterval(querySegmentSpec(Filtration.eternity()))
|
||||
.setGranularity(Granularities.ALL)
|
||||
.setVirtualColumns(expressionVirtualColumn(
|
||||
"v0",
|
||||
"array_concat(\"dim3\",\"dim3\")",
|
||||
ColumnType.STRING
|
||||
))
|
||||
.setDimensions(
|
||||
dimensions(
|
||||
new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
|
||||
.setLimitSpec(new DefaultLimitSpec(
|
||||
ImmutableList.of(new OrderByColumnSpec(
|
||||
"a0",
|
||||
OrderByColumnSpec.Direction.DESCENDING,
|
||||
StringComparators.NUMERIC
|
||||
)),
|
||||
Integer.MAX_VALUE
|
||||
))
|
||||
.setContext(QUERY_CONTEXT_DEFAULT)
|
||||
.build()
|
||||
),
|
||||
results
|
||||
);
|
||||
}
|
||||
finally {
|
||||
ExpressionProcessing.initializeForTests(null);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiValueStringOffset() throws Exception
|
||||
{
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
package org.apache.druid.sql.calcite;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import org.apache.calcite.avatica.SqlType;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.DateTimes;
|
||||
|
@ -43,7 +42,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration;
|
|||
import org.apache.druid.sql.calcite.util.CalciteTests;
|
||||
import org.apache.druid.sql.http.SqlParameter;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -54,7 +52,6 @@ import java.util.List;
|
|||
* were merely chosen to produce a selection of parameter types and positions within query expressions and have been
|
||||
* renamed to reflect this
|
||||
*/
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class CalciteParameterQueryTest extends BaseCalciteQueryTest
|
||||
{
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue