Fix ExpressionPredicateIndexSupplier numeric replace-with-default behavior. (#16448)

* Fix ExpressionPredicateIndexSupplier numeric replace-with-default behavior.

In replace-with-default mode, null numeric values from the index should be
interpreted as zeroes by expressions. This makes the index supplier more
consistent with the behavior of the selectors created by the expression
virtual column.

* Fix test case.
This commit is contained in:
Gian Merlino 2024-05-15 02:41:47 -07:00 committed by GitHub
parent c419ae5f73
commit 0fb09445a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 248 additions and 2 deletions

View File

@ -20,6 +20,7 @@
package org.apache.druid.math.expr;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.filter.DruidDoublePredicate;
import org.apache.druid.query.filter.DruidFloatPredicate;
import org.apache.druid.query.filter.DruidLongPredicate;
@ -75,8 +76,32 @@ public class ExpressionPredicateIndexSupplier implements ColumnIndexSupplier
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
final java.util.function.Function<Object, ExprEval<?>> evalFunction =
inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue));
final java.util.function.Function<Object, ExprEval<?>> evalFunction;
if (NullHandling.sqlCompatible()) {
evalFunction =
inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue));
} else {
switch (inputType.getType()) {
case LONG:
evalFunction =
inputValue -> expr.eval(
InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue == null ? 0L : inputValue)
);
break;
case DOUBLE:
evalFunction =
inputValue -> expr.eval(
InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue == null ? 0.0 : inputValue)
);
break;
default:
evalFunction =
inputValue -> expr.eval(InputBindings.forInputSupplier(inputColumn, inputType, () -> inputValue));
}
}
return new DictionaryScanningBitmapIndex(inputColumnIndexes.getCardinality())
{

View File

@ -143,6 +143,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
new ExpressionVirtualColumn("vd0", "d0", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vf0", "f0", ColumnType.FLOAT, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vl0", "l0", ColumnType.LONG, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vd0-nvl-2", "nvl(vd0, 2.0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vd0-add-sub", "d0 + (d0 - d0)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vf0-add-sub", "f0 + (f0 - f0)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE),
new ExpressionVirtualColumn("vl0-add-sub", "l0 + (l0 - l0)", ColumnType.LONG, TestExprMacroTable.INSTANCE),

View File

@ -816,6 +816,22 @@ public class BoundFilterTest extends BaseFilterTest
? ImmutableList.of("0", "3", "7")
: ImmutableList.of("0")
);
assertFilterMatches(
new BoundDimFilter(
"vd0-nvl-2",
"0",
null,
true,
false,
false,
null,
StringComparators.NUMERIC
),
NullHandling.replaceWithDefault()
? ImmutableList.of("1", "3", "4", "5", "6")
: ImmutableList.of("1", "2", "3", "4", "5", "6", "7")
);
}
@Test

View File

@ -229,6 +229,12 @@ public class NullFilterTests
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
assertFilterMatches(NullFilter.forColumn("vd0-nvl-2"), ImmutableList.of());
assertFilterMatches(
NotDimFilter.of(NullFilter.forColumn("vd0-nvl-2")),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
assertFilterMatches(NullFilter.forColumn("vf0-add-sub"), ImmutableList.of());
assertFilterMatches(
NotDimFilter.of(NullFilter.forColumn("vf0-add-sub")),
@ -274,6 +280,12 @@ public class NullFilterTests
assertFilterMatches(NullFilter.forColumn("vl0"), ImmutableList.of("3"));
assertFilterMatches(NotDimFilter.of(NullFilter.forColumn("vl0")), ImmutableList.of("0", "1", "2", "4", "5"));
assertFilterMatches(NullFilter.forColumn("vd0-nvl-2"), ImmutableList.of());
assertFilterMatches(
NotDimFilter.of(NullFilter.forColumn("vd0-nvl-2")),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
if (NullHandling.sqlCompatible()) {
// these fail in default value mode that cannot be tested as numeric default values becuase of type
// mismatch for subtract operation

View File

@ -895,6 +895,21 @@ public class RangeFilterTests
: ImmutableList.of("0")
);
assertFilterMatches(
new RangeFilter(
"vd0-nvl-2",
ColumnType.DOUBLE,
0.0,
null,
true,
false,
null
),
NullHandling.replaceWithDefault()
? ImmutableList.of("1", "3", "4", "5", "6")
: ImmutableList.of("1", "2", "3", "4", "5", "6", "7")
);
if (NullHandling.sqlCompatible() || canTestNumericNullsAsDefaultValues) {
// these fail in default value mode that cannot be tested as numeric default values becuase of type
// mismatch for subtract operation

View File

@ -7145,4 +7145,181 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build()
);
}
@Test
public void testNvlJsonValueDoubleMissingColumn()
{
testQuery(
"SELECT\n"
+ "JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE),\n"
+ "NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0),\n"
+ "NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0) > 0\n"
+ "FROM druid.nested\n"
+ "WHERE NVL(JSON_VALUE(nest, '$.nonexistent' RETURNING DOUBLE), 1.0) > 0\n"
+ "LIMIT 1",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE),
new NestedFieldVirtualColumn(
"nest",
"$.nonexistent",
"v1",
ColumnType.DOUBLE
),
expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG)
)
.filters(range("v0", ColumnType.LONG, NullHandling.sqlCompatible() ? 0.0 : "0", null, true, false))
.limit(1)
.columns("v0", "v1", "v2")
.build()
),
NullHandling.sqlCompatible()
? ImmutableList.of(new Object[]{null, 1.0, true})
: ImmutableList.of(),
RowSignature.builder()
.add("EXPR$0", ColumnType.DOUBLE)
.add("EXPR$1", ColumnType.DOUBLE)
.add("EXPR$2", ColumnType.LONG)
.build()
);
}
@Test
public void testNvlJsonValueDoubleSometimesMissing()
{
testQuery(
"SELECT\n"
+ "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0,\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) = 1.0\n"
+ "FROM druid.nested",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
new NestedFieldVirtualColumn("nest", "$.y", "v0", ColumnType.DOUBLE),
expressionVirtualColumn("v1", "nvl(\"v0\",1.0)", ColumnType.DOUBLE),
expressionVirtualColumn("v2", "(nvl(\"v0\",1.0) > 0)", ColumnType.LONG),
expressionVirtualColumn("v3", "(nvl(\"v0\",1.0) == 1.0)", ColumnType.LONG)
)
.columns("v0", "v1", "v2", "v3")
.build()
),
NullHandling.sqlCompatible()
? ImmutableList.of(
new Object[]{2.02, 2.02, true, false},
new Object[]{null, 1.0, true, true},
new Object[]{3.03, 3.03, true, false},
new Object[]{null, 1.0, true, true},
new Object[]{null, 1.0, true, true},
new Object[]{2.02, 2.02, true, false},
new Object[]{null, 1.0, true, true}
)
: ImmutableList.of(
new Object[]{2.02, 2.02, true, false},
new Object[]{null, 0.0, false, false},
new Object[]{3.03, 3.03, true, false},
new Object[]{null, 0.0, false, false},
new Object[]{null, 0.0, false, false},
new Object[]{2.02, 2.02, true, false},
new Object[]{null, 0.0, false, false}
),
RowSignature.builder()
.add("EXPR$0", ColumnType.DOUBLE)
.add("EXPR$1", ColumnType.DOUBLE)
.add("EXPR$2", ColumnType.LONG)
.add("EXPR$3", ColumnType.LONG)
.build()
);
}
@Test
public void testNvlJsonValueDoubleSometimesMissingRangeFilter()
{
testQuery(
"SELECT\n"
+ "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0\n"
+ "FROM druid.nested\n"
+ "WHERE NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE),
new NestedFieldVirtualColumn("nest", "$.y", "v1", ColumnType.DOUBLE),
expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG)
)
.filters(range("v0", ColumnType.LONG, NullHandling.sqlCompatible() ? 0.0 : "0", null, true, false))
.columns("v0", "v1", "v2")
.build()
),
NullHandling.sqlCompatible()
? ImmutableList.of(
new Object[]{2.02, 2.02, true},
new Object[]{null, 1.0, true},
new Object[]{3.03, 3.03, true},
new Object[]{null, 1.0, true},
new Object[]{null, 1.0, true},
new Object[]{2.02, 2.02, true},
new Object[]{null, 1.0, true}
)
: ImmutableList.of(
new Object[]{2.02, 2.02, true},
new Object[]{3.03, 3.03, true},
new Object[]{2.02, 2.02, true}
),
RowSignature.builder()
.add("EXPR$0", ColumnType.DOUBLE)
.add("EXPR$1", ColumnType.DOUBLE)
.add("EXPR$2", ColumnType.LONG)
.build()
);
}
@Test
public void testNvlJsonValueDoubleSometimesMissingEqualityFilter()
{
testQuery(
"SELECT\n"
+ "JSON_VALUE(nest, '$.y' RETURNING DOUBLE),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0),\n"
+ "NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) > 0\n"
+ "FROM druid.nested\n"
+ "WHERE NVL(JSON_VALUE(nest, '$.y' RETURNING DOUBLE), 1.0) = 1.0",
ImmutableList.of(
newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
expressionVirtualColumn("v0", "nvl(\"v1\",1.0)", ColumnType.DOUBLE),
new NestedFieldVirtualColumn("nest", "$.y", "v1", ColumnType.DOUBLE),
expressionVirtualColumn("v2", "notnull(nvl(\"v1\",1.0))", ColumnType.LONG)
)
.filters(equality("v0", 1.0, ColumnType.DOUBLE))
.columns("v0", "v1", "v2")
.build()
),
NullHandling.sqlCompatible()
? ImmutableList.of(
new Object[]{null, 1.0, true},
new Object[]{null, 1.0, true},
new Object[]{null, 1.0, true},
new Object[]{null, 1.0, true}
)
: ImmutableList.of(),
RowSignature.builder()
.add("EXPR$0", ColumnType.DOUBLE)
.add("EXPR$1", ColumnType.DOUBLE)
.add("EXPR$2", ColumnType.LONG)
.build()
);
}
}