Fix array-presenting columns so that equality filters do not match single-element arrays against scalar values (#15503)

* fix array-presenting columns so that equality filters do not match single-element arrays against scalar values
* update docs to clarify usage model of mixed type columns
This commit is contained in:
Clint Wylie 2023-12-08 01:22:07 -08:00 committed by GitHub
parent 5fda8613ad
commit 1eafe983ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 48 additions and 14 deletions

View File

@ -263,13 +263,18 @@ native boolean types, Druid ingests these values as longs if `druid.expressions.
the [array functions](../querying/sql-array-functions.md) or [UNNEST](../querying/sql-functions.md#unnest). Nested the [array functions](../querying/sql-array-functions.md) or [UNNEST](../querying/sql-functions.md#unnest). Nested
columns can be queried with the [JSON functions](../querying/sql-json-functions.md). columns can be queried with the [JSON functions](../querying/sql-json-functions.md).
Mixed type columns are stored in the _least_ restrictive type that can represent all values in the column. For example: Mixed type columns follow the same rules for schema differences between segments, and present as the _least_ restrictive
type that can represent all values in the column. For example:
- Mixed numeric columns are `DOUBLE` - Mixed numeric columns are `DOUBLE`
- If there are any strings present, then the column is a `STRING` - If there are any strings present, then the column is a `STRING`
- If there are arrays, then the column becomes an array with the least restrictive element type - If there are arrays, then the column becomes an array with the least restrictive element type
- Any nested data or arrays of nested data become `COMPLEX<json>` nested columns. - Any nested data or arrays of nested data become `COMPLEX<json>` nested columns.
Grouping, filtering, and aggregating operations handle mixed-type columns as if all values were represented as the
least restrictive type. The exception is the scan query, which returns the values in their original mixed
types; any downstream operations on these values still coerce them to the common type.
If you're already using string-based schema discovery and want to migrate, see [Migrating to type-aware schema discovery](#migrating-to-type-aware-schema-discovery). If you're already using string-based schema discovery and want to migrate, see [Migrating to type-aware schema discovery](#migrating-to-type-aware-schema-discovery).
#### String-based schema discovery #### String-based schema discovery

View File

@ -392,6 +392,9 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
@Override @Override
public Predicate<Object[]> makeArrayPredicate(@Nullable TypeSignature<ValueType> arrayType) public Predicate<Object[]> makeArrayPredicate(@Nullable TypeSignature<ValueType> arrayType)
{ {
if (!matchValue.isArray()) {
return Predicates.alwaysFalse();
}
if (arrayType == null) { if (arrayType == null) {
// fall back to per row detection if input array type is unknown // fall back to per row detection if input array type is unknown
return typeDetectingArrayPredicateSupplier.get(); return typeDetectingArrayPredicateSupplier.get();

View File

@ -320,6 +320,9 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
@Override @Override
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType) public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
{ {
if (!valueType.isArray()) {
return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
}
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value); final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison( final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
eval, eval,

View File

@ -488,7 +488,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
.intervals(Collections.singletonList(Intervals.ETERNITY)) .intervals(Collections.singletonList(Intervals.ETERNITY))
.filters( .filters(
new AndDimFilter( new AndDimFilter(
new EqualityFilter("variantWithArrays", ColumnType.STRING, "1", null),
new EqualityFilter("v0", ColumnType.STRING, "1", null) new EqualityFilter("v0", ColumnType.STRING, "1", null)
) )
) )
@ -524,7 +523,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
.intervals(Collections.singletonList(Intervals.ETERNITY)) .intervals(Collections.singletonList(Intervals.ETERNITY))
.filters( .filters(
new AndDimFilter( new AndDimFilter(
new EqualityFilter("variantWithArrays", ColumnType.DOUBLE, 3.0, null),
new EqualityFilter("v0", ColumnType.DOUBLE, 3.0, null) new EqualityFilter("v0", ColumnType.DOUBLE, 3.0, null)
) )
) )

View File

@ -412,22 +412,18 @@ public class EqualityFilterTests
{ {
if (isAutoSchema()) { if (isAutoSchema()) {
// auto ingests arrays instead of strings // auto ingests arrays instead of strings
// single values are implicitly upcast to single element arrays, so we get some matches here...
if (NullHandling.sqlCompatible()) { if (NullHandling.sqlCompatible()) {
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of("2")); assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of());
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of(""), null), ImmutableList.of("2"));
} }
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of("3")); assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of());
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("a"), null), ImmutableList.of("3"));
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null), ImmutableList.of()); assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null), ImmutableList.of());
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of("4")); assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of());
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), ImmutableList.of("4"));
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null), ImmutableList.of()); assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null), ImmutableList.of());
// array matchers can match the whole array // array matchers can match the whole array
if (NullHandling.sqlCompatible()) {
assertFilterMatches(
new EqualityFilter("dim2", ColumnType.STRING, ImmutableList.of(""), null),
ImmutableList.of("2")
);
}
assertFilterMatches( assertFilterMatches(
new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null), new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null),
ImmutableList.of("0") ImmutableList.of("0")
@ -994,7 +990,7 @@ public class EqualityFilterTests
"3", .. [1.1, 2.2, 3.3] "3", .. [1.1, 2.2, 3.3]
"4", .. 12.34 "4", .. 12.34
"5", .. [100, 200, 300] "5", .. [100, 200, 300]
*/ */
Assume.assumeTrue(isAutoSchema()); Assume.assumeTrue(isAutoSchema());
assertFilterMatches( assertFilterMatches(
@ -1018,6 +1014,7 @@ public class EqualityFilterTests
ImmutableList.of("0", "1", "2", "3", "4", "5") ImmutableList.of("0", "1", "2", "3", "4", "5")
); );
// variant columns must be matched as arrays if they contain any arrays
assertFilterMatches( assertFilterMatches(
new EqualityFilter( new EqualityFilter(
"variant", "variant",
@ -1025,6 +1022,15 @@ public class EqualityFilterTests
"abc", "abc",
null null
), ),
ImmutableList.of()
);
assertFilterMatches(
new EqualityFilter(
"variant",
ColumnType.STRING_ARRAY,
ImmutableList.of("abc"),
null
),
ImmutableList.of("0") ImmutableList.of("0")
); );
@ -1035,6 +1041,15 @@ public class EqualityFilterTests
100L, 100L,
null null
), ),
ImmutableList.of()
);
assertFilterMatches(
new EqualityFilter(
"variant",
ColumnType.LONG_ARRAY,
ImmutableList.of(100L),
null
),
ImmutableList.of("1", "2") ImmutableList.of("1", "2")
); );
@ -1045,6 +1060,15 @@ public class EqualityFilterTests
"100", "100",
null null
), ),
ImmutableList.of()
);
assertFilterMatches(
new EqualityFilter(
"variant",
ColumnType.STRING_ARRAY,
new Object[]{"100"},
null
),
ImmutableList.of("1", "2") ImmutableList.of("1", "2")
); );
@ -1255,6 +1279,7 @@ public class EqualityFilterTests
"cachedOptimizedFilter" "cachedOptimizedFilter"
) )
.withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE) .withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE)
.withPrefabValues(ExprEval.class, ExprEval.of("hello"), ExprEval.of(1.0))
.withIgnoredFields("predicateFactory", "cachedOptimizedFilter", "matchValue") .withIgnoredFields("predicateFactory", "cachedOptimizedFilter", "matchValue")
.verify(); .verify();
} }