mirror of https://github.com/apache/druid.git
fix array presenting columns to not match single element arrays to scalars for equality (#15503)
* fix array presenting columns to not match single element arrays to scalars for equality * update docs to clarify usage model of mixed type columns
This commit is contained in:
parent
5fda8613ad
commit
1eafe983ec
|
@ -263,13 +263,18 @@ native boolean types, Druid ingests these values as longs if `druid.expressions.
|
||||||
the [array functions](../querying/sql-array-functions.md) or [UNNEST](../querying/sql-functions.md#unnest). Nested
|
the [array functions](../querying/sql-array-functions.md) or [UNNEST](../querying/sql-functions.md#unnest). Nested
|
||||||
columns can be queried with the [JSON functions](../querying/sql-json-functions.md).
|
columns can be queried with the [JSON functions](../querying/sql-json-functions.md).
|
||||||
|
|
||||||
Mixed type columns are stored in the _least_ restrictive type that can represent all values in the column. For example:
|
Mixed type columns follow the same rules that apply to schema differences between segments, and are presented as the _least_ restrictive
|
||||||
|
type that can represent all values in the column. For example:
|
||||||
|
|
||||||
- Mixed numeric columns are `DOUBLE`
|
- Mixed numeric columns are `DOUBLE`
|
||||||
- If there are any strings present, then the column is a `STRING`
|
- If there are any strings present, then the column is a `STRING`
|
||||||
- If there are arrays, then the column becomes an array with the least restrictive element type
|
- If there are arrays, then the column becomes an array with the least restrictive element type
|
||||||
- Any nested data or arrays of nested data become `COMPLEX<json>` nested columns.
|
- Any nested data or arrays of nested data become `COMPLEX<json>` nested columns.
|
||||||
|
|
||||||
|
Grouping, filtering, and aggregation operations on mixed type columns handle the values as if all of them are represented as the
|
||||||
|
least restrictive type. The exception to this is the scan query, which will return the values in their original mixed
|
||||||
|
types, but any downstream operations on these values will still coerce them to the common type.
|
||||||
|
|
||||||
If you're already using string-based schema discovery and want to migrate, see [Migrating to type-aware schema discovery](#migrating-to-type-aware-schema-discovery).
|
If you're already using string-based schema discovery and want to migrate, see [Migrating to type-aware schema discovery](#migrating-to-type-aware-schema-discovery).
|
||||||
|
|
||||||
#### String-based schema discovery
|
#### String-based schema discovery
|
||||||
|
|
|
@ -392,6 +392,9 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
|
||||||
@Override
|
@Override
|
||||||
public Predicate<Object[]> makeArrayPredicate(@Nullable TypeSignature<ValueType> arrayType)
|
public Predicate<Object[]> makeArrayPredicate(@Nullable TypeSignature<ValueType> arrayType)
|
||||||
{
|
{
|
||||||
|
if (!matchValue.isArray()) {
|
||||||
|
return Predicates.alwaysFalse();
|
||||||
|
}
|
||||||
if (arrayType == null) {
|
if (arrayType == null) {
|
||||||
// fall back to per row detection if input array type is unknown
|
// fall back to per row detection if input array type is unknown
|
||||||
return typeDetectingArrayPredicateSupplier.get();
|
return typeDetectingArrayPredicateSupplier.get();
|
||||||
|
|
|
@ -320,6 +320,9 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
||||||
@Override
|
@Override
|
||||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||||
{
|
{
|
||||||
|
if (!valueType.isArray()) {
|
||||||
|
return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
|
||||||
|
}
|
||||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
|
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
|
||||||
final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
|
final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
|
||||||
eval,
|
eval,
|
||||||
|
|
|
@ -488,7 +488,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
|
||||||
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
||||||
.filters(
|
.filters(
|
||||||
new AndDimFilter(
|
new AndDimFilter(
|
||||||
new EqualityFilter("variantWithArrays", ColumnType.STRING, "1", null),
|
|
||||||
new EqualityFilter("v0", ColumnType.STRING, "1", null)
|
new EqualityFilter("v0", ColumnType.STRING, "1", null)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -524,7 +523,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
|
||||||
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
||||||
.filters(
|
.filters(
|
||||||
new AndDimFilter(
|
new AndDimFilter(
|
||||||
new EqualityFilter("variantWithArrays", ColumnType.DOUBLE, 3.0, null),
|
|
||||||
new EqualityFilter("v0", ColumnType.DOUBLE, 3.0, null)
|
new EqualityFilter("v0", ColumnType.DOUBLE, 3.0, null)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -412,22 +412,18 @@ public class EqualityFilterTests
|
||||||
{
|
{
|
||||||
if (isAutoSchema()) {
|
if (isAutoSchema()) {
|
||||||
// auto ingests arrays instead of strings
|
// auto ingests arrays instead of strings
|
||||||
// single values are implicitly upcast to single element arrays, so we get some matches here...
|
|
||||||
if (NullHandling.sqlCompatible()) {
|
if (NullHandling.sqlCompatible()) {
|
||||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of("2"));
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of());
|
||||||
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of(""), null), ImmutableList.of("2"));
|
||||||
}
|
}
|
||||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of("3"));
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of());
|
||||||
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("a"), null), ImmutableList.of("3"));
|
||||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null), ImmutableList.of());
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null), ImmutableList.of());
|
||||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of("4"));
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of());
|
||||||
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), ImmutableList.of("4"));
|
||||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null), ImmutableList.of());
|
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null), ImmutableList.of());
|
||||||
|
|
||||||
// array matchers can match the whole array
|
// array matchers can match the whole array
|
||||||
if (NullHandling.sqlCompatible()) {
|
|
||||||
assertFilterMatches(
|
|
||||||
new EqualityFilter("dim2", ColumnType.STRING, ImmutableList.of(""), null),
|
|
||||||
ImmutableList.of("2")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
assertFilterMatches(
|
assertFilterMatches(
|
||||||
new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null),
|
new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null),
|
||||||
ImmutableList.of("0")
|
ImmutableList.of("0")
|
||||||
|
@ -994,7 +990,7 @@ public class EqualityFilterTests
|
||||||
"3", .. [1.1, 2.2, 3.3]
|
"3", .. [1.1, 2.2, 3.3]
|
||||||
"4", .. 12.34
|
"4", .. 12.34
|
||||||
"5", .. [100, 200, 300]
|
"5", .. [100, 200, 300]
|
||||||
|
|
||||||
*/
|
*/
|
||||||
Assume.assumeTrue(isAutoSchema());
|
Assume.assumeTrue(isAutoSchema());
|
||||||
assertFilterMatches(
|
assertFilterMatches(
|
||||||
|
@ -1018,6 +1014,7 @@ public class EqualityFilterTests
|
||||||
ImmutableList.of("0", "1", "2", "3", "4", "5")
|
ImmutableList.of("0", "1", "2", "3", "4", "5")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// variant columns must be matched as arrays if they contain any arrays
|
||||||
assertFilterMatches(
|
assertFilterMatches(
|
||||||
new EqualityFilter(
|
new EqualityFilter(
|
||||||
"variant",
|
"variant",
|
||||||
|
@ -1025,6 +1022,15 @@ public class EqualityFilterTests
|
||||||
"abc",
|
"abc",
|
||||||
null
|
null
|
||||||
),
|
),
|
||||||
|
ImmutableList.of()
|
||||||
|
);
|
||||||
|
assertFilterMatches(
|
||||||
|
new EqualityFilter(
|
||||||
|
"variant",
|
||||||
|
ColumnType.STRING_ARRAY,
|
||||||
|
ImmutableList.of("abc"),
|
||||||
|
null
|
||||||
|
),
|
||||||
ImmutableList.of("0")
|
ImmutableList.of("0")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1035,6 +1041,15 @@ public class EqualityFilterTests
|
||||||
100L,
|
100L,
|
||||||
null
|
null
|
||||||
),
|
),
|
||||||
|
ImmutableList.of()
|
||||||
|
);
|
||||||
|
assertFilterMatches(
|
||||||
|
new EqualityFilter(
|
||||||
|
"variant",
|
||||||
|
ColumnType.LONG_ARRAY,
|
||||||
|
ImmutableList.of(100L),
|
||||||
|
null
|
||||||
|
),
|
||||||
ImmutableList.of("1", "2")
|
ImmutableList.of("1", "2")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1045,6 +1060,15 @@ public class EqualityFilterTests
|
||||||
"100",
|
"100",
|
||||||
null
|
null
|
||||||
),
|
),
|
||||||
|
ImmutableList.of()
|
||||||
|
);
|
||||||
|
assertFilterMatches(
|
||||||
|
new EqualityFilter(
|
||||||
|
"variant",
|
||||||
|
ColumnType.STRING_ARRAY,
|
||||||
|
new Object[]{"100"},
|
||||||
|
null
|
||||||
|
),
|
||||||
ImmutableList.of("1", "2")
|
ImmutableList.of("1", "2")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1255,6 +1279,7 @@ public class EqualityFilterTests
|
||||||
"cachedOptimizedFilter"
|
"cachedOptimizedFilter"
|
||||||
)
|
)
|
||||||
.withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE)
|
.withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE)
|
||||||
|
.withPrefabValues(ExprEval.class, ExprEval.of("hello"), ExprEval.of(1.0))
|
||||||
.withIgnoredFields("predicateFactory", "cachedOptimizedFilter", "matchValue")
|
.withIgnoredFields("predicateFactory", "cachedOptimizedFilter", "matchValue")
|
||||||
.verify();
|
.verify();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue