mirror of https://github.com/apache/druid.git
fix array presenting columns to not match single element arrays to scalars for equality (#15503)
* fix array presenting columns to not match single element arrays to scalars for equality * update docs to clarify usage model of mixed type columns
This commit is contained in:
parent
5fda8613ad
commit
1eafe983ec
|
@ -263,13 +263,18 @@ native boolean types, Druid ingests these values as longs if `druid.expressions.
|
|||
the [array functions](../querying/sql-array-functions.md) or [UNNEST](../querying/sql-functions.md#unnest). Nested
|
||||
columns can be queried with the [JSON functions](../querying/sql-json-functions.md).
|
||||
|
||||
Mixed type columns are stored in the _least_ restrictive type that can represent all values in the column. For example:
|
||||
Mixed type columns follow the same rules for schema differences between segments, and present as the _least_ restrictive
|
||||
type that can represent all values in the column. For example:
|
||||
|
||||
- Mixed numeric columns are `DOUBLE`
|
||||
- If there are any strings present, then the column is a `STRING`
|
||||
- If there are arrays, then the column becomes an array with the least restrictive element type
|
||||
- Any nested data or arrays of nested data become `COMPLEX<json>` nested columns.
|
||||
|
||||
Grouping, filtering, and aggregating mixed type values will handle these columns as if all values are represented as the
|
||||
least restrictive type. The exception to this is the scan query, which will return the values in their original mixed
|
||||
types, but any downstream operations on these values will still coerce them to the common type.
|
||||
|
||||
If you're already using string-based schema discovery and want to migrate, see [Migrating to type-aware schema discovery](#migrating-to-type-aware-schema-discovery).
|
||||
|
||||
#### String-based schema discovery
|
||||
|
|
|
@ -392,6 +392,9 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
|
|||
@Override
|
||||
public Predicate<Object[]> makeArrayPredicate(@Nullable TypeSignature<ValueType> arrayType)
|
||||
{
|
||||
if (!matchValue.isArray()) {
|
||||
return Predicates.alwaysFalse();
|
||||
}
|
||||
if (arrayType == null) {
|
||||
// fall back to per row detection if input array type is unknown
|
||||
return typeDetectingArrayPredicateSupplier.get();
|
||||
|
|
|
@ -320,6 +320,9 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
@Override
|
||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||
{
|
||||
if (!valueType.isArray()) {
|
||||
return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap);
|
||||
}
|
||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value);
|
||||
final ExprEval<?> castForComparison = ExprEval.castForEqualityComparison(
|
||||
eval,
|
||||
|
|
|
@ -488,7 +488,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
|
|||
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
||||
.filters(
|
||||
new AndDimFilter(
|
||||
new EqualityFilter("variantWithArrays", ColumnType.STRING, "1", null),
|
||||
new EqualityFilter("v0", ColumnType.STRING, "1", null)
|
||||
)
|
||||
)
|
||||
|
@ -524,7 +523,6 @@ public class NestedDataTimeseriesQueryTest extends InitializedNullHandlingTest
|
|||
.intervals(Collections.singletonList(Intervals.ETERNITY))
|
||||
.filters(
|
||||
new AndDimFilter(
|
||||
new EqualityFilter("variantWithArrays", ColumnType.DOUBLE, 3.0, null),
|
||||
new EqualityFilter("v0", ColumnType.DOUBLE, 3.0, null)
|
||||
)
|
||||
)
|
||||
|
|
|
@ -412,22 +412,18 @@ public class EqualityFilterTests
|
|||
{
|
||||
if (isAutoSchema()) {
|
||||
// auto ingests arrays instead of strings
|
||||
// single values are implicitly upcast to single element arrays, so we get some matches here...
|
||||
if (NullHandling.sqlCompatible()) {
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of("2"));
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "", null), ImmutableList.of());
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of(""), null), ImmutableList.of("2"));
|
||||
}
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of("3"));
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "a", null), ImmutableList.of());
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("a"), null), ImmutableList.of("3"));
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "b", null), ImmutableList.of());
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of("4"));
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "c", null), ImmutableList.of());
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), ImmutableList.of("4"));
|
||||
assertFilterMatches(new EqualityFilter("dim2", ColumnType.STRING, "d", null), ImmutableList.of());
|
||||
|
||||
// array matchers can match the whole array
|
||||
if (NullHandling.sqlCompatible()) {
|
||||
assertFilterMatches(
|
||||
new EqualityFilter("dim2", ColumnType.STRING, ImmutableList.of(""), null),
|
||||
ImmutableList.of("2")
|
||||
);
|
||||
}
|
||||
assertFilterMatches(
|
||||
new EqualityFilter("dim2", ColumnType.STRING_ARRAY, new Object[]{"a", "b"}, null),
|
||||
ImmutableList.of("0")
|
||||
|
@ -994,7 +990,7 @@ public class EqualityFilterTests
|
|||
"3", .. [1.1, 2.2, 3.3]
|
||||
"4", .. 12.34
|
||||
"5", .. [100, 200, 300]
|
||||
|
||||
|
||||
*/
|
||||
Assume.assumeTrue(isAutoSchema());
|
||||
assertFilterMatches(
|
||||
|
@ -1018,6 +1014,7 @@ public class EqualityFilterTests
|
|||
ImmutableList.of("0", "1", "2", "3", "4", "5")
|
||||
);
|
||||
|
||||
// variant columns must be matched as arrays if they contain any arrays
|
||||
assertFilterMatches(
|
||||
new EqualityFilter(
|
||||
"variant",
|
||||
|
@ -1025,6 +1022,15 @@ public class EqualityFilterTests
|
|||
"abc",
|
||||
null
|
||||
),
|
||||
ImmutableList.of()
|
||||
);
|
||||
assertFilterMatches(
|
||||
new EqualityFilter(
|
||||
"variant",
|
||||
ColumnType.STRING_ARRAY,
|
||||
ImmutableList.of("abc"),
|
||||
null
|
||||
),
|
||||
ImmutableList.of("0")
|
||||
);
|
||||
|
||||
|
@ -1035,6 +1041,15 @@ public class EqualityFilterTests
|
|||
100L,
|
||||
null
|
||||
),
|
||||
ImmutableList.of()
|
||||
);
|
||||
assertFilterMatches(
|
||||
new EqualityFilter(
|
||||
"variant",
|
||||
ColumnType.LONG_ARRAY,
|
||||
ImmutableList.of(100L),
|
||||
null
|
||||
),
|
||||
ImmutableList.of("1", "2")
|
||||
);
|
||||
|
||||
|
@ -1045,6 +1060,15 @@ public class EqualityFilterTests
|
|||
"100",
|
||||
null
|
||||
),
|
||||
ImmutableList.of()
|
||||
);
|
||||
assertFilterMatches(
|
||||
new EqualityFilter(
|
||||
"variant",
|
||||
ColumnType.STRING_ARRAY,
|
||||
new Object[]{"100"},
|
||||
null
|
||||
),
|
||||
ImmutableList.of("1", "2")
|
||||
);
|
||||
|
||||
|
@ -1255,6 +1279,7 @@ public class EqualityFilterTests
|
|||
"cachedOptimizedFilter"
|
||||
)
|
||||
.withPrefabValues(ColumnType.class, ColumnType.STRING, ColumnType.DOUBLE)
|
||||
.withPrefabValues(ExprEval.class, ExprEval.of("hello"), ExprEval.of(1.0))
|
||||
.withIgnoredFields("predicateFactory", "cachedOptimizedFilter", "matchValue")
|
||||
.verify();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue