From 358892e5b035b3f530f84976ce83acb1443ea539 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 5 Feb 2024 01:42:09 -0800 Subject: [PATCH] add nested array index support, fix some bugs (#15752) This PR wires up ValueIndexes and ArrayElementIndexes for nested arrays, ValueIndexes for nested long and double columns, and fixes a handful of bugs I found after adding nested columns to the filter test gauntlet. --- .../org/apache/druid/math/expr/ExprEval.java | 2 +- .../apache/druid/query/filter/NullFilter.java | 2 +- .../druid/query/filter/RangeFilter.java | 12 +- .../CompressedNestedDataComplexColumn.java | 2 + .../NestedFieldColumnIndexSupplier.java | 348 ++++++++++++++- .../ArrayContainsElementFilterTests.java | 417 +++++++++++++++++- .../druid/segment/filter/BaseFilterTest.java | 100 ++++- .../segment/filter/EqualityFilterTests.java | 414 ++++++++++++++++- .../segment/filter/RangeFilterTests.java | 215 ++++++++- .../NestedFieldColumnIndexSupplierTest.java | 22 + 10 files changed, 1496 insertions(+), 38 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 9c0f5e2736a..d74b623379c 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -666,7 +666,7 @@ public abstract class ExprEval if (valueToCompare.isArray() && !typeToCompareWith.isArray()) { final Object[] array = valueToCompare.asArray(); // cannot cast array to scalar if array length is greater than 1 - if (array != null && array.length > 1) { + if (array != null && array.length != 1) { return null; } } diff --git a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java index 168ca2bc0e6..cc2bdd2cf63 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java +++ 
b/processing/src/main/java/org/apache/druid/query/filter/NullFilter.java @@ -216,7 +216,7 @@ public class NullFilter extends AbstractOptimizableDimFilter implements Filter .build(); } - private static class NullPredicateFactory implements DruidPredicateFactory + public static class NullPredicateFactory implements DruidPredicateFactory { public static final NullPredicateFactory INSTANCE = new NullPredicateFactory(); diff --git a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java index c34daa9156d..63fc48559ac 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/RangeFilter.java @@ -461,7 +461,11 @@ public class RangeFilter extends AbstractOptimizableDimFilter implements Filter final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); if (lower != null) { - builder.append(lower); + if (matchValueType.isArray()) { + builder.append(Arrays.deepToString(lowerEval.asArray())); + } else { + builder.append(lower); + } if (lowerOpen) { builder.append(" < "); } else { @@ -479,7 +483,11 @@ public class RangeFilter extends AbstractOptimizableDimFilter implements Filter } else { builder.append(" <= "); } - builder.append(upper); + if (matchValueType.isArray()) { + builder.append(Arrays.deepToString(upperEval.asArray())); + } else { + builder.append(upper); + } } return builder.appendFilterTuning(filterTuning).build(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index 7b0e99db5f3..cc259c3be0c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -114,6 +114,7 @@ 
public abstract class CompressedNestedDataComplexColumn stringDictionarySupplier; private final Supplier> longDictionarySupplier; private final Supplier> doubleDictionarySupplier; + @Nullable private final Supplier arrayDictionarySupplier; private final SmooshedFileMapper fileMapper; private final String rootFieldPath; @@ -1012,6 +1013,7 @@ public abstract class CompressedNestedDataComplexColumn> globalLongDictionarySupplier; private final Supplier> globalDoubleDictionarySupplier; + private final Supplier globalArrayDictionarySupplier; + @SuppressWarnings({"FieldCanBeLocal", "unused"}) @Nullable private final GenericIndexed arrayElementBitmaps; @@ -96,6 +111,7 @@ public class NestedFieldColumnIndexSupplier globalStringDictionarySupplier, Supplier> globalLongDictionarySupplier, Supplier> globalDoubleDictionarySupplier, + @Nullable Supplier globalArrayDictionarySupplier, @Nullable Supplier> arrayElementDictionarySupplier, @Nullable GenericIndexed arrayElementBitmaps, int numRows @@ -120,10 +137,12 @@ public class NestedFieldColumnIndexSupplier valueType) + { + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value); + final ExprEval castForComparison = ExprEval.castForEqualityComparison(eval, ExpressionType.LONG); + final ImmutableBitmap nullValueBitmap = localDictionarySupplier.get().get(0) == 0 + ? 
bitmaps.get(0) + : bitmapFactory.makeEmptyImmutableBitmap(); + if (castForComparison == null) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final long longValue = castForComparison.asLong(); + + + return new SimpleBitmapColumnIndex() + { + final FixedIndexed localDictionary = localDictionarySupplier.get(); + final FixedIndexed globalDictionary = globalLongDictionarySupplier.get(); + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + { + final int globalId = globalDictionary.indexOf(longValue); + if (globalId < 0) { + if (includeUnknown) { + return bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + final int id = localDictionary.indexOf(globalId + adjustLongId); + if (includeUnknown) { + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of(getBitmap(id), nullValueBitmap) + ); + } + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(id)); + } + }; + } + } + private class NestedLongStringValueSetIndex implements StringValueSetIndexes { @Override @@ -665,7 +745,7 @@ public class NestedFieldColumnIndexSupplier valueType) + { + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value); + final ExprEval castForComparison = ExprEval.castForEqualityComparison(eval, ExpressionType.DOUBLE); + final ImmutableBitmap nullValueBitmap = localDictionarySupplier.get().get(0) == 0 + ? 
bitmaps.get(0) + : bitmapFactory.makeEmptyImmutableBitmap(); + if (castForComparison == null) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final double doubleValue = castForComparison.asDouble(); + + return new SimpleBitmapColumnIndex() + { + final FixedIndexed localDictionary = localDictionarySupplier.get(); + final FixedIndexed globalDictionary = globalDoubleDictionarySupplier.get(); + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + { + final int globalId = globalDictionary.indexOf(doubleValue); + if (globalId < 0) { + if (includeUnknown) { + return bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + final int id = localDictionary.indexOf(globalId + adjustDoubleId); + if (includeUnknown) { + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of(getBitmap(id), nullValueBitmap) + ); + } + if (id < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getBitmap(id)); + } + }; + } + } + private class NestedDoubleStringValueSetIndex implements StringValueSetIndexes { @Override @@ -1095,6 +1224,10 @@ public class NestedFieldColumnIndexSupplier stringDictionary = globalStringDictionarySupplier.get(); final FixedIndexed longDictionary = globalLongDictionarySupplier.get(); final FixedIndexed doubleDictionary = globalDoubleDictionarySupplier.get(); + @Nullable + final FrontCodedIntArrayIndexed arrayDictionary = globalArrayDictionarySupplier == null + ? 
null + : globalArrayDictionarySupplier.get(); IntList getIndexes(@Nullable String value) { @@ -1260,6 +1393,10 @@ public class NestedFieldColumnIndexSupplier> arrayPredicateSupplier = Suppliers.memoize( + () -> matcherFactory.makeArrayPredicate(singleType) + ); + // in the future, this could use an int iterator final Iterator iterator = localDictionary.iterator(); int next; @@ -1292,7 +1429,25 @@ public class NestedFieldColumnIndexSupplier= adjustDoubleId) { + if (nextValue >= adjustArrayId) { + // this shouldn't be possible since arrayIds will only exist if array dictionary is not null + // v4 columns however have a null array dictionary + Preconditions.checkNotNull(arrayDictionary); + final int[] array = arrayDictionary.get(nextValue - adjustArrayId); + final Object[] arrayObj = new Object[array.length]; + for (int i = 0; i < arrayObj.length; i++) { + if (array[i] == 0) { + arrayObj[i] = null; + } else if (array[i] >= adjustDoubleId) { + arrayObj[i] = doubleDictionary.get(array[i] - adjustDoubleId); + } else if (array[i] >= adjustLongId) { + arrayObj[i] = longDictionary.get(array[i] - adjustLongId); + } else { + arrayObj[i] = StringUtils.fromUtf8Nullable(stringDictionary.get(array[i])); + } + } + nextSet = arrayPredicateSupplier.get().apply(arrayObj).matches(includeUnknown); + } else if (nextValue >= adjustDoubleId) { nextSet = doublePredicate.applyDouble(doubleDictionary.get(nextValue - adjustDoubleId)) .matches(includeUnknown); } else if (nextValue >= adjustLongId) { @@ -1313,4 +1468,187 @@ public class NestedFieldColumnIndexSupplier valueType) + { + if (!valueType.isArray()) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value); + final ExprEval castForComparison = ExprEval.castForEqualityComparison( + eval, + ExpressionType.fromColumnTypeStrict(singleType) + ); + if (castForComparison == null) { + return new 
AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final Object[] arrayToMatch = castForComparison.asArray(); + Indexed elements; + final int elementOffset; + + switch (singleType.getElementType().getType()) { + case STRING: + elements = globalStringDictionarySupplier.get(); + elementOffset = 0; + break; + case LONG: + elements = globalLongDictionarySupplier.get(); + elementOffset = adjustLongId; + break; + case DOUBLE: + elements = globalDoubleDictionarySupplier.get(); + elementOffset = adjustDoubleId; + break; + default: + throw DruidException.defensive( + "Unhandled array type [%s] how did this happen?", + singleType.getElementType() + ); + } + + final int[] ids = new int[arrayToMatch.length]; + for (int i = 0; i < arrayToMatch.length; i++) { + if (arrayToMatch[i] == null) { + ids[i] = 0; + } else if (singleType.getElementType().is(ValueType.STRING)) { + ids[i] = elements.indexOf(StringUtils.toUtf8ByteBuffer((String) arrayToMatch[i])); + } else { + ids[i] = elements.indexOf(arrayToMatch[i]) + elementOffset; + } + if (ids[i] < 0) { + if (value == null) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + } + } + + final FixedIndexed localDictionary = localDictionarySupplier.get(); + final FrontCodedIntArrayIndexed globalArrayDictionary = globalArrayDictionarySupplier.get(); + return new SimpleBitmapColumnIndex() + { + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + { + final int localId = localDictionary.indexOf(globalArrayDictionary.indexOf(ids) + adjustArrayId); + if (includeUnknown) { + if (localId < 0) { + return bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of(getBitmap(localId), nullValueBitmap) + ); + } + if (localId < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return 
bitmapResultFactory.wrapDimensionValue(getBitmap(localId)); + } + }; + } + } + + private class NestedArrayElementIndexes implements ArrayElementIndexes + { + private final ImmutableBitmap nullValueBitmap = localDictionarySupplier.get().get(0) == 0 + ? bitmaps.get(0) + : bitmapFactory.makeEmptyImmutableBitmap(); + + @Nullable + @Override + public BitmapColumnIndex containsValue(@Nullable Object value, TypeSignature elementValueType) + { + // this column doesn't store nested arrays, bail out if checking if we contain an array + if (elementValueType.isArray()) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final ExprEval eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(elementValueType), value); + + final ExprEval castForComparison = ExprEval.castForEqualityComparison( + eval, + ExpressionType.fromColumnTypeStrict(singleType.isArray() ? singleType.getElementType() : singleType) + ); + if (castForComparison == null) { + return new AllFalseBitmapColumnIndex(bitmapFactory, nullValueBitmap); + } + final FixedIndexed elementDictionary = arrayElementDictionarySupplier.get(); + final Indexed globalElements; + final int elementOffset; + switch (singleType.getElementType().getType()) { + case STRING: + globalElements = globalStringDictionarySupplier.get(); + elementOffset = 0; + break; + case LONG: + globalElements = globalLongDictionarySupplier.get(); + elementOffset = adjustLongId; + break; + case DOUBLE: + globalElements = globalDoubleDictionarySupplier.get(); + elementOffset = adjustDoubleId; + break; + default: + throw DruidException.defensive( + "Unhandled array type [%s] how did this happen?", + singleType.getElementType() + ); + } + + return new SimpleBitmapColumnIndex() + { + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + { + final int elementId = getElementId(); + if (includeUnknown) { + if (elementId < 0) { + return 
bitmapResultFactory.wrapDimensionValue(nullValueBitmap); + } + return bitmapResultFactory.unionDimensionValueBitmaps( + ImmutableList.of(getElementBitmap(elementId), nullValueBitmap) + ); + } + if (elementId < 0) { + return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap()); + } + return bitmapResultFactory.wrapDimensionValue(getElementBitmap(elementId)); + } + + private int getElementId() + { + if (castForComparison.value() == null) { + return 0; + } + + if (castForComparison.type().is(ExprType.STRING)) { + return elementDictionary.indexOf( + globalElements.indexOf(StringUtils.toUtf8ByteBuffer(castForComparison.asString())) + ); + } else { + return elementDictionary.indexOf( + globalElements.indexOf(castForComparison.value()) + elementOffset + ); + } + } + + private ImmutableBitmap getElementBitmap(int idx) + { + if (idx < 0) { + return bitmapFactory.makeEmptyImmutableBitmap(); + } + final ImmutableBitmap bitmap = arrayElementBitmaps.get(idx); + return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap; + } + }; + } + } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ArrayContainsElementFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/ArrayContainsElementFilterTests.java index 75f5dd60f58..434273870ff 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ArrayContainsElementFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ArrayContainsElementFilterTests.java @@ -75,8 +75,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayStringColumn() { - // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); /* dim0 .. arrayString "0", .. 
["a", "b", "c"] @@ -160,8 +159,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayLongColumn() { - // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); /* dim0 .. arrayLong "0", .. [1L, 2L, 3L] @@ -241,8 +239,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayDoubleColumn() { - // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); /* dim0 .. arrayDouble "0", .. [1.1, 2.2, 3.3] @@ -300,8 +297,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayStringColumnContainsArrays() { - // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); // these are not nested arrays, expect no matches assertFilterMatches( new ArrayContainsElementFilter( @@ -330,9 +326,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayLongColumnContainsArrays() { - // only auto schema supports array columns... skip other segment types - - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); // these are not nested arrays, expect no matches assertFilterMatches( @@ -362,8 +356,7 @@ public class ArrayContainsElementFilterTests @Test public void testArrayDoubleColumnContainsArrays() { - // only auto schema supports array columns... 
skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); // these are not nested arrays, expect no matches assertFilterMatches( new ArrayContainsElementFilter( @@ -472,7 +465,7 @@ public class ArrayContainsElementFilterTests public void testArrayContainsNestedArray() { // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); assertFilterMatchesSkipVectorize( new ArrayContainsElementFilter("nestedArrayLong", ColumnType.LONG_ARRAY, new Object[]{1L, 2L, 3L}, null), ImmutableList.of("0", "2") @@ -516,8 +509,404 @@ public class ArrayContainsElementFilterTests ImmutableList.of() ); } + } + @Test + public void testNestedArrayStringColumn() + { + // duplicate of testArrayStringColumn but targeting nested.arrayString + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); + /* + dim0 .. arrayString + "0", .. ["a", "b", "c"] + "1", .. [] + "2", .. null + "3", .. ["a", "b", "c"] + "4", .. ["c", "d"] + "5", .. [null] + */ + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + "a", + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + "a", + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("1", "4", "5") + : ImmutableList.of("1", "2", "4", "5") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + "c", + null + ), + ImmutableList.of("0", "3", "4") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + "c", + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("1", "5") + : ImmutableList.of("1", "2", "5") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + null, + null + ), + ImmutableList.of("5") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING, + null, + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("0", "1", "3", "4") + : ImmutableList.of("0", "1", "2", "3", "4") + ); + } + + @Test + public void testNestedArrayLongColumn() + { + // duplicate of testArrayLongColumn but targeting nested.arrayLong + Assume.assumeFalse(testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")); + /* + dim0 .. arrayLong + "0", .. [1L, 2L, 3L] + "1", .. [] + "2", .. [1L, 2L, 3L] + "3", .. null + "4", .. [null] + "5", .. [123L, 345L] + */ + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG, + 2L, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG, + 2L, + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("1", "4", "5") + : ImmutableList.of("1", "3", "4", "5") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG, + null, + null + ), + ImmutableList.of("4") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG, + null, + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("0", "1", "2", "5") + : ImmutableList.of("0", "1", "2", "3", "5") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.DOUBLE, + 2.0, + null + ), + ImmutableList.of("0", "2") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.STRING, + "2", + null + ), + ImmutableList.of("0", "2") + ); + } + + @Test + public void testNestedArrayDoubleColumn() + { + // duplicate of testArrayDoubleColumn but targeting nested.arrayDouble + Assume.assumeTrue(canTestArrayColumns()); + /* + dim0 .. arrayDouble + "0", .. [1.1, 2.2, 3.3] + "1", .. [1.1, 2.2, 3.3] + "2", .. [null] + "3", .. [] + "4", .. [-1.1, -333.3] + "5", .. null + */ + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.DOUBLE, + 2.2, + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.DOUBLE, + 2.2, + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("2", "3", "4") + : ImmutableList.of("2", "3", "4", "5") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.STRING, + "2.2", + null + ), + ImmutableList.of("0", "1") + ); + + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.DOUBLE, + null, + null + ), + ImmutableList.of("2") + ); + } + + @Test + public void testNestedArrayStringColumnContainsArrays() + { + // duplicate of testArrayStringColumnContainsArrays but targeting nested.arrayString + Assume.assumeTrue(canTestArrayColumns()); + // these are not nested arrays, expect no matches + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of("a", "b", "c"), + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of("a", "b", "c"), + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("0", "1", "3", "4", "5") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + + @Test + public void testNestedArrayLongColumnContainsArrays() + { + // duplicate of testArrayLongColumnContainsArrays but targeting nested.arrayLong + Assume.assumeTrue(canTestArrayColumns()); + + // these are not nested arrays, expect no matches + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(1L, 2L, 3L), + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(1L, 2L, 3L), + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("0", "1", "2", "4", "5") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + + @Test + public void testNestedArrayDoubleColumnContainsArrays() + { + // duplicate of testArrayDoubleColumnContainsArrays but targeting nested.arrayDouble + Assume.assumeTrue(canTestArrayColumns()); + // these are not nested arrays, expect no matches + assertFilterMatches( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.3), + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new ArrayContainsElementFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.3), + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("0", "1", "2", "3", "4") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + + @Test + public void testNestedScalarColumnContains() + { + Assume.assumeTrue(canTestArrayColumns()); + + // duplicate of testScalarColumnContains but targeting nested columns + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING, "a", null), + ImmutableList.of("1", "5") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING, "b", null), + ImmutableList.of("2") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING, "c", null), + ImmutableList.of("4") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING, "noexist", null), + ImmutableList.of() + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING_ARRAY, ImmutableList.of("c"), null), + ImmutableList.of("4") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.s0", ColumnType.STRING_ARRAY, ImmutableList.of("a", "c"), null), + ImmutableList.of() + ); + + assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE, 10.1, null), + ImmutableList.of("1") + ); + 
assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE, 120.0245, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE, 765.432, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE, 765.431, null), + ImmutableList.of() + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE_ARRAY, new Object[]{10.1}, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.d0", ColumnType.DOUBLE_ARRAY, new Object[]{10.1, 120.0245}, null), + ImmutableList.of() + ); + + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG, 100L, null), + ImmutableList.of("1") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG, 40L, null), + ImmutableList.of("2") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG, 9001L, null), + ImmutableList.of("4") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG, 9000L, null), + ImmutableList.of() + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG_ARRAY, ImmutableList.of(9001L), null), + ImmutableList.of("4") + ); + assertFilterMatches( + new ArrayContainsElementFilter("nested.l0", ColumnType.LONG_ARRAY, ImmutableList.of(40L, 9001L), null), + ImmutableList.of() + ); } } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java index 1106801729b..f4ecad4485b 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/BaseFilterTest.java @@ -41,6 +41,7 @@ import org.apache.druid.data.input.impl.TimestampSpec; import 
org.apache.druid.frame.FrameType; import org.apache.druid.frame.segment.FrameSegment; import org.apache.druid.frame.segment.FrameStorageAdapter; +import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; @@ -78,6 +79,7 @@ import org.apache.druid.segment.RowAdapters; import org.apache.druid.segment.RowBasedColumnSelectorFactory; import org.apache.druid.segment.RowBasedStorageAdapter; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -99,6 +101,7 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.ListFilteredVirtualColumn; +import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; @@ -152,7 +155,13 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest new ListFilteredVirtualColumn("allow-dim0", DefaultDimensionSpec.of("dim0"), ImmutableSet.of("3", "4"), true), new ListFilteredVirtualColumn("deny-dim0", DefaultDimensionSpec.of("dim0"), ImmutableSet.of("3", "4"), false), new ListFilteredVirtualColumn("allow-dim2", DefaultDimensionSpec.of("dim2"), ImmutableSet.of("a"), true), - new ListFilteredVirtualColumn("deny-dim2", DefaultDimensionSpec.of("dim2"), ImmutableSet.of("a"), false) + new ListFilteredVirtualColumn("deny-dim2", DefaultDimensionSpec.of("dim2"), ImmutableSet.of("a"), false), + new 
NestedFieldVirtualColumn("nested", "$.s0", "nested.s0", ColumnType.STRING), + new NestedFieldVirtualColumn("nested", "$.d0", "nested.d0", ColumnType.DOUBLE), + new NestedFieldVirtualColumn("nested", "$.l0", "nested.l0", ColumnType.LONG), + new NestedFieldVirtualColumn("nested", "$.arrayLong", "nested.arrayLong", ColumnType.LONG_ARRAY), + new NestedFieldVirtualColumn("nested", "$.arrayDouble", "nested.arrayDouble", ColumnType.DOUBLE_ARRAY), + new NestedFieldVirtualColumn("nested", "$.arrayString", "nested.arrayString", ColumnType.STRING_ARRAY) ) ); @@ -178,6 +187,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .add(new AutoTypeColumnSchema("arrayLong", ColumnType.LONG_ARRAY)) .add(new AutoTypeColumnSchema("arrayDouble", ColumnType.DOUBLE_ARRAY)) .add(new AutoTypeColumnSchema("variant", null)) + .add(new AutoTypeColumnSchema("nested", null)) .build() ); @@ -203,6 +213,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest .add("arrayLong", ColumnType.LONG_ARRAY) .add("arrayDouble", ColumnType.DOUBLE_ARRAY) .add("variant", ColumnType.STRING_ARRAY) + .add("nested", ColumnType.NESTED_DATA) .build(); static final List DEFAULT_ROWS = ImmutableList.of( @@ -218,7 +229,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ImmutableList.of("a", "b", "c"), ImmutableList.of(1L, 2L, 3L), ImmutableList.of(1.1, 2.2, 3.3), - "abc" + "abc", + TestHelper.makeMapWithExplicitNull( + "s0", "", + "d0", 0.0, + "f0", 0.0f, + "l0", 0L, + "arrayString", ImmutableList.of("a", "b", "c"), + "arrayLong", ImmutableList.of(1L, 2L, 3L), + "arrayDouble", ImmutableList.of(1.1, 2.2, 3.3), + "variant", "abc" + ) ), makeDefaultSchemaRow( "1", @@ -232,7 +253,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ImmutableList.of(), ImmutableList.of(), new Object[]{1.1, 2.2, 3.3}, - 100L + 100L, + TestHelper.makeMapWithExplicitNull( + "s0", "a", + "d0", 10.1, + "f0", 10.1f, + "l0", 100L, + "arrayString", 
ImmutableList.of(), + "arrayLong", ImmutableList.of(), + "arrayDouble", new Object[]{1.1, 2.2, 3.3}, + "variant", 100L + ) ), makeDefaultSchemaRow( "2", @@ -246,7 +277,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest null, new Object[]{1L, 2L, 3L}, Collections.singletonList(null), - "100" + "100", + TestHelper.makeMapWithExplicitNull( + "s0", "b", + "d0", null, + "f0", 5.5f, + "l0", 40L, + "arrayString", null, + "arrayLong", new Object[]{1L, 2L, 3L}, + "arrayDouble", Collections.singletonList(null), + "variant", "100" + ) ), makeDefaultSchemaRow( "3", @@ -260,7 +301,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest new Object[]{"a", "b", "c"}, null, ImmutableList.of(), - Arrays.asList(1.1, 2.2, 3.3) + Arrays.asList(1.1, 2.2, 3.3), + TestHelper.makeMapWithExplicitNull( + "s0", null, + "d0", 120.0245, + "f0", 110.0f, + "l0", null, + "arrayString", new Object[]{"a", "b", "c"}, + "arrayLong", null, + "arrayDouble", ImmutableList.of(), + "variant", Arrays.asList(1.1, 2.2, 3.3) + ) ), makeDefaultSchemaRow( "4", @@ -274,7 +325,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest ImmutableList.of("c", "d"), Collections.singletonList(null), new Object[]{-1.1, -333.3}, - 12.34 + 12.34, + TestHelper.makeMapWithExplicitNull( + "s0", "c", + "d0", 60.0, + "f0", null, + "l0", 9001L, + "arrayString", ImmutableList.of("c", "d"), + "arrayLong", Collections.singletonList(null), + "arrayDouble", new Object[]{-1.1, -333.3}, + "variant", 12.34 + ) ), makeDefaultSchemaRow( "5", @@ -288,7 +349,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest Collections.singletonList(null), new Object[]{123L, 345L}, null, - Arrays.asList(100, 200, 300) + Arrays.asList(100, 200, 300), + TestHelper.makeMapWithExplicitNull( + "s0", "a", + "d0", 765.432, + "f0", 123.45f, + "l0", 12345L, + "arrayString", Collections.singletonList(null), + "arrayLong", new Object[]{123L, 345L}, + "arrayDouble", 
null, + "variant", Arrays.asList(100, 200, 300) + ) ) ); @@ -372,6 +443,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest @Before public void setUp() throws Exception { + NestedDataModule.registerHandlersAndSerde(); String className = getClass().getName(); Map> adaptersForClass = adapterCache.get().get(className); if (adaptersForClass == null) { @@ -572,7 +644,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {}) ) .put("frame (row-based)", input -> { - // remove array type columns from frames since they aren't currently supported other than string + // remove variant type columns from row frames since they aren't currently supported input.mapSchema( schema -> new IncrementalIndexSchema( @@ -584,7 +656,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest schema.getDimensionsSpec() .getDimensions() .stream() - .filter(dimensionSchema -> !(dimensionSchema instanceof AutoTypeColumnSchema)) + .filter(dimensionSchema -> !dimensionSchema.getName().equals("variant")) .collect(Collectors.toList()) ), schema.getMetrics(), @@ -595,7 +667,7 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest return Pair.of(segment.asStorageAdapter(), segment); }) .put("frame (columnar)", input -> { - // remove array type columns from frames since they aren't currently supported other than string + // remove array type columns from columnar frames since they aren't currently supported input.mapSchema( schema -> new IncrementalIndexSchema( @@ -674,6 +746,14 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest return false; } + protected boolean canTestArrayColumns() + { + if (testName.contains("frame (columnar)") || testName.contains("rowBasedWithoutTypeSignature")) { + return false; + } + return true; + } + private Filter makeFilter(final DimFilter dimFilter) { if (dimFilter == 
null) { diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java index 6521d5cf957..9c2bd90071c 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java @@ -773,7 +773,7 @@ public class EqualityFilterTests @Test public void testNumeric() { - /* + /* dim0 d0 f0 l0 "0" .. 0.0, 0.0f, 0L "1" .. 10.1, 10.1f, 100L @@ -839,8 +839,7 @@ public class EqualityFilterTests @Test public void testArrays() { - // only auto schema supports array columns... skip other segment types - Assume.assumeTrue(isAutoSchema()); + Assume.assumeTrue(canTestArrayColumns()); /* dim0 .. arrayString arrayLong arrayDouble "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2, 3.3] @@ -1112,6 +1111,7 @@ public class EqualityFilterTests "5", .. [100, 200, 300] */ + // only the auto schema supports variant types well Assume.assumeTrue(isAutoSchema()); assertFilterMatches( new EqualityFilter( @@ -1202,6 +1202,414 @@ public class EqualityFilterTests ImmutableList.of("5") ); } + + @Test + public void testNestedColumnEquality() + { + // nested column mirrors the top level columns, so these cases are copied from other tests + Assume.assumeTrue(canTestArrayColumns()); + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + new EqualityFilter("nested.s0", ColumnType.STRING, "", null), + ImmutableList.of("0") + ); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "", null)), + ImmutableList.of("1", "2", "4", "5") + ); + } + assertFilterMatches(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null), ImmutableList.of("1", "5")); + assertFilterMatches(new EqualityFilter("nested.s0", ColumnType.STRING, "b", null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("nested.s0", ColumnType.STRING, "c", null), 
ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("nested.s0", ColumnType.STRING, "noexist", null), ImmutableList.of()); + + if (NullHandling.sqlCompatible()) { + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "4") + ); + // "(s0 = 'a') is not true", same rows as "s0 <> 'a'", but also with null rows + assertFilterMatches( + NotDimFilter.of(IsTrueDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null))), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is true", equivalent to "s0 = 'a'" + assertFilterMatches( + IsTrueDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("1", "5") + ); + // "(s0 = 'a') is false", equivalent results to "s0 <> 'a'" + assertFilterMatches( + IsFalseDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "4") + ); + // "(s0 = 'a') is not false", same rows as "s0 = 'a'", but also with null rows + assertFilterMatches( + NotDimFilter.of(IsFalseDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null))), + ImmutableList.of("1", "3", "5") + ); + + try { + // make sure that if 3vl is disabled we behave with 2vl + NullHandling.initializeForTestsWithValues(false, false, null); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "3", "4") + ); + } + finally { + NullHandling.initializeForTests(); + } + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "noexist", null)), + ImmutableList.of("0", "1", "2", "4", "5") + ); + } else { + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "3", "4") + ); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "noexist", null)), + 
ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + // in default value mode, is true/is false are basically pointless since they have the same behavior as = and <> + // "(s0 = 'a') is not true" equivalent to "s0 <> 'a'" + assertFilterMatches( + NotDimFilter.of(IsTrueDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null))), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is true", equivalent to "s0 = 'a'" + assertFilterMatches( + IsTrueDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("1", "5") + ); + // "(s0 = 'a') is false" equivalent to "s0 <> 'a'" + assertFilterMatches( + IsFalseDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is not false", equivalent to "s0 = 'a'" + assertFilterMatches( + NotDimFilter.of(IsFalseDimFilter.of(new EqualityFilter("nested.s0", ColumnType.STRING, "a", null))), + ImmutableList.of("1", "5") + ); + } + + /* + dim0 d0 l0 + "0" .. 0.0, 0L + "1" .. 10.1, 100L + "2" .. null, 40L + "3" .. 120.0245, null + "4" .. 60.0, 9001L + "5" .. 765.432, 12345L + */ + + // nested columns do not coerce null to default values + + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 0.0, null), ImmutableList.of("0")); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 0.0, null)), + NullHandling.sqlCompatible() + ? ImmutableList.of("1", "3", "4", "5") + : ImmutableList.of("1", "2", "3", "4", "5") + ); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.LONG, 0L, null), ImmutableList.of("0")); + assertFilterMatches( + NotDimFilter.of(new EqualityFilter("nested.l0", ColumnType.LONG, 0L, null)), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("1", "2", "4", "5") + : ImmutableList.of("1", "2", "3", "4", "5") + ); + + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.STRING, "0", null), ImmutableList.of("0")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.STRING, "0", null), ImmutableList.of("0")); + + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 10.1, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 120.0245, null), ImmutableList.of("3")); + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 765.432, null), ImmutableList.of("5")); + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.DOUBLE, 765.431, null), ImmutableList.of()); + + // different type matcher: LONG match values against the nested DOUBLE field + assertFilterMatches( + new EqualityFilter("nested.d0", ColumnType.LONG, 0L, null), + ImmutableList.of("0") + ); + assertFilterMatches(new EqualityFilter("nested.d0", ColumnType.LONG, 60L, null), ImmutableList.of("4")); + + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.LONG, 100L, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.LONG, 40L, null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.LONG, 9001L, null), ImmutableList.of("4")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.LONG, 9000L, null), ImmutableList.of()); + + // test loss of precision + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 100.1, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 100.0, null), ImmutableList.of("1")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 40.1, null), ImmutableList.of()); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 40.0, null), ImmutableList.of("2")); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 9001.1, null), 
ImmutableList.of()); + assertFilterMatches(new EqualityFilter("nested.l0", ColumnType.DOUBLE, 9001.0, null), ImmutableList.of("4")); + + /* + dim0 .. arrayString arrayLong arrayDouble + "0", .. ["a", "b", "c"], [1L, 2L, 3L], [1.1, 2.2, 3.3] + "1", .. [], [], [1.1, 2.2, 3.3] + "2", .. null, [1L, 2L, 3L], [null] + "3", .. ["a", "b", "c"], null, [] + "4", .. ["c", "d"], [null], [-1.1, -333.3] + "5", .. [null], [123L, 345L], null + */ + + assertFilterMatches( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of("a", "b", "c"), + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of("a", "b", "c"), + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("1", "4", "5") + : ImmutableList.of("1", "2", "4", "5") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + new Object[]{"a", "b", "c"}, + null + ), + ImmutableList.of("0", "3") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + ImmutableList.of(), + null + ), + ImmutableList.of("1") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + new Object[]{null}, + null + ), + ImmutableList.of("5") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + new Object[]{null, null}, + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayString", + ColumnType.STRING_ARRAY, + new Object[]{null, null}, + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("0", "1", "3", "4", "5") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(1L, 2L, 3L), + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(1L, 2L, 3L), + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("1", "4", "5") + : ImmutableList.of("1", "3", "4", "5") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{1L, 2L, 3L}, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + ImmutableList.of(), + null + ), + ImmutableList.of("1") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{null}, + null + ), + ImmutableList.of("4") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{null, null}, + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayLong", + ColumnType.LONG_ARRAY, + new Object[]{null, null}, + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("0", "1", "2", "4", "5") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + // test loss of precision matching long arrays with double array match values + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.0, 2.0, 3.0}, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.2, 3.3}, + null + ), + ImmutableList.of() + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{null}, + null + ), + ImmutableList.of("4") + ); + + + assertFilterMatches( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.3), + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.3), + null + ) + ), + NullHandling.sqlCompatible() + ? ImmutableList.of("2", "3", "4") + : ImmutableList.of("2", "3", "4", "5") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.2, 3.3}, + null + ), + ImmutableList.of("0", "1") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(), + null + ), + ImmutableList.of("3") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + new Object[]{null}, + null + ), + ImmutableList.of("2") + ); + assertFilterMatches( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.4), + null + ), + ImmutableList.of() + ); + assertFilterMatches( + NotDimFilter.of( + new EqualityFilter( + "nested.arrayDouble", + ColumnType.DOUBLE_ARRAY, + ImmutableList.of(1.1, 2.2, 3.4), + null + ) + ), + NullHandling.sqlCompatible() + ? 
ImmutableList.of("0", "1", "2", "3", "4") + : ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } } public static class EqualityFilterNonParameterizedTests extends InitializedNullHandlingTest diff --git a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java index 5c981d81992..c21a2198193 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/RangeFilterTests.java @@ -43,6 +43,7 @@ import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.RangeFilter; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.AfterClass; @@ -78,7 +79,17 @@ public class RangeFilterTests 10L, new Object[]{"x", "y"}, new Object[]{100, 200}, - new Object[]{1.1, null, 3.3} + new Object[]{1.1, null, 3.3}, + null, + TestHelper.makeMapWithExplicitNull( + "s0", "d", + "d0", 6.6, + "f0", null, + "l0", 10L, + "arrayString", new Object[]{"x", "y"}, + "arrayLong", new Object[]{100, 200}, + "arrayDouble", new Object[]{1.1, null, 3.3} + ) )) .add(makeDefaultSchemaRow( "7", @@ -91,7 +102,17 @@ public class RangeFilterTests null, new Object[]{null, "hello", "world"}, new Object[]{1234, 3456L, null}, - new Object[]{1.23, 4.56, 6.78} + new Object[]{1.23, 4.56, 6.78}, + null, + TestHelper.makeMapWithExplicitNull( + "s0", "e", + "d0", null, + "f0", 3.0f, + "l0", null, + "arrayString", new Object[]{null, "hello", "world"}, + "arrayLong", new Object[]{1234, 3456L, null}, + "arrayDouble", new Object[]{1.23, 4.56, 6.78} + ) )) .build(); @@ -1628,6 +1649,196 @@ public class RangeFilterTests ImmutableList.of("1", "2", "5") ); } + + @Test + public void testNested() + { 
+ // nested column mirrors the top level columns, so these cases are copied from other tests + Assume.assumeTrue(canTestArrayColumns()); + assertFilterMatches( + new RangeFilter("nested.d0", ColumnType.DOUBLE, 120.0, 120.03, false, false, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new RangeFilter("nested.d0", ColumnType.FLOAT, 120.02f, 120.03f, false, false, null), + ImmutableList.of("3") + ); + assertFilterMatches( + new RangeFilter("nested.d0", ColumnType.FLOAT, 59.5f, 60.01f, false, false, null), + ImmutableList.of("4") + ); + assertFilterMatches( + new RangeFilter("nested.l0", ColumnType.LONG, 12344L, 12346L, false, false, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("nested.l0", ColumnType.DOUBLE, 12344.0, 12345.5, false, false, null), + ImmutableList.of("5") + ); + assertFilterMatches( + new RangeFilter("nested.l0", ColumnType.FLOAT, 12344.0f, 12345.5f, false, false, null), + ImmutableList.of("5") + ); + + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{1.0, 2.0, 3.0}, + true, + false, + null + ), + ImmutableList.of("0", "1", "2", "4") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{1.0, 2.0, 3.0}, + true, + true, + null + ), + ImmutableList.of("1", "4") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + null, + new Object[]{1.1, 2.1, 3.1}, + true, + true, + null + ), + ImmutableList.of("0", "1", "2", "4") + ); + + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.0, 2.0, 3.0}, + null, + false, + false, + null + ), + ImmutableList.of("0", "2", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + null, + false, + false, + null + ), + ImmutableList.of("0", "2", "5", "6", "7") + ); + 
assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + null, + true, + false, + null + ), + ImmutableList.of("0", "2", "5", "6", "7") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.0, 2.0, 3.0}, + null, + true, + true, + null + ), + ImmutableList.of("5", "6", "7") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.1, 3.1}, + null, + false, + true, + null + ), + ImmutableList.of("5", "6", "7") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{1.1, 2.1, 3.1}, + null, + true, + true, + null + ), + ImmutableList.of("5", "6", "7") + ); + + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + new Object[]{1.1, 2.1, 3.1}, + true, + true, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + new Object[]{1.1, 2.1, 3.1}, + false, + true, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + new Object[]{1.1, 2.1, 3.1}, + true, + false, + null + ), + ImmutableList.of("0", "2") + ); + assertFilterMatches( + new RangeFilter( + "nested.arrayLong", + ColumnType.DOUBLE_ARRAY, + new Object[]{0.8, 1.8, 2.8}, + new Object[]{1.1, 2.1, 3.1}, + false, + false, + null + ), + ImmutableList.of("0", "2") + ); + } } public static class RangeFilterNonParameterizedTests extends InitializedNullHandlingTest diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java index a3ad411e2c7..8ed9ad23fdb 
100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java @@ -35,6 +35,8 @@ import org.apache.druid.segment.column.TypeStrategies; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.FixedIndexed; import org.apache.druid.segment.data.FixedIndexedWriter; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; +import org.apache.druid.segment.data.FrontCodedIntArrayIndexedWriter; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.Indexed; @@ -86,6 +88,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT Supplier> globalStrings; Supplier> globalLongs; Supplier> globalDoubles; + Supplier globalArrays; @Before @@ -94,6 +97,8 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT ByteBuffer stringBuffer = ByteBuffer.allocate(1 << 12); ByteBuffer longBuffer = ByteBuffer.allocate(1 << 12).order(ByteOrder.nativeOrder()); ByteBuffer doubleBuffer = ByteBuffer.allocate(1 << 12).order(ByteOrder.nativeOrder()); + ByteBuffer arrayBuffer = ByteBuffer.allocate(1 << 12).order(ByteOrder.nativeOrder()); + GenericIndexedWriter stringWriter = new GenericIndexedWriter<>( new OnHeapMemorySegmentWriteOutMedium(), @@ -148,10 +153,19 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT doubleWriter.write(9.9); writeToBuffer(doubleBuffer, doubleWriter); + FrontCodedIntArrayIndexedWriter arrayWriter = new FrontCodedIntArrayIndexedWriter( + new OnHeapMemorySegmentWriteOutMedium(), + ByteOrder.nativeOrder(), + 4 + ); + arrayWriter.open(); + writeToBuffer(arrayBuffer, arrayWriter); + GenericIndexed strings = GenericIndexed.read(stringBuffer, GenericIndexed.UTF8_STRATEGY); globalStrings = () -> 
strings.singleThreaded(); globalLongs = FixedIndexed.read(longBuffer, TypeStrategies.LONG, ByteOrder.nativeOrder(), Long.BYTES); globalDoubles = FixedIndexed.read(doubleBuffer, TypeStrategies.DOUBLE, ByteOrder.nativeOrder(), Double.BYTES); + globalArrays = FrontCodedIntArrayIndexed.read(arrayBuffer, ByteOrder.nativeOrder()); } @Test @@ -1309,6 +1323,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT stringIndexed, longIndexed, doubleIndexed, + globalArrays, null, null, ROW_COUNT @@ -1509,6 +1524,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -1593,6 +1609,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -1673,6 +1690,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -1758,6 +1776,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -1838,6 +1857,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -1923,6 +1943,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT @@ -2018,6 +2039,7 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT globalStrings, globalLongs, globalDoubles, + globalArrays, null, null, ROW_COUNT