diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java index ee8251edb27..f0785d85587 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSupplier.java @@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.data.BitmapSerdeFactory; @@ -35,6 +36,8 @@ import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.VByte; +import org.apache.druid.segment.index.SimpleImmutableBitmapIndex; +import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde; import javax.annotation.Nullable; @@ -42,7 +45,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; -public class NestedDataColumnSupplier implements Supplier +public class NestedDataColumnSupplier implements Supplier, ColumnIndexSupplier { public static NestedDataColumnSupplier read( ColumnType logicalType, @@ -242,4 +245,14 @@ public class NestedDataColumnSupplier implements Supplier T as(Class clazz) + { + if (clazz.equals(NullValueIndex.class)) { /* NullValueIndex is the only index type this method hands out */ + return (T) (NullValueIndex) () -> new SimpleImmutableBitmapIndex(nullValues); /* cast to T is consistent with the class check above; nullValues is declared outside this hunk */ + } + return null; /* every other requested index type is reported as unavailable */ + } } diff --git 
a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index 8353fd07ceb..55ec10466e9 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -22,6 +22,7 @@ package org.apache.druid.segment.serde; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; @@ -308,6 +309,14 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType; builder.setType(logicalType); builder.setNestedCommonFormatColumnSupplier(supplier); + // in default value mode, SQL planning by default uses selector filters for things like 'is null', which does + // not work correctly for complex types (or arrays). 
so, only hook up this index in sql compatible mode so that + // query results are consistent when using an index or the value matcher. + // additionally, nested columns only have a null value index, so we only bother with the index supplier if there + // are actually any null rows; otherwise we use the default 'no indexes' supplier + if (NullHandling.sqlCompatible() && hasNulls) { + builder.setIndexSupplier(supplier, false, false); /* NOTE(review): the two boolean flags are presumably the bitmap-index / spatial-index capability flags -- confirm against ColumnBuilder */ + } builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls, enforceLogicalType)); } } diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index cedef264a53..c6e3cd17438 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -917,6 +917,74 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest Assert.assertEquals(results.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); } + @Test + public void testIngestAndScanSegmentsNestedColumnNotNullFilter() throws Exception /* scans complexObj with NOT(complexObj IS NULL) and checks that the incremental-index and persisted-segment results match */ + { + Druids.ScanQueryBuilder builder = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .filters(NotDimFilter.of(NullFilter.forColumn("complexObj"))) + .columns("complexObj") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()); + Query scanQuery = builder.build(); + final AggregatorFactory[] aggs = new AggregatorFactory[]{new CountAggregatorFactory("count")}; + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndex( + tempFolder, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + 
NestedDataTestUtils.TIMESTAMP_SPEC, + NestedDataTestUtils.AUTO_DISCOVERY, + TransformSpec.NONE, + aggs, + Granularities.NONE, + true + ) + ); + List segs = NestedDataTestUtils.createSegments( + tempFolder, + closer, + NestedDataTestUtils.ALL_TYPES_TEST_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + NestedDataTestUtils.TIMESTAMP_SPEC, + NestedDataTestUtils.AUTO_DISCOVERY, + TransformSpec.NONE, + aggs, + Granularities.NONE, + true, + IndexSpec.DEFAULT + ); + + + final Sequence seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery); + final Sequence seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List resultsRealtime = seq.toList(); + List resultsSegments = seq2.toList(); + logResults(resultsSegments); + logResults(resultsRealtime); + Assert.assertEquals(1, resultsRealtime.size()); + Assert.assertEquals(resultsRealtime.size(), resultsSegments.size()); + if (NullHandling.replaceWithDefault()) { /* NOTE(review): both branches currently assert the same expected string */ + Assert.assertEquals( + "[[{x=400, y=[{l=[null], m=100, n=5}, {l=[a, b, c], m=a, n=1}], z={}}], [{x=10, y=[{l=[b, b, c], m=b, n=2}, [1, 2, 3]], z={a=[5.5], b=false}}], [{x=1234, y=[{l=[a, b, c], m=a, n=1}, {l=[a, b, c], m=a, n=1}], z={a=[1.1, 2.2, 3.3], b=true}}], [{x=1234, z={a=[1.1, 2.2, 3.3], b=true}}], [{x=11, y=[], z={a=[null], b=false}}], [{x=4.4, y=[{l=[], m=100, n=3}, {l=[a]}, {l=[b], n=[]}], z={a=[], b=true}}]]", + resultsSegments.get(0).getEvents().toString() + ); + } else { + Assert.assertEquals( + "[[{x=400, y=[{l=[null], m=100, n=5}, {l=[a, b, c], m=a, n=1}], z={}}], [{x=10, y=[{l=[b, b, c], m=b, n=2}, [1, 2, 3]], z={a=[5.5], b=false}}], [{x=1234, y=[{l=[a, b, c], m=a, n=1}, {l=[a, b, c], m=a, n=1}], z={a=[1.1, 2.2, 3.3], b=true}}], [{x=1234, z={a=[1.1, 2.2, 3.3], b=true}}], [{x=11, y=[], z={a=[null], b=false}}], [{x=4.4, y=[{l=[], m=100, n=3}, {l=[a]}, {l=[b], n=[]}], z={a=[], b=true}}]]", + resultsSegments.get(0).getEvents().toString() + ); + } + Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), 
resultsRealtime.get(0).getEvents().toString()); + } + private static void logResults(List results) { StringBuilder bob = new StringBuilder();