From 29652bd2460768dad9eb3c51a9995f988528b136 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Apr 2023 19:04:51 -0700 Subject: [PATCH] fix NPE that can happen when merging all null nested v4 format columns (#14068) --- .../CompressedNestedDataComplexColumn.java | 5 ++- .../druid/query/NestedDataTestUtils.java | 45 +++++++++++++++---- .../query/scan/NestedDataScanQueryTest.java | 29 ++++++++++++ 3 files changed, 69 insertions(+), 10 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index d71f3fa9fc3..fb051f74b71 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -134,7 +134,7 @@ public abstract class CompressedNestedDataComplexColumn stringDictionary, Supplier> longDictionarySupplier, Supplier> doubleDictionarySupplier, - Supplier arrayDictionarySupplier, + @Nullable Supplier arrayDictionarySupplier, SmooshedFileMapper fileMapper, BitmapSerdeFactory bitmapSerdeFactory, ByteOrder byteOrder, @@ -220,6 +220,9 @@ public abstract class CompressedNestedDataComplexColumn getArrayDictionary() { + if (arrayDictionarySupplier == null) { + return Indexed.empty(); + } Iterable arrays = () -> { final TStringDictionary stringDictionary = stringDictionarySupplier.get(); final FixedIndexed longDictionary = longDictionarySupplier.get(); diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 9a43c77b9c2..a4801be561b 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -40,6 +40,7 @@ import org.apache.druid.java.util.common.parsers.JSONPathSpec; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.IndexSpec; @@ -92,7 +93,7 @@ public class NestedDataTestUtils .useSchemaDiscovery(true) .build(); - public static final DimensionsSpec TSV_SCHEMA = + public static final DimensionsSpec TSV_V4_SCHEMA = DimensionsSpec.builder() .setDimensions( Arrays.asList( @@ -100,7 +101,22 @@ public class NestedDataTestUtils new NestedDataDimensionSchema("nest_json"), new NestedDataDimensionSchema("nester_json"), new NestedDataDimensionSchema("variant_json"), - new NestedDataDimensionSchema("list_json") + new NestedDataDimensionSchema("list_json"), + new NestedDataDimensionSchema("nonexistent") + ) + ) + .build(); + + public static final DimensionsSpec TSV_SCHEMA = + DimensionsSpec.builder() + .setDimensions( + Arrays.asList( + new AutoTypeColumnSchema("dim"), + new AutoTypeColumnSchema("nest_json"), + new AutoTypeColumnSchema("nester_json"), + new AutoTypeColumnSchema("variant_json"), + new AutoTypeColumnSchema("list_json"), + new AutoTypeColumnSchema("nonexistent") ) ) .build(); @@ -110,12 +126,6 @@ public class NestedDataTestUtils null ); - public static final InputRowSchema SIMPLE_DATA_TSV_SCHEMA = new InputRowSchema( - TIMESTAMP_SPEC, - TSV_SCHEMA, - null - ); - public static DelimitedInputFormat SIMPLE_DATA_TSV_INPUT_FORMAT = new DelimitedInputFormat( Arrays.asList( "timestamp", @@ -161,6 +171,22 @@ public class NestedDataTestUtils tempFolder, closer, Granularities.NONE, + TSV_SCHEMA, + true + ); + } + + public static List createSimpleSegmentsTsvV4( + TemporaryFolder tempFolder, + Closer closer + ) + throws Exception + { + return createSimpleNestedTestDataTsvSegments( + tempFolder, + closer, + Granularities.NONE, + TSV_V4_SCHEMA, true ); } @@ -169,6 +195,7 @@ public class NestedDataTestUtils TemporaryFolder tempFolder, Closer closer, Granularity granularity, + DimensionsSpec dimensionsSpec, boolean rollup ) throws Exception { @@ -178,7 +205,7 @@ public class NestedDataTestUtils SIMPLE_DATA_TSV_FILE, SIMPLE_DATA_TSV_INPUT_FORMAT, TIMESTAMP_SPEC, - SIMPLE_DATA_TSV_SCHEMA.getDimensionsSpec(), + dimensionsSpec, SIMPLE_DATA_TSV_TRANSFORM, COUNT, granularity, diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index adc379b38b1..ff469de7bcb 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -262,6 +262,35 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest Assert.assertEquals(resultsSegments.get(0).getEvents().toString(), resultsRealtime.get(0).getEvents().toString()); } + @Test + public void testIngestAndScanSegmentsTsvV4() throws Exception + { + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .virtualColumns( + new NestedFieldVirtualColumn("nest", "$.x", "x"), + new NestedFieldVirtualColumn("nester", "$.x[0]", "x_0"), + new NestedFieldVirtualColumn("nester", "$.y.c[1]", "y_c_1") + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List segs = NestedDataTestUtils.createSimpleSegmentsTsvV4(tempFolder, closer); + + final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(8, ((List) results.get(0).getEvents()).size()); + logResults(results); + } + @Test public void testIngestAndScanSegmentsTsv() throws Exception {