From cc5964fbcb88cbd742936ba249c007900ce7c726 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 22 Feb 2024 08:05:50 -0800 Subject: [PATCH] fix NestedCommonFormatColumnHandler to use nullable comparator when castToType is set (#15921) Fixes a bug when the undocumented castToType parameter is set on 'auto' column schema, which should have been using the 'nullable' comparator to allow null values to be present when merging columns, but wasn't which would lead to null pointer exceptions. Also fixes an issue I noticed while adding tests that if 'FLOAT' type was specified for the castToType parameter it would be an exception because that type is not expected to be present, since 'auto' uses the native expressions to determine the input types and expressions don't have direct support for floats, only doubles. In the future I should probably split this functionality out of the 'auto' schema (maybe even have a simpler version of the auto indexer dedicated to handling non-nested data) but still have the same results of writing out the newer 'nested common format' columns used by 'auto', but I haven't taken that on in this PR. --- .../druid/segment/AutoTypeColumnSchema.java | 9 ++- .../NestedCommonFormatColumnHandler.java | 3 +- .../query/scan/NestedDataScanQueryTest.java | 65 +++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java index a72ae37d874..9959c9ef249 100644 --- a/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java +++ b/processing/src/main/java/org/apache/druid/segment/AutoTypeColumnSchema.java @@ -76,7 +76,14 @@ public class AutoTypeColumnSchema extends DimensionSchema ) { super(name, null, true); - this.castToType = castToType; + // auto doesn't currently do FLOAT since expressions only support DOUBLE + if (ColumnType.FLOAT.equals(castToType)) { + this.castToType = ColumnType.DOUBLE; + } else if (ColumnType.FLOAT_ARRAY.equals(castToType)) { + this.castToType = ColumnType.DOUBLE_ARRAY; + } else { + this.castToType = castToType; + } } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java b/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java index 5e811e22995..e679b2d2d9c 100644 --- a/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/NestedCommonFormatColumnHandler.java @@ -99,8 +99,9 @@ public class NestedCommonFormatColumnHandler implements DimensionHandler getEncodedValueSelectorComparator() { if (castTo != null) { + final Comparator typeComparator = castTo.getNullableStrategy(); return (s1, s2) -> - castTo.getStrategy().compare( + typeComparator.compare( StructuredData.unwrap(s1.getObject()), StructuredData.unwrap(s2.getObject()) ); diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index c6e3cd17438..64c68c943f5 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.Module; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; @@ -42,6 +43,7 @@ import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; +import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnType; @@ -546,6 +548,69 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString()); } + @Test + public void testIngestAndScanSegmentsRealtimeAutoExplicit() throws Exception + { + DimensionsSpec spec = DimensionsSpec.builder() + .setDimensions( + ImmutableList.of( + new AutoTypeColumnSchema("str", ColumnType.STRING), + new AutoTypeColumnSchema("long", ColumnType.LONG), + new AutoTypeColumnSchema("double", ColumnType.FLOAT) + ) + ) + .build(); + Query scanQuery = Druids.newScanQueryBuilder() + .dataSource("test_datasource") + .intervals( + new MultipleIntervalSegmentSpec( + Collections.singletonList(Intervals.ETERNITY) + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .limit(100) + .context(ImmutableMap.of()) + .build(); + List realtimeSegs = ImmutableList.of( + NestedDataTestUtils.createIncrementalIndex( + tempFolder, + NestedDataTestUtils.TYPES_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + NestedDataTestUtils.TIMESTAMP_SPEC, + spec, + TransformSpec.NONE, + NestedDataTestUtils.COUNT, + Granularities.DAY, + true + ) + ); + List segs = NestedDataTestUtils.createSegments( + tempFolder, + closer, + NestedDataTestUtils.TYPES_DATA_FILE, + NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT, + NestedDataTestUtils.TIMESTAMP_SPEC, + spec, + TransformSpec.NONE, + NestedDataTestUtils.COUNT, + Granularities.DAY, + true, + IndexSpec.DEFAULT + ); + + + final Sequence seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery); + final Sequence seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery); + + List resultsRealtime = seq.toList(); + List resultsSegments = seq2.toList(); + logResults(resultsSegments); + logResults(resultsRealtime); + Assert.assertEquals(1, resultsRealtime.size()); + Assert.assertEquals(resultsRealtime.size(), resultsSegments.size()); + Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString()); + } + @Test public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryArrayTypes() throws Exception {