fix NestedCommonFormatColumnHandler to use nullable comparator when castToType is set (#15921)

Fixes a bug when the undocumented castToType parameter is set on the 'auto' column schema: it should have been using the 'nullable' comparator to allow null values to be present when merging columns, but wasn't, which would lead to null pointer exceptions. Also fixes an issue I noticed while adding tests: if the 'FLOAT' type was specified for the castToType parameter, an exception would be thrown because that type is never expected to be present — 'auto' uses native expressions to determine the input types, and expressions only have direct support for doubles, not floats.

In the future I should probably split this functionality out of the 'auto' schema (maybe even add a simpler version of the auto indexer dedicated to handling non-nested data) while still writing out the newer 'nested common format' columns that 'auto' uses, but I haven't taken that on in this PR.
This commit is contained in:
Clint Wylie 2024-02-22 08:05:50 -08:00 committed by GitHub
parent 80942d5754
commit cc5964fbcb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 75 additions and 2 deletions

View File

@ -76,7 +76,14 @@ public class AutoTypeColumnSchema extends DimensionSchema
)
{
super(name, null, true);
this.castToType = castToType;
// auto doesn't currently do FLOAT since expressions only support DOUBLE
if (ColumnType.FLOAT.equals(castToType)) {
this.castToType = ColumnType.DOUBLE;
} else if (ColumnType.FLOAT_ARRAY.equals(castToType)) {
this.castToType = ColumnType.DOUBLE_ARRAY;
} else {
this.castToType = castToType;
}
}
@Override

View File

@ -99,8 +99,9 @@ public class NestedCommonFormatColumnHandler implements DimensionHandler<Structu
public Comparator<ColumnValueSelector> getEncodedValueSelectorComparator()
{
if (castTo != null) {
final Comparator<Object> typeComparator = castTo.getNullableStrategy();
return (s1, s2) ->
castTo.getStrategy().compare(
typeComparator.compare(
StructuredData.unwrap(s1.getObject()),
StructuredData.unwrap(s2.getObject())
);

View File

@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.Module;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.guice.NestedDataModule;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
@ -42,6 +43,7 @@ import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.column.ColumnType;
@ -546,6 +548,69 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString());
}
@Test
public void testIngestAndScanSegmentsRealtimeAutoExplicit() throws Exception
{
DimensionsSpec spec = DimensionsSpec.builder()
.setDimensions(
ImmutableList.of(
new AutoTypeColumnSchema("str", ColumnType.STRING),
new AutoTypeColumnSchema("long", ColumnType.LONG),
new AutoTypeColumnSchema("double", ColumnType.FLOAT)
)
)
.build();
Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
.dataSource("test_datasource")
.intervals(
new MultipleIntervalSegmentSpec(
Collections.singletonList(Intervals.ETERNITY)
)
)
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.limit(100)
.context(ImmutableMap.of())
.build();
List<Segment> realtimeSegs = ImmutableList.of(
NestedDataTestUtils.createIncrementalIndex(
tempFolder,
NestedDataTestUtils.TYPES_DATA_FILE,
NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
NestedDataTestUtils.TIMESTAMP_SPEC,
spec,
TransformSpec.NONE,
NestedDataTestUtils.COUNT,
Granularities.DAY,
true
)
);
List<Segment> segs = NestedDataTestUtils.createSegments(
tempFolder,
closer,
NestedDataTestUtils.TYPES_DATA_FILE,
NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
NestedDataTestUtils.TIMESTAMP_SPEC,
spec,
TransformSpec.NONE,
NestedDataTestUtils.COUNT,
Granularities.DAY,
true,
IndexSpec.DEFAULT
);
final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);
List<ScanResultValue> resultsRealtime = seq.toList();
List<ScanResultValue> resultsSegments = seq2.toList();
logResults(resultsSegments);
logResults(resultsRealtime);
Assert.assertEquals(1, resultsRealtime.size());
Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString());
}
@Test
public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryArrayTypes() throws Exception
{