mirror of https://github.com/apache/druid.git
fix NestedCommonFormatColumnHandler to use nullable comparator when castToType is set (#15921)
Fixes a bug that occurs when the undocumented castToType parameter is set on an 'auto' column schema. The handler should have been using the 'nullable' comparator so that null values are tolerated when merging columns, but it was not, which could lead to null pointer exceptions.

Also fixes an issue I noticed while adding tests: specifying 'FLOAT' as the castToType resulted in an exception, because that type is never expected to be present; 'auto' uses native expressions to determine input types, and expressions only support DOUBLE, not FLOAT. The constructor now coerces FLOAT to DOUBLE (and FLOAT_ARRAY to DOUBLE_ARRAY) instead.

In the future I should probably split this functionality out of the 'auto' schema (maybe even add a simpler version of the auto indexer dedicated to handling non-nested data) while still writing out the newer 'nested common format' columns that 'auto' uses, but I have not taken that on in this PR.
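To illustrate the comparator part of the fix, here is a minimal sketch (not part of this commit; the class name is made up for illustration, and it assumes the Druid processing module is on the classpath). It contrasts a ColumnType's plain type strategy with the nullable strategy that getEncodedValueSelectorComparator() now uses when castTo is set.

import java.util.Comparator;

import org.apache.druid.segment.column.ColumnType;

// Hypothetical illustration class, not part of the Druid codebase.
public class NullableComparatorSketch
{
  public static void main(String[] args)
  {
    // The nullable strategy is a Comparator<Object> that tolerates null inputs,
    // which is what the merge path needs when a cast-to-type column has null rows.
    final Comparator<Object> nullableComparator = ColumnType.LONG.getNullableStrategy();
    System.out.println(nullableComparator.compare(null, 1L)); // handled instead of throwing
    System.out.println(nullableComparator.compare(2L, 1L));

    // The plain strategy assumes non-null inputs; feeding it nulls is the kind of
    // usage that produced the NullPointerException this commit fixes.
    System.out.println(ColumnType.LONG.getStrategy().compare(1L, 2L));
  }
}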
This commit is contained in:
parent 80942d5754
commit cc5964fbcb
@@ -76,8 +76,15 @@ public class AutoTypeColumnSchema extends DimensionSchema
   )
   {
     super(name, null, true);
-    this.castToType = castToType;
+    // auto doesn't currently do FLOAT since expressions only support DOUBLE
+    if (ColumnType.FLOAT.equals(castToType)) {
+      this.castToType = ColumnType.DOUBLE;
+    } else if (ColumnType.FLOAT_ARRAY.equals(castToType)) {
+      this.castToType = ColumnType.DOUBLE_ARRAY;
+    } else {
+      this.castToType = castToType;
+    }
   }

   @Override
   public String getTypeName()

@@ -99,8 +99,9 @@ public class NestedCommonFormatColumnHandler implements DimensionHandler<Structu
   public Comparator<ColumnValueSelector> getEncodedValueSelectorComparator()
   {
     if (castTo != null) {
+      final Comparator<Object> typeComparator = castTo.getNullableStrategy();
       return (s1, s2) ->
-          castTo.getStrategy().compare(
+          typeComparator.compare(
               StructuredData.unwrap(s1.getObject()),
               StructuredData.unwrap(s2.getObject())
           );

@@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.Module;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.guice.NestedDataModule;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;

@@ -42,6 +43,7 @@ import org.apache.druid.query.filter.NullFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.column.ColumnType;

@@ -546,6 +548,69 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest
    Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString());
  }

  @Test
  public void testIngestAndScanSegmentsRealtimeAutoExplicit() throws Exception
  {
    DimensionsSpec spec = DimensionsSpec.builder()
                                        .setDimensions(
                                            ImmutableList.of(
                                                new AutoTypeColumnSchema("str", ColumnType.STRING),
                                                new AutoTypeColumnSchema("long", ColumnType.LONG),
                                                new AutoTypeColumnSchema("double", ColumnType.FLOAT)
                                            )
                                        )
                                        .build();
    Query<ScanResultValue> scanQuery = Druids.newScanQueryBuilder()
                                             .dataSource("test_datasource")
                                             .intervals(
                                                 new MultipleIntervalSegmentSpec(
                                                     Collections.singletonList(Intervals.ETERNITY)
                                                 )
                                             )
                                             .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
                                             .limit(100)
                                             .context(ImmutableMap.of())
                                             .build();
    List<Segment> realtimeSegs = ImmutableList.of(
        NestedDataTestUtils.createIncrementalIndex(
            tempFolder,
            NestedDataTestUtils.TYPES_DATA_FILE,
            NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
            NestedDataTestUtils.TIMESTAMP_SPEC,
            spec,
            TransformSpec.NONE,
            NestedDataTestUtils.COUNT,
            Granularities.DAY,
            true
        )
    );
    List<Segment> segs = NestedDataTestUtils.createSegments(
        tempFolder,
        closer,
        NestedDataTestUtils.TYPES_DATA_FILE,
        NestedDataTestUtils.DEFAULT_JSON_INPUT_FORMAT,
        NestedDataTestUtils.TIMESTAMP_SPEC,
        spec,
        TransformSpec.NONE,
        NestedDataTestUtils.COUNT,
        Granularities.DAY,
        true,
        IndexSpec.DEFAULT
    );


    final Sequence<ScanResultValue> seq = helper.runQueryOnSegmentsObjs(realtimeSegs, scanQuery);
    final Sequence<ScanResultValue> seq2 = helper.runQueryOnSegmentsObjs(segs, scanQuery);

    List<ScanResultValue> resultsRealtime = seq.toList();
    List<ScanResultValue> resultsSegments = seq2.toList();
    logResults(resultsSegments);
    logResults(resultsRealtime);
    Assert.assertEquals(1, resultsRealtime.size());
    Assert.assertEquals(resultsRealtime.size(), resultsSegments.size());
    Assert.assertEquals(resultsRealtime.get(0).getEvents().toString(), resultsSegments.get(0).getEvents().toString());
  }

  @Test
  public void testIngestAndScanSegmentsRealtimeSchemaDiscoveryArrayTypes() throws Exception
  {