mirror of https://github.com/apache/druid.git
preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
This commit is contained in:
parent
b95708f389
commit
887f8db1b5
|
@ -247,10 +247,19 @@ public class InputSourceSampler
|
|||
if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
|
||||
final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
|
||||
signatureBuilder.add(dimensionDesc.getName(), columnType);
|
||||
// for now, use legacy types instead of standard type
|
||||
// use explicitly specified dimension schema if it exists
|
||||
if (dataSchema != null &&
|
||||
dataSchema.getDimensionsSpec() != null &&
|
||||
dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()) != null) {
|
||||
logicalDimensionSchemas.add(dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()));
|
||||
} else {
|
||||
logicalDimensionSchemas.add(
|
||||
DimensionSchema.getDefaultSchemaForBuiltInType(dimensionDesc.getName(), dimensionDesc.getCapabilities())
|
||||
DimensionSchema.getDefaultSchemaForBuiltInType(
|
||||
dimensionDesc.getName(),
|
||||
dimensionDesc.getCapabilities()
|
||||
)
|
||||
);
|
||||
}
|
||||
physicalDimensionSchemas.add(
|
||||
dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
|
||||
);
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
|
|||
import org.apache.druid.jackson.DefaultObjectMapper;
|
||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||
import org.apache.druid.segment.AutoTypeColumnSchema;
|
||||
import org.apache.druid.segment.NestedDataDimensionSchema;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.column.RowSignature;
|
||||
import org.apache.druid.segment.indexing.DataSchema;
|
||||
|
@ -183,4 +184,118 @@ public class InputSourceSamplerDiscoveryTest extends InitializedNullHandlingTest
|
|||
ExpressionProcessing.initializeForTests();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTypesClassicDiscovery()
|
||||
{
|
||||
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
|
||||
final DataSchema dataSchema = new DataSchema(
|
||||
"test",
|
||||
new TimestampSpec("t", null, null),
|
||||
DimensionsSpec.builder().build(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
final SamplerResponse response = inputSourceSampler.sample(
|
||||
inputSource,
|
||||
new JsonInputFormat(null, null, null, null, null),
|
||||
dataSchema,
|
||||
null
|
||||
);
|
||||
|
||||
Assert.assertEquals(6, response.getNumRowsRead());
|
||||
Assert.assertEquals(5, response.getNumRowsIndexed());
|
||||
Assert.assertEquals(6, response.getData().size());
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(
|
||||
new StringDimensionSchema("string"),
|
||||
new StringDimensionSchema("long"),
|
||||
new StringDimensionSchema("double"),
|
||||
new StringDimensionSchema("bool"),
|
||||
new StringDimensionSchema("variant"),
|
||||
new StringDimensionSchema("array")
|
||||
),
|
||||
response.getLogicalDimensions()
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
ImmutableList.of(
|
||||
new StringDimensionSchema("string"),
|
||||
new StringDimensionSchema("long"),
|
||||
new StringDimensionSchema("double"),
|
||||
new StringDimensionSchema("bool"),
|
||||
new StringDimensionSchema("variant"),
|
||||
new StringDimensionSchema("array")
|
||||
),
|
||||
response.getPhysicalDimensions()
|
||||
);
|
||||
Assert.assertEquals(
|
||||
RowSignature.builder()
|
||||
.addTimeColumn()
|
||||
.add("string", ColumnType.STRING)
|
||||
.add("long", ColumnType.STRING)
|
||||
.add("double", ColumnType.STRING)
|
||||
.add("bool", ColumnType.STRING)
|
||||
.add("variant", ColumnType.STRING)
|
||||
.add("array", ColumnType.STRING)
|
||||
.build(),
|
||||
response.getLogicalSegmentSchema()
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTypesNoDiscoveryExplicitSchema()
|
||||
{
|
||||
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
|
||||
final DataSchema dataSchema = new DataSchema(
|
||||
"test",
|
||||
new TimestampSpec("t", null, null),
|
||||
DimensionsSpec.builder().setDimensions(
|
||||
ImmutableList.of(new StringDimensionSchema("string"),
|
||||
new LongDimensionSchema("long"),
|
||||
new DoubleDimensionSchema("double"),
|
||||
new StringDimensionSchema("bool"),
|
||||
new NestedDataDimensionSchema("variant"),
|
||||
new NestedDataDimensionSchema("array"),
|
||||
new NestedDataDimensionSchema("nested")
|
||||
)
|
||||
).build(),
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
final SamplerResponse response = inputSourceSampler.sample(
|
||||
inputSource,
|
||||
new JsonInputFormat(null, null, null, null, null),
|
||||
dataSchema,
|
||||
null
|
||||
);
|
||||
|
||||
Assert.assertEquals(6, response.getNumRowsRead());
|
||||
Assert.assertEquals(5, response.getNumRowsIndexed());
|
||||
Assert.assertEquals(6, response.getData().size());
|
||||
Assert.assertEquals(
|
||||
dataSchema.getDimensionsSpec().getDimensions(),
|
||||
response.getLogicalDimensions()
|
||||
);
|
||||
|
||||
Assert.assertEquals(
|
||||
dataSchema.getDimensionsSpec().getDimensions(),
|
||||
response.getPhysicalDimensions()
|
||||
);
|
||||
Assert.assertEquals(
|
||||
RowSignature.builder()
|
||||
.addTimeColumn()
|
||||
.add("string", ColumnType.STRING)
|
||||
.add("long", ColumnType.LONG)
|
||||
.add("double", ColumnType.DOUBLE)
|
||||
.add("bool", ColumnType.STRING)
|
||||
.add("variant", ColumnType.NESTED_DATA)
|
||||
.add("array", ColumnType.NESTED_DATA)
|
||||
.add("nested", ColumnType.NESTED_DATA)
|
||||
.build(),
|
||||
response.getLogicalSegmentSchema()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue