mirror of https://github.com/apache/druid.git
preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
This commit is contained in:
parent
b95708f389
commit
887f8db1b5
|
@ -247,10 +247,19 @@ public class InputSourceSampler
|
||||||
if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
|
if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
|
||||||
final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
|
final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
|
||||||
signatureBuilder.add(dimensionDesc.getName(), columnType);
|
signatureBuilder.add(dimensionDesc.getName(), columnType);
|
||||||
// for now, use legacy types instead of standard type
|
// use explicitly specified dimension schema if it exists
|
||||||
|
if (dataSchema != null &&
|
||||||
|
dataSchema.getDimensionsSpec() != null &&
|
||||||
|
dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()) != null) {
|
||||||
|
logicalDimensionSchemas.add(dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()));
|
||||||
|
} else {
|
||||||
logicalDimensionSchemas.add(
|
logicalDimensionSchemas.add(
|
||||||
DimensionSchema.getDefaultSchemaForBuiltInType(dimensionDesc.getName(), dimensionDesc.getCapabilities())
|
DimensionSchema.getDefaultSchemaForBuiltInType(
|
||||||
|
dimensionDesc.getName(),
|
||||||
|
dimensionDesc.getCapabilities()
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
}
|
||||||
physicalDimensionSchemas.add(
|
physicalDimensionSchemas.add(
|
||||||
dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
|
dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
|
||||||
);
|
);
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
|
||||||
import org.apache.druid.jackson.DefaultObjectMapper;
|
import org.apache.druid.jackson.DefaultObjectMapper;
|
||||||
import org.apache.druid.math.expr.ExpressionProcessing;
|
import org.apache.druid.math.expr.ExpressionProcessing;
|
||||||
import org.apache.druid.segment.AutoTypeColumnSchema;
|
import org.apache.druid.segment.AutoTypeColumnSchema;
|
||||||
|
import org.apache.druid.segment.NestedDataDimensionSchema;
|
||||||
import org.apache.druid.segment.column.ColumnType;
|
import org.apache.druid.segment.column.ColumnType;
|
||||||
import org.apache.druid.segment.column.RowSignature;
|
import org.apache.druid.segment.column.RowSignature;
|
||||||
import org.apache.druid.segment.indexing.DataSchema;
|
import org.apache.druid.segment.indexing.DataSchema;
|
||||||
|
@ -183,4 +184,118 @@ public class InputSourceSamplerDiscoveryTest extends InitializedNullHandlingTest
|
||||||
ExpressionProcessing.initializeForTests();
|
ExpressionProcessing.initializeForTests();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTypesClassicDiscovery()
|
||||||
|
{
|
||||||
|
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
|
||||||
|
final DataSchema dataSchema = new DataSchema(
|
||||||
|
"test",
|
||||||
|
new TimestampSpec("t", null, null),
|
||||||
|
DimensionsSpec.builder().build(),
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
final SamplerResponse response = inputSourceSampler.sample(
|
||||||
|
inputSource,
|
||||||
|
new JsonInputFormat(null, null, null, null, null),
|
||||||
|
dataSchema,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(6, response.getNumRowsRead());
|
||||||
|
Assert.assertEquals(5, response.getNumRowsIndexed());
|
||||||
|
Assert.assertEquals(6, response.getData().size());
|
||||||
|
Assert.assertEquals(
|
||||||
|
ImmutableList.of(
|
||||||
|
new StringDimensionSchema("string"),
|
||||||
|
new StringDimensionSchema("long"),
|
||||||
|
new StringDimensionSchema("double"),
|
||||||
|
new StringDimensionSchema("bool"),
|
||||||
|
new StringDimensionSchema("variant"),
|
||||||
|
new StringDimensionSchema("array")
|
||||||
|
),
|
||||||
|
response.getLogicalDimensions()
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
ImmutableList.of(
|
||||||
|
new StringDimensionSchema("string"),
|
||||||
|
new StringDimensionSchema("long"),
|
||||||
|
new StringDimensionSchema("double"),
|
||||||
|
new StringDimensionSchema("bool"),
|
||||||
|
new StringDimensionSchema("variant"),
|
||||||
|
new StringDimensionSchema("array")
|
||||||
|
),
|
||||||
|
response.getPhysicalDimensions()
|
||||||
|
);
|
||||||
|
Assert.assertEquals(
|
||||||
|
RowSignature.builder()
|
||||||
|
.addTimeColumn()
|
||||||
|
.add("string", ColumnType.STRING)
|
||||||
|
.add("long", ColumnType.STRING)
|
||||||
|
.add("double", ColumnType.STRING)
|
||||||
|
.add("bool", ColumnType.STRING)
|
||||||
|
.add("variant", ColumnType.STRING)
|
||||||
|
.add("array", ColumnType.STRING)
|
||||||
|
.build(),
|
||||||
|
response.getLogicalSegmentSchema()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTypesNoDiscoveryExplicitSchema()
|
||||||
|
{
|
||||||
|
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
|
||||||
|
final DataSchema dataSchema = new DataSchema(
|
||||||
|
"test",
|
||||||
|
new TimestampSpec("t", null, null),
|
||||||
|
DimensionsSpec.builder().setDimensions(
|
||||||
|
ImmutableList.of(new StringDimensionSchema("string"),
|
||||||
|
new LongDimensionSchema("long"),
|
||||||
|
new DoubleDimensionSchema("double"),
|
||||||
|
new StringDimensionSchema("bool"),
|
||||||
|
new NestedDataDimensionSchema("variant"),
|
||||||
|
new NestedDataDimensionSchema("array"),
|
||||||
|
new NestedDataDimensionSchema("nested")
|
||||||
|
)
|
||||||
|
).build(),
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
final SamplerResponse response = inputSourceSampler.sample(
|
||||||
|
inputSource,
|
||||||
|
new JsonInputFormat(null, null, null, null, null),
|
||||||
|
dataSchema,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(6, response.getNumRowsRead());
|
||||||
|
Assert.assertEquals(5, response.getNumRowsIndexed());
|
||||||
|
Assert.assertEquals(6, response.getData().size());
|
||||||
|
Assert.assertEquals(
|
||||||
|
dataSchema.getDimensionsSpec().getDimensions(),
|
||||||
|
response.getLogicalDimensions()
|
||||||
|
);
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
dataSchema.getDimensionsSpec().getDimensions(),
|
||||||
|
response.getPhysicalDimensions()
|
||||||
|
);
|
||||||
|
Assert.assertEquals(
|
||||||
|
RowSignature.builder()
|
||||||
|
.addTimeColumn()
|
||||||
|
.add("string", ColumnType.STRING)
|
||||||
|
.add("long", ColumnType.LONG)
|
||||||
|
.add("double", ColumnType.DOUBLE)
|
||||||
|
.add("bool", ColumnType.STRING)
|
||||||
|
.add("variant", ColumnType.NESTED_DATA)
|
||||||
|
.add("array", ColumnType.NESTED_DATA)
|
||||||
|
.add("nested", ColumnType.NESTED_DATA)
|
||||||
|
.build(),
|
||||||
|
response.getLogicalSegmentSchema()
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue