mirror of https://github.com/apache/druid.git
Fix incorrect scale when reading decimal from parquet (#15715)
* Fix incorrect scale when reading decimal from parquet * add comments * fix test
This commit is contained in:
parent
a3b32fbd26
commit
55acf2e2ff
|
@ -426,9 +426,13 @@ public class ParquetGroupConverter
|
|||
int scale = pt.asPrimitiveType().getDecimalMetadata().getScale();
|
||||
switch (pt.getPrimitiveTypeName()) {
|
||||
case INT32:
|
||||
return new BigDecimal(g.getInteger(fieldIndex, index));
|
||||
// The primitive returned from Group is an unscaledValue.
|
||||
// We need to do unscaledValue * 10^(-scale) to convert back to decimal
|
||||
return new BigDecimal(g.getInteger(fieldIndex, index)).movePointLeft(scale);
|
||||
case INT64:
|
||||
return new BigDecimal(g.getLong(fieldIndex, index));
|
||||
// The primitive returned from Group is an unscaledValue.
|
||||
// We need to do unscaledValue * 10^(-scale) to convert back to decimal
|
||||
return new BigDecimal(g.getLong(fieldIndex, index)).movePointLeft(scale);
|
||||
case FIXED_LEN_BYTE_ARRAY:
|
||||
case BINARY:
|
||||
Binary value = g.getBinary(fieldIndex, index);
|
||||
|
|
|
@ -62,6 +62,35 @@ public class DecimalParquetInputTest extends BaseParquetInputTest
|
|||
parserType,
|
||||
true
|
||||
);
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(fixed_len_dec) ############
|
||||
name: fixed_len_dec
|
||||
path: fixed_len_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: FIXED_LEN_BYTE_ARRAY
|
||||
logical_type: Decimal(precision=10, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0.0
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
0.0
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
*/
|
||||
List<InputRow> rows = getAllRows(parserType, config);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
|
||||
Assert.assertEquals("1.0", rows.get(0).getDimension("fixed_len_dec").get(0));
|
||||
|
@ -80,10 +109,39 @@ public class DecimalParquetInputTest extends BaseParquetInputTest
|
|||
parserType,
|
||||
true
|
||||
);
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(i32_dec) ############
|
||||
name: i32_dec
|
||||
path: i32_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: INT32
|
||||
logical_type: Decimal(precision=5, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
*/
|
||||
List<InputRow> rows = getAllRows(parserType, config);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
|
||||
Assert.assertEquals("100", rows.get(0).getDimension("i32_dec").get(0));
|
||||
Assert.assertEquals(new BigDecimal(100), rows.get(0).getMetric("metric1"));
|
||||
Assert.assertEquals("1.00", rows.get(0).getDimension("i32_dec").get(0));
|
||||
Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(0).getMetric("metric1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -98,9 +156,38 @@ public class DecimalParquetInputTest extends BaseParquetInputTest
|
|||
parserType,
|
||||
true
|
||||
);
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(i64_dec) ############
|
||||
name: i64_dec
|
||||
path: i64_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: INT64
|
||||
logical_type: Decimal(precision=10, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
*/
|
||||
List<InputRow> rows = getAllRows(parserType, config);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
|
||||
Assert.assertEquals("100", rows.get(0).getDimension("i64_dec").get(0));
|
||||
Assert.assertEquals(new BigDecimal(100), rows.get(0).getMetric("metric1"));
|
||||
Assert.assertEquals("1.00", rows.get(0).getDimension("i64_dec").get(0));
|
||||
Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(0).getMetric("metric1"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,6 +63,36 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest
|
|||
flattenSpec
|
||||
);
|
||||
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(fixed_len_dec) ############
|
||||
name: fixed_len_dec
|
||||
path: fixed_len_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: FIXED_LEN_BYTE_ARRAY
|
||||
logical_type: Decimal(precision=10, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0.0
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
0.0
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
*/
|
||||
|
||||
List<InputRow> rows = readAllRows(reader);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
|
||||
Assert.assertEquals("1.0", rows.get(1).getDimension("fixed_len_dec").get(0));
|
||||
|
@ -100,10 +130,40 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest
|
|||
flattenSpec
|
||||
);
|
||||
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(i32_dec) ############
|
||||
name: i32_dec
|
||||
path: i32_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: INT32
|
||||
logical_type: Decimal(precision=5, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
*/
|
||||
|
||||
List<InputRow> rows = readAllRows(reader);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
|
||||
Assert.assertEquals("100", rows.get(1).getDimension("i32_dec").get(0));
|
||||
Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1"));
|
||||
Assert.assertEquals("1.00", rows.get(1).getDimension("i32_dec").get(0));
|
||||
Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(1).getMetric("metric1"));
|
||||
|
||||
reader = createReader(
|
||||
file,
|
||||
|
@ -112,7 +172,7 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest
|
|||
);
|
||||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||
final String expectedJson = "{\n"
|
||||
+ " \"i32_dec\" : 100\n"
|
||||
+ " \"i32_dec\" : 1.00\n"
|
||||
+ "}";
|
||||
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues()));
|
||||
}
|
||||
|
@ -137,10 +197,40 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest
|
|||
flattenSpec
|
||||
);
|
||||
|
||||
/*
|
||||
The raw data in the parquet file has the following columns:
|
||||
############ Column(i64_dec) ############
|
||||
name: i64_dec
|
||||
path: i64_dec
|
||||
max_definition_level: 1
|
||||
max_repetition_level: 0
|
||||
physical_type: INT64
|
||||
logical_type: Decimal(precision=10, scale=2)
|
||||
converted_type (legacy): DECIMAL
|
||||
|
||||
The raw data in the parquet file has the following rows:
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
6.00
|
||||
7.00
|
||||
8.00
|
||||
9.00
|
||||
0
|
||||
1.00
|
||||
2.00
|
||||
3.00
|
||||
4.00
|
||||
5.00
|
||||
*/
|
||||
|
||||
List<InputRow> rows = readAllRows(reader);
|
||||
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
|
||||
Assert.assertEquals("100", rows.get(1).getDimension("i64_dec").get(0));
|
||||
Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1"));
|
||||
Assert.assertEquals("1.00", rows.get(1).getDimension("i64_dec").get(0));
|
||||
Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(1).getMetric("metric1"));
|
||||
|
||||
reader = createReader(
|
||||
file,
|
||||
|
@ -149,7 +239,7 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest
|
|||
);
|
||||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||
final String expectedJson = "{\n"
|
||||
+ " \"i64_dec\" : 100\n"
|
||||
+ " \"i64_dec\" : 1.00\n"
|
||||
+ "}";
|
||||
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues()));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue