diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java index 4571da3d724..8700359cf79 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java @@ -426,9 +426,13 @@ public class ParquetGroupConverter int scale = pt.asPrimitiveType().getDecimalMetadata().getScale(); switch (pt.getPrimitiveTypeName()) { case INT32: - return new BigDecimal(g.getInteger(fieldIndex, index)); + // The primitive returned from Group is an unscaledValue. + // We need to do unscaledValue * 10^(-scale) to convert back to decimal + return new BigDecimal(g.getInteger(fieldIndex, index)).movePointLeft(scale); case INT64: - return new BigDecimal(g.getLong(fieldIndex, index)); + // The primitive returned from Group is an unscaledValue. + // We need to do unscaledValue * 10^(-scale) to convert back to decimal + return new BigDecimal(g.getLong(fieldIndex, index)).movePointLeft(scale); case FIXED_LEN_BYTE_ARRAY: case BINARY: Binary value = g.getBinary(fieldIndex, index); diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetInputTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetInputTest.java index 7f60fbf066f..37c99f53baf 100644 --- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetInputTest.java +++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetInputTest.java @@ -62,6 +62,35 @@ public class DecimalParquetInputTest extends BaseParquetInputTest parserType, true ); + /* + The raw data in the parquet file has the following columns: + ############ Column(fixed_len_dec) ############ + name: fixed_len_dec + path: fixed_len_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: FIXED_LEN_BYTE_ARRAY + logical_type: Decimal(precision=10, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0.0 + 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 + 7.0 + 8.0 + 9.0 + 0.0 + 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + */ List rows = getAllRows(parserType, config); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString()); Assert.assertEquals("1.0", rows.get(0).getDimension("fixed_len_dec").get(0)); @@ -80,10 +109,39 @@ public class DecimalParquetInputTest extends BaseParquetInputTest parserType, true ); + /* + The raw data in the parquet file has the following columns: + ############ Column(i32_dec) ############ + name: i32_dec + path: i32_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: INT32 + logical_type: Decimal(precision=5, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + 6.00 + 7.00 + 8.00 + 9.00 + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + */ List rows = getAllRows(parserType, config); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString()); - Assert.assertEquals("100", rows.get(0).getDimension("i32_dec").get(0)); - Assert.assertEquals(new BigDecimal(100), rows.get(0).getMetric("metric1")); + Assert.assertEquals("1.00", rows.get(0).getDimension("i32_dec").get(0)); + Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(0).getMetric("metric1")); } @Test @@ -98,9 +156,38 @@ public class DecimalParquetInputTest extends BaseParquetInputTest parserType, true ); + /* + The raw data in the parquet file has the following columns: + ############ Column(i64_dec) ############ + name: i64_dec + path: i64_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: INT64 + logical_type: Decimal(precision=10, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + 6.00 + 7.00 + 8.00 + 9.00 + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + */ List rows = getAllRows(parserType, config); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString()); - Assert.assertEquals("100", rows.get(0).getDimension("i64_dec").get(0)); - Assert.assertEquals(new BigDecimal(100), rows.get(0).getMetric("metric1")); + Assert.assertEquals("1.00", rows.get(0).getDimension("i64_dec").get(0)); + Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(0).getMetric("metric1")); } } diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java index faa80e6d73f..0d56e1e3652 100644 --- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java +++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/DecimalParquetReaderTest.java @@ -63,6 +63,36 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest flattenSpec ); + /* + The raw data in the parquet file has the following columns: + ############ Column(fixed_len_dec) ############ + name: fixed_len_dec + path: fixed_len_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: FIXED_LEN_BYTE_ARRAY + logical_type: Decimal(precision=10, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0.0 + 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 + 7.0 + 8.0 + 9.0 + 0.0 + 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + */ + List rows = readAllRows(reader); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString()); Assert.assertEquals("1.0", rows.get(1).getDimension("fixed_len_dec").get(0)); @@ -100,10 +130,40 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest flattenSpec ); + /* + The raw data in the parquet file has the following columns: + ############ Column(i32_dec) ############ + name: i32_dec + path: i32_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: INT32 + logical_type: Decimal(precision=5, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + 6.00 + 7.00 + 8.00 + 9.00 + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + */ + List rows = readAllRows(reader); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString()); - Assert.assertEquals("100", rows.get(1).getDimension("i32_dec").get(0)); - Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1")); + Assert.assertEquals("1.00", rows.get(1).getDimension("i32_dec").get(0)); + Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(1).getMetric("metric1")); reader = createReader( file, @@ -112,7 +172,7 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest ); List sampled = sampleAllRows(reader); final String expectedJson = "{\n" - + " \"i32_dec\" : 100\n" + + " \"i32_dec\" : 1.00\n" + "}"; Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues())); } @@ -137,10 +197,40 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest flattenSpec ); + /* + The raw data in the parquet file has the following columns: + ############ Column(i64_dec) ############ + name: i64_dec + path: i64_dec + max_definition_level: 1 + max_repetition_level: 0 + physical_type: INT64 + logical_type: Decimal(precision=10, scale=2) + converted_type (legacy): DECIMAL + + The raw data in the parquet file has the following rows: + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + 6.00 + 7.00 + 8.00 + 9.00 + 0 + 1.00 + 2.00 + 3.00 + 4.00 + 5.00 + */ + List rows = readAllRows(reader); Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString()); - Assert.assertEquals("100", rows.get(1).getDimension("i64_dec").get(0)); - Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1")); + Assert.assertEquals("1.00", rows.get(1).getDimension("i64_dec").get(0)); + Assert.assertEquals(BigDecimal.valueOf(100L, 2), rows.get(1).getMetric("metric1")); reader = createReader( file, @@ -149,7 +239,7 @@ public class DecimalParquetReaderTest extends BaseParquetReaderTest ); List sampled = sampleAllRows(reader); final String expectedJson = "{\n" - + " \"i64_dec\" : 100\n" + + " \"i64_dec\" : 1.00\n" + "}"; Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues())); }