Fixes parquet uint_32 datatype conversion (#13935)

After Parquet ingestion, values in columns of the Parquet uint_32 datatype were stored as nulls in the dataSource. This PR fixes that conversion bug.
This commit is contained in:
Tejaswini Bandlamudi 2023-03-16 15:27:38 +05:30 committed by GitHub
parent c7d864d3bc
commit 6837289cb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 0 deletions

View File

@ -364,6 +364,7 @@ class ParquetGroupConverter
case UINT_16:
return g.getInteger(fieldIndex, index);
case UINT_32:
return Integer.toUnsignedLong(g.getInteger(fieldIndex, index));
case UINT_64:
return g.getLong(fieldIndex, index);
case DECIMAL:

View File

@ -79,4 +79,23 @@ public class WikiParquetReaderTest extends BaseParquetReaderTest
+ "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
/**
 * Reads the {@code example/datatypes/uint32_test.parquet} fixture and checks that the raw values
 * of the first sampled row serialize to the expected JSON. The expected {@code bar} value,
 * 2147483649, is larger than {@link Integer#MAX_VALUE}.
 *
 * @throws IOException declared because creating the reader, sampling rows, or JSON serialization may throw it
 */
@Test
public void testUint32Datatype() throws IOException
{
  // Build the schema pieces one at a time, in the same order the row schema consumes them.
  final TimestampSpec timestampSpec = new TimestampSpec("time", "millis", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo", "bar"))
  );
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final InputEntityReader parquetReader = createReader(
      "example/datatypes/uint32_test.parquet",
      rowSchema,
      JSONPathSpec.DEFAULT
  );
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(parquetReader);

  // Only the first sampled row is inspected.
  final String actualJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
  final String expectedJson = "{\n"
                              + " \"bar\" : 2147483649,\n"
                              + " \"foo\" : \"baz\",\n"
                              + " \"time\" : 1678853101621\n"
                              + "}";
  Assert.assertEquals(expectedJson, actualJson);
}
}