NIFI-12670 Read Jolt Transform with UTF-8 Encoding

- Specified UTF-8 encoding for reading Jolt Transform to avoid decoding issues on Windows or platforms with different default character sets

This closes #8842

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
Jim Steinebrey 2024-05-15 14:31:20 -04:00 committed by exceptionfactory
parent 60112f242c
commit b27fc46b60
No known key found for this signature in database
3 changed files with 47 additions and 1 deletions

View File

@ -241,7 +241,7 @@ public abstract class AbstractJoltTransform extends AbstractProcessor {
String readTransform(final PropertyValue propertyValue) {
final ResourceReference resourceReference = propertyValue.asResource();
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(resourceReference.read()))) {
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(resourceReference.read(), StandardCharsets.UTF_8))) {
return reader.lines().collect(Collectors.joining(System.lineSeparator()));
} catch (final IOException e) {
throw new UncheckedIOException("Read JOLT Transform failed", e);

View File

@ -246,6 +246,26 @@ class TestJoltTransformJSON {
assertTrue(DIFFY.diff(compareJson, transformedJson).isEmpty());
}
/**
 * Returns a copy of the given text in which every occurrence of the quoted
 * token {@code "primary"} is replaced by the same token suffixed with the
 * accented characters ÄÖÜ.
 *
 * @param input text to rewrite
 * @return the rewritten text; unchanged when the quoted token does not occur
 */
String addAccentedChars(String input) {
    final String plainToken = "\"primary\"";
    final String accentedToken = "\"primaryÄÖÜ\"";
    return input.replace(plainToken, accentedToken);
}
/**
 * Runs a Shiftr transform where both the spec and the enqueued JSON have been
 * passed through {@code addAccentedChars}, then checks that the single routed
 * flow file is marked as JSON and matches the expected shiftrOutput.json.
 */
@Test
void testTransformInputWithShiftrAccentedChars() throws IOException {
    // Configure the processor with the accented variant of the Shiftr spec.
    final String shiftrSpec = Files.readString(Paths.get("src/test/resources/specs/shiftrSpec.json"));
    final String accentedSpec = addAccentedChars(shiftrSpec);
    runner.setProperty(JoltTransformJSON.JOLT_SPEC, accentedSpec);
    runner.setProperty(JoltTransformJSON.JOLT_TRANSFORM, JoltTransformStrategy.SHIFTR);

    // The input gets the same key rewrite so that the spec still matches it.
    final String accentedInput = addAccentedChars(Files.readString(JSON_INPUT));
    runner.enqueue(accentedInput);
    runner.run();

    runner.assertAllFlowFilesTransferred(JoltTransformJSON.REL_SUCCESS);
    final MockFlowFile outputFlowFile = runner.getFlowFilesForRelationship(JoltTransformJSON.REL_SUCCESS).get(0);
    final String mimeTypeKey = CoreAttributes.MIME_TYPE.key();
    outputFlowFile.assertAttributeExists(mimeTypeKey);
    outputFlowFile.assertAttributeEquals(mimeTypeKey, "application/json");

    // Compare parsed JSON structures rather than raw text.
    final Object actualJson = JsonUtils.jsonToObject(new ByteArrayInputStream(outputFlowFile.toByteArray()));
    final Object expectedJson = JsonUtils.jsonToObject(
            Files.newInputStream(Paths.get("src/test/resources/TestJoltTransformJson/shiftrOutput.json")));
    assertTrue(DIFFY.diff(expectedJson, actualJson).isEmpty());
}
@Test
void testTransformInputWithDefaultr() throws IOException {
final String spec = Files.readString(Paths.get("src/test/resources/specs/defaultrSpec.json"));

View File

@ -327,6 +327,32 @@ public class TestJoltTransformRecord {
new String(transformed.toByteArray()));
}
/**
 * Returns a copy of the given text in which every occurrence of the quoted
 * token {@code "primary"} is replaced by the same token suffixed with the
 * accented characters ÄÖÜ.
 *
 * @param input text to rewrite
 * @return the rewritten text; unchanged when the quoted token does not occur
 */
String addAccentedChars(String input) {
    final String plainToken = "\"primary\"";
    final String accentedToken = "\"primaryÄÖÜ\"";
    return input.replace(plainToken, accentedToken);
}
/**
 * Runs a Shiftr transform over one generated record and verifies that one flow
 * file reaches each of the success and original relationships, that the
 * transformed flow file is marked as JSON, and that its content equals the
 * expected shiftrOutput.json text.
 *
 * NOTE(review): despite the test name, the spec is passed to the processor
 * unmodified here; addAccentedChars is not applied to the spec or to the
 * generated records. Confirm whether accented field names were meant to be
 * exercised by this test.
 *
 * @throws IOException if a test resource cannot be read
 */
@Test
public void testTransformInputWithShiftrAccentedChars() throws IOException {
    generateTestData(1, null);

    // Configure the record writer with the expected output schema and pretty printing.
    final String outputSchemaText = Files.readString(Paths.get("src/test/resources/TestJoltTransformRecord/shiftrOutputSchema.avsc"));
    runner.setProperty(writer, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY);
    runner.setProperty(writer, SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
    runner.setProperty(writer, JsonRecordSetWriter.PRETTY_PRINT_JSON, "true");
    runner.enableControllerService(writer);

    final String spec = Files.readString(Paths.get("src/test/resources/specs/shiftrSpec.json"));
    runner.setProperty(JoltTransformRecord.JOLT_SPEC, spec);
    runner.setProperty(JoltTransformRecord.JOLT_TRANSFORM, JoltTransformStrategy.SHIFTR);

    runner.enqueue(new byte[0]);
    runner.run();

    runner.assertTransferCount(JoltTransformRecord.REL_SUCCESS, 1);
    runner.assertTransferCount(JoltTransformRecord.REL_ORIGINAL, 1);
    final MockFlowFile transformed = runner.getFlowFilesForRelationship(JoltTransformRecord.REL_SUCCESS).get(0);
    transformed.assertAttributeExists(CoreAttributes.MIME_TYPE.key());
    transformed.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "application/json");

    // Files.readString decodes as UTF-8, so decode the flow file bytes as UTF-8 too
    // instead of relying on the platform default charset.
    assertEquals(Files.readString(Paths.get("src/test/resources/TestJoltTransformRecord/shiftrOutput.json")),
            new String(transformed.toByteArray(), java.nio.charset.StandardCharsets.UTF_8));
}
@Test
public void testTransformInputWithShiftrMultipleOutputRecords() throws IOException {
RecordField aField = new RecordField("a", RecordFieldType.INT.getDataType());