mirror of
https://github.com/apache/nifi.git
synced 2025-03-06 17:39:36 +00:00
NIFI-9884 - JacksonCSVRecordReader ignores specified encoding
NIFI-9884 - JacksonCSVRecordReader ignores specified encoding; test case for ISO-8859-1. Signed-off-by: Matthew Burgess <mattyb149@apache.org>. This closes #5941.
This commit is contained in:
parent
1cf4e72084
commit
2c83149c6d
@ -59,7 +59,7 @@ public class JacksonCSVRecordReader extends AbstractCSVRecordReader {
|
||||
final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
|
||||
super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);
|
||||
|
||||
final Reader reader = new InputStreamReader(new BOMInputStream(in));
|
||||
final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);
|
||||
|
||||
CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder()
|
||||
.setColumnSeparator(csvFormat.getDelimiter())
|
||||
|
@ -34,6 +34,7 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@ -69,7 +70,7 @@ public class TestJacksonCSVRecordReader {
|
||||
fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
|
||||
final RecordSchema schema = new SimpleRecordSchema(fields);
|
||||
|
||||
try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
|
||||
try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
|
||||
final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
|
||||
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
|
||||
|
||||
@ -80,6 +81,30 @@ public class TestJacksonCSVRecordReader {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testISO8859() throws IOException, MalformedRecordException {
|
||||
final String text = "name\nÄËÖÜ";
|
||||
final byte[] bytesUTF = text.getBytes(StandardCharsets.UTF_8);
|
||||
final byte[] bytes8859 = text.getBytes(StandardCharsets.ISO_8859_1);
|
||||
assertEquals(13, bytesUTF.length, "expected size=13 for UTF-8 representation of test data");
|
||||
assertEquals(9, bytes8859.length, "expected size=9 for ISO-8859-1 representation of test data");
|
||||
|
||||
final List<RecordField> fields = new ArrayList<>();
|
||||
fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
|
||||
final RecordSchema schema = new SimpleRecordSchema(fields);
|
||||
|
||||
try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.ISO_8859_1));
|
||||
final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
|
||||
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(),
|
||||
StandardCharsets.ISO_8859_1.name())) {
|
||||
|
||||
final Record record = reader.nextRecord();
|
||||
final String name = (String)record.getValue("name");
|
||||
|
||||
assertEquals("ÄËÖÜ", name);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDate() throws IOException, MalformedRecordException {
|
||||
final String dateValue = "1983-11-30";
|
||||
|
Loading…
x
Reference in New Issue
Block a user