NIFI-5525: CSVRecordReader fails with StringIndexOutOfBoundsException when field is a double quote

This closes #2953.

Signed-off-by: Mark Payne <markap14@hotmail.com>
This commit is contained in:
Vadim Arshavsky 2018-08-16 17:25:38 +03:00 committed by Mark Payne
parent 9d2b698c1c
commit 410176ed22
2 changed files with 22 additions and 1 deletions

View File

@ -79,7 +79,7 @@ abstract public class AbstractCSVRecordReader implements RecordReader {
return value;
}
final String trimmed = value.startsWith("\"") && value.endsWith("\"") ? value.substring(1, value.length() - 1) : value;
final String trimmed = value.startsWith("\"") && value.endsWith("\"") && (value.length() > 1) ? value.substring(1, value.length() - 1) : value;
if (trimmed.isEmpty()) {
return null;
}

View File

@ -593,4 +593,25 @@ public class TestCSVRecordReader {
assertNull(reader.nextRecord());
}
}
/**
 * Regression test for NIFI-5525: a quoted CSV field whose entire content is a single
 * escaped double quote must be returned as the one-character string {@code "}.
 *
 * @throws IOException declared by the stream and reader APIs used here
 * @throws MalformedRecordException declared by {@code nextRecord()}
 */
@Test
public void testQuote() throws IOException, MalformedRecordException {
    // One header line ("name") followed by one data line that holds only """" (an escaped quote).
    final byte[] csvBytes = "\"name\"\n\"\"\"\"".getBytes(StandardCharsets.UTF_8);

    // Schema with a single STRING column matching the header.
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
    final RecordSchema recordSchema = new SimpleRecordSchema(schemaFields);

    final CSVFormat csvFormat = CSVFormat.RFC4180.withFirstRecordAsHeader().withTrim().withQuote('"');

    // Named locals keep the long constructor call below readable.
    final String dateFormat = RecordFieldType.DATE.getDefaultFormat();
    final String timeFormat = RecordFieldType.TIME.getDefaultFormat();
    final String timestampFormat = RecordFieldType.TIMESTAMP.getDefaultFormat();
    final String encoding = StandardCharsets.UTF_8.name();

    try (final InputStream in = new ByteArrayInputStream(csvBytes);
         final CSVRecordReader csvReader = new CSVRecordReader(in, Mockito.mock(ComponentLog.class), recordSchema, csvFormat,
                 true, false, dateFormat, timeFormat, timestampFormat, encoding)) {
        final Record firstRecord = csvReader.nextRecord();
        // The surrounding quotes are stripped and the doubled quote collapses to one literal quote.
        assertEquals("\"", (String) firstRecord.getValue("name"));
    }
}
}