NIFI-5525 - CSVRecordReader fails with StringIndexOutOfBoundsException when field is a double quote

review

Signed-off-by: Matthew Burgess <mattyb149@apache.org>

This closes #3092
This commit is contained in:
Pierre Villard 2018-10-19 10:44:00 +02:00 committed by Matthew Burgess
parent 4fe7cb33e4
commit c6106d1d88
2 changed files with 12 additions and 6 deletions

View File

@ -17,7 +17,6 @@
package org.apache.nifi.csv; package org.apache.nifi.csv;
import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.RecordReader; import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.record.DataType; import org.apache.nifi.serialization.record.DataType;
@ -79,7 +78,7 @@ abstract public class AbstractCSVRecordReader implements RecordReader {
return value; return value;
} }
final String trimmed = value.startsWith("\"") && value.endsWith("\"") && (value.length() > 1) ? value.substring(1, value.length() - 1) : value; final String trimmed = trim(value);
if (trimmed.isEmpty()) { if (trimmed.isEmpty()) {
return null; return null;
} }
@ -92,7 +91,7 @@ abstract public class AbstractCSVRecordReader implements RecordReader {
return value; return value;
} }
final String trimmed = value.startsWith("\"") && value.endsWith("\"") ? value.substring(1, value.length() - 1) : value; final String trimmed = trim(value);
if (trimmed.isEmpty()) { if (trimmed.isEmpty()) {
return null; return null;
} }
@ -132,6 +131,10 @@ abstract public class AbstractCSVRecordReader implements RecordReader {
return value; return value;
} }
/**
 * Strips a single pair of enclosing double quotes from the given value.
 *
 * <p>The value is returned unchanged unless it is at least two characters long and
 * both begins and ends with a double-quote character. The length requirement means a
 * value consisting of one lone double quote is returned as-is rather than being
 * passed to {@code substring} with an invalid range.</p>
 *
 * @param value the raw field text; must not be {@code null}
 * @return the text between the enclosing quotes, or {@code value} itself when it is
 *         not wrapped in a pair of double quotes
 */
private String trim(String value) {
    final int length = value.length();
    if (length < 2) {
        // Too short to hold both an opening and a closing quote.
        return value;
    }

    final boolean quoted = value.charAt(0) == '"' && value.charAt(length - 1) == '"';
    if (!quoted) {
        return value;
    }
    return value.substring(1, length - 1);
}
@Override @Override
public RecordSchema getSchema() { public RecordSchema getSchema() {
return schema; return schema;

View File

@ -597,7 +597,7 @@ public class TestCSVRecordReader {
@Test @Test
public void testQuote() throws IOException, MalformedRecordException { public void testQuote() throws IOException, MalformedRecordException {
final CSVFormat format = CSVFormat.RFC4180.withFirstRecordAsHeader().withTrim().withQuote('"'); final CSVFormat format = CSVFormat.RFC4180.withFirstRecordAsHeader().withTrim().withQuote('"');
final String text = "\"name\"\n\"\"\"\""; final String text = "\"name\"\n\"\"\"\"\n\"\"\"\"";
final List<RecordField> fields = new ArrayList<>(); final List<RecordField> fields = new ArrayList<>();
fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
@ -607,9 +607,12 @@ public class TestCSVRecordReader {
final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), StandardCharsets.UTF_8.name())) { RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), StandardCharsets.UTF_8.name())) {
final Record record = reader.nextRecord(); Record record = reader.nextRecord();
final String name = (String)record.getValue("name"); String name = (String)record.getValue("name");
assertEquals("\"", name);
record = reader.nextRecord(false, false);
name = (String)record.getValue("name");
assertEquals("\"", name); assertEquals("\"", name);
} }
} }