mirror of
https://github.com/apache/nifi.git
synced 2025-02-07 18:48:51 +00:00
NIFI-5983: handling parse problems in recordReader implementations
Fixed Checkstyle violation Signed-off-by: Matthew Burgess <mattyb149@apache.org> This closes #3282
This commit is contained in:
parent
35147a620f
commit
24a7d480c8
@ -119,6 +119,11 @@
|
||||
<artifactId>caffeine</artifactId>
|
||||
<version>2.6.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>27.0.1-jre</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
|
@ -24,6 +24,8 @@ import org.apache.nifi.serialization.record.MapRecord;
|
||||
import org.apache.nifi.serialization.record.Record;
|
||||
import org.apache.nifi.serialization.record.RecordSchema;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
@ -33,14 +35,21 @@ public abstract class AvroRecordReader implements RecordReader {
|
||||
|
||||
@Override
|
||||
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
|
||||
GenericRecord record = nextAvroRecord();
|
||||
if (record == null) {
|
||||
return null;
|
||||
try {
|
||||
GenericRecord record = nextAvroRecord();
|
||||
if (record == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final RecordSchema schema = getSchema();
|
||||
final Map<String, Object> values = AvroTypeUtil.convertAvroRecordToMap(record, schema);
|
||||
return new MapRecord(schema, values);
|
||||
} catch (IOException e) {
|
||||
throw e;
|
||||
} catch (MalformedRecordException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw new MalformedRecordException("Error while getting next record. Root cause: " + Throwables.getRootCause(e), e);
|
||||
}
|
||||
|
||||
final RecordSchema schema = getSchema();
|
||||
final Map<String, Object> values = AvroTypeUtil.convertAvroRecordToMap(record, schema);
|
||||
return new MapRecord(schema, values);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -42,6 +42,8 @@ import org.apache.nifi.serialization.record.RecordField;
|
||||
import org.apache.nifi.serialization.record.RecordFieldType;
|
||||
import org.apache.nifi.serialization.record.RecordSchema;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
|
||||
|
||||
public class CSVRecordReader extends AbstractCSVRecordReader {
|
||||
private final CSVParser csvParser;
|
||||
@ -72,45 +74,49 @@ public class CSVRecordReader extends AbstractCSVRecordReader {
|
||||
|
||||
@Override
|
||||
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
|
||||
final RecordSchema schema = getSchema();
|
||||
|
||||
final List<RecordField> recordFields = getRecordFields();
|
||||
final int numFieldNames = recordFields.size();
|
||||
try {
|
||||
final RecordSchema schema = getSchema();
|
||||
|
||||
for (final CSVRecord csvRecord : csvParser) {
|
||||
final Map<String, Object> values = new LinkedHashMap<>(recordFields.size() * 2);
|
||||
for (int i = 0; i < csvRecord.size(); i++) {
|
||||
final String rawValue = csvRecord.get(i);
|
||||
final List<RecordField> recordFields = getRecordFields();
|
||||
final int numFieldNames = recordFields.size();
|
||||
for (final CSVRecord csvRecord : csvParser) {
|
||||
final Map<String, Object> values = new LinkedHashMap<>(recordFields.size() * 2);
|
||||
for (int i = 0; i < csvRecord.size(); i++) {
|
||||
final String rawValue = csvRecord.get(i);
|
||||
|
||||
final String rawFieldName;
|
||||
final DataType dataType;
|
||||
if (i >= numFieldNames) {
|
||||
if (!dropUnknownFields) {
|
||||
values.put("unknown_field_index_" + i, rawValue);
|
||||
final String rawFieldName;
|
||||
final DataType dataType;
|
||||
if (i >= numFieldNames) {
|
||||
if (!dropUnknownFields) {
|
||||
values.put("unknown_field_index_" + i, rawValue);
|
||||
}
|
||||
|
||||
continue;
|
||||
} else {
|
||||
final RecordField recordField = recordFields.get(i);
|
||||
rawFieldName = recordField.getFieldName();
|
||||
dataType = recordField.getDataType();
|
||||
}
|
||||
|
||||
continue;
|
||||
} else {
|
||||
final RecordField recordField = recordFields.get(i);
|
||||
rawFieldName = recordField.getFieldName();
|
||||
dataType = recordField.getDataType();
|
||||
|
||||
final Object value;
|
||||
if (coerceTypes) {
|
||||
value = convert(rawValue, dataType, rawFieldName);
|
||||
} else {
|
||||
// The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
|
||||
// dictate a field type. As a result, we will use the schema that we have to attempt to convert
|
||||
// the value into the desired type if it's a simple type.
|
||||
value = convertSimpleIfPossible(rawValue, dataType, rawFieldName);
|
||||
}
|
||||
|
||||
values.put(rawFieldName, value);
|
||||
}
|
||||
|
||||
|
||||
final Object value;
|
||||
if (coerceTypes) {
|
||||
value = convert(rawValue, dataType, rawFieldName);
|
||||
} else {
|
||||
// The CSV Reader is going to return all fields as Strings, because CSV doesn't have any way to
|
||||
// dictate a field type. As a result, we will use the schema that we have to attempt to convert
|
||||
// the value into the desired type if it's a simple type.
|
||||
value = convertSimpleIfPossible(rawValue, dataType, rawFieldName);
|
||||
}
|
||||
|
||||
values.put(rawFieldName, value);
|
||||
return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
|
||||
}
|
||||
|
||||
return new MapRecord(schema, values, coerceTypes, dropUnknownFields);
|
||||
} catch (Exception e) {
|
||||
throw new MalformedRecordException("Error while getting next record. Root cause: " + Throwables.getRootCause(e), e);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@ -17,6 +17,7 @@
|
||||
package org.apache.nifi.csv;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.QuoteMode;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.serialization.MalformedRecordException;
|
||||
import org.apache.nifi.serialization.SimpleRecordSchema;
|
||||
@ -27,22 +28,30 @@ import org.apache.nifi.serialization.record.RecordSchema;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import com.google.common.base.Throwables;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
public class ITApacheCSVRecordReader {
|
||||
|
||||
private final CSVFormat format = CSVFormat.DEFAULT.withFirstRecordAsHeader().withTrim().withQuote('"');
|
||||
|
||||
private List<RecordField> getDefaultFields() {
|
||||
return createStringFields(new String[]{"id", "name", "balance", "address", "city", "state", "zipCode", "country"});
|
||||
}
|
||||
|
||||
private List<RecordField> createStringFields(String[] fieldNames) {
|
||||
final List<RecordField> fields = new ArrayList<>();
|
||||
for (final String fieldName : new String[]{"id", "name", "balance", "address", "city", "state", "zipCode", "country"}) {
|
||||
for (final String fieldName : fieldNames) {
|
||||
fields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
|
||||
}
|
||||
return fields;
|
||||
@ -71,4 +80,28 @@ public class ITApacheCSVRecordReader {
|
||||
assertEquals(NUM_LINES, numRecords);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExceptionThrownOnParseProblem() throws IOException, MalformedRecordException {
|
||||
CSVFormat csvFormat = CSVFormat.DEFAULT.withFirstRecordAsHeader().withQuoteMode(QuoteMode.ALL).withTrim().withDelimiter(',');
|
||||
final int NUM_LINES = 25;
|
||||
StringBuilder sb = new StringBuilder("\"id\",\"name\",\"balance\"");
|
||||
for (int i = 0; i < NUM_LINES; i++) {
|
||||
sb.append(String.format("\"%s\",\"John Doe\",\"4750.89D\"\n", i));
|
||||
}
|
||||
// cause a parse problem
|
||||
sb.append(String.format("\"%s\"dieParser,\"John Doe\",\"4750.89D\"\n", NUM_LINES ));
|
||||
sb.append(String.format("\"%s\",\"John Doe\",\"4750.89D\"\n", NUM_LINES + 1));
|
||||
final RecordSchema schema = new SimpleRecordSchema(createStringFields(new String[] {"id", "name", "balance"}));
|
||||
|
||||
try (final InputStream bais = new ByteArrayInputStream(sb.toString().getBytes());
|
||||
final CSVRecordReader reader = new CSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, csvFormat, true, false,
|
||||
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
|
||||
|
||||
while (reader.nextRecord() != null) {}
|
||||
} catch (Exception e) {
|
||||
assertThat(e, instanceOf(MalformedRecordException.class));
|
||||
assertThat(Throwables.getRootCause(e), instanceOf(IOException.class));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user