mirror of https://github.com/apache/nifi.git
NIFI-551 - ConvertJSONToAvro improve error message
- Report failure counts as a log error message - Send record parsing errors to a separate flowfile which is transferred down the failure relationship Signed-off-by: joewitt <joewitt@apache.org>
This commit is contained in:
parent
8201381c9b
commit
6f32e6e977
|
@ -38,6 +38,7 @@ import org.apache.nifi.processor.ProcessContext;
|
||||||
import org.apache.nifi.processor.ProcessSession;
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
import org.apache.nifi.processor.Relationship;
|
import org.apache.nifi.processor.Relationship;
|
||||||
import org.apache.nifi.processor.exception.ProcessException;
|
import org.apache.nifi.processor.exception.ProcessException;
|
||||||
|
import org.apache.nifi.processor.io.OutputStreamCallback;
|
||||||
import org.apache.nifi.processor.io.StreamCallback;
|
import org.apache.nifi.processor.io.StreamCallback;
|
||||||
import org.kitesdk.data.DatasetException;
|
import org.kitesdk.data.DatasetException;
|
||||||
import org.kitesdk.data.DatasetIOException;
|
import org.kitesdk.data.DatasetIOException;
|
||||||
|
@ -97,22 +98,22 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onTrigger(ProcessContext context, final ProcessSession session)
|
public void onTrigger(final ProcessContext context, final ProcessSession session)
|
||||||
throws ProcessException {
|
throws ProcessException {
|
||||||
FlowFile flowFile = session.get();
|
FlowFile successfulRecords = session.get();
|
||||||
if (flowFile == null) {
|
if (successfulRecords == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
String schemaProperty = context.getProperty(SCHEMA)
|
String schemaProperty = context.getProperty(SCHEMA)
|
||||||
.evaluateAttributeExpressions(flowFile)
|
.evaluateAttributeExpressions(successfulRecords)
|
||||||
.getValue();
|
.getValue();
|
||||||
final Schema schema;
|
final Schema schema;
|
||||||
try {
|
try {
|
||||||
schema = getSchema(schemaProperty, DefaultConfiguration.get());
|
schema = getSchema(schemaProperty, DefaultConfiguration.get());
|
||||||
} catch (SchemaNotFoundException e) {
|
} catch (SchemaNotFoundException e) {
|
||||||
getLogger().error("Cannot find schema: " + schemaProperty);
|
getLogger().error("Cannot find schema: " + schemaProperty);
|
||||||
session.transfer(flowFile, FAILURE);
|
session.transfer(successfulRecords, FAILURE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,21 +122,31 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
|
||||||
writer.setCodec(CodecFactory.snappyCodec());
|
writer.setCodec(CodecFactory.snappyCodec());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
flowFile = session.write(flowFile, new StreamCallback() {
|
successfulRecords = session.write(successfulRecords, new StreamCallback() {
|
||||||
@Override
|
@Override
|
||||||
public void process(InputStream in, OutputStream out) throws IOException {
|
public void process(InputStream in, OutputStream out) throws IOException {
|
||||||
|
FlowFile failedRecords = session.create();
|
||||||
long written = 0L;
|
long written = 0L;
|
||||||
long errors = 0L;
|
long errors = 0L;
|
||||||
|
long total = 0L;
|
||||||
try (JSONFileReader<Record> reader = new JSONFileReader<>(
|
try (JSONFileReader<Record> reader = new JSONFileReader<>(
|
||||||
in, schema, Record.class)) {
|
in, schema, Record.class)) {
|
||||||
reader.initialize();
|
reader.initialize();
|
||||||
try (DataFileWriter<Record> w = writer.create(schema, out)) {
|
try (DataFileWriter<Record> w = writer.create(schema, out)) {
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
|
total += 1;
|
||||||
try {
|
try {
|
||||||
Record record = reader.next();
|
Record record = reader.next();
|
||||||
w.append(record);
|
w.append(record);
|
||||||
written += 1;
|
written += 1;
|
||||||
} catch (DatasetRecordException e) {
|
} catch (final DatasetRecordException e) {
|
||||||
|
failedRecords = session.append(failedRecords, new OutputStreamCallback() {
|
||||||
|
@Override
|
||||||
|
public void process(OutputStream out) throws IOException {
|
||||||
|
out.write((e.getMessage() + " [" +
|
||||||
|
e.getCause().getMessage() + "]\n").getBytes());
|
||||||
|
}
|
||||||
|
});
|
||||||
errors += 1;
|
errors += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,20 +155,25 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
|
||||||
false /* update only if file transfer is successful */);
|
false /* update only if file transfer is successful */);
|
||||||
session.adjustCounter("Conversion errors", errors,
|
session.adjustCounter("Conversion errors", errors,
|
||||||
false /* update only if file transfer is successful */);
|
false /* update only if file transfer is successful */);
|
||||||
|
|
||||||
|
if (errors > 0L) {
|
||||||
|
getLogger().warn("Failed to convert " + errors + '/' + total + " records from JSON to Avro");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
session.transfer(failedRecords, FAILURE);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
session.transfer(flowFile, SUCCESS);
|
session.transfer(successfulRecords, SUCCESS);
|
||||||
|
|
||||||
//session.getProvenanceReporter().send(flowFile, target.getUri().toString());
|
//session.getProvenanceReporter().send(flowFile, target.getUri().toString());
|
||||||
} catch (ProcessException | DatasetIOException e) {
|
} catch (ProcessException | DatasetIOException e) {
|
||||||
getLogger().error("Failed reading or writing", e);
|
getLogger().error("Failed reading or writing", e);
|
||||||
session.transfer(flowFile, FAILURE);
|
session.transfer(successfulRecords, FAILURE);
|
||||||
|
|
||||||
} catch (DatasetException e) {
|
} catch (DatasetException e) {
|
||||||
getLogger().error("Failed to read FlowFile", e);
|
getLogger().error("Failed to read FlowFile", e);
|
||||||
session.transfer(flowFile, FAILURE);
|
session.transfer(successfulRecords, FAILURE);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,9 +18,17 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.nifi.processors.kite;
|
package org.apache.nifi.processors.kite;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.avro.Schema;
|
import org.apache.avro.Schema;
|
||||||
import org.apache.avro.SchemaBuilder;
|
import org.apache.avro.SchemaBuilder;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
import org.apache.nifi.util.TestRunner;
|
import org.apache.nifi.util.TestRunner;
|
||||||
import org.apache.nifi.util.TestRunners;
|
import org.apache.nifi.util.TestRunners;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -38,9 +46,13 @@ public class TestJSONToAvroProcessor {
|
||||||
|
|
||||||
public static final String JSON_CONTENT = ""
|
public static final String JSON_CONTENT = ""
|
||||||
+ "{\"id\": 1,\"color\": \"green\"}"
|
+ "{\"id\": 1,\"color\": \"green\"}"
|
||||||
+ "{\"id\": \"120V\", \"color\": \"blue\"}\n" + // invalid, ID is a string
|
+ "{\"id\": \"120V\", \"color\": \"blue\"}\n" // invalid, ID is a string
|
||||||
|
+ "{\"id\": 10, \"color\": 15.23}\n" + // invalid, color as double
|
||||||
"{\"id\": 2, \"color\": \"grey\", \"price\": 12.95 }";
|
"{\"id\": 2, \"color\": \"grey\", \"price\": 12.95 }";
|
||||||
|
|
||||||
|
public static final String FAILURE_CONTENT = "Cannot convert field id [Cannot convert to long: \"120V\"]\n" +
|
||||||
|
"Cannot convert field color [Cannot convert to string: 15.23]\n";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBasicConversion() throws IOException {
|
public void testBasicConversion() throws IOException {
|
||||||
TestRunner runner = TestRunners.newTestRunner(ConvertJSONToAvro.class);
|
TestRunner runner = TestRunners.newTestRunner(ConvertJSONToAvro.class);
|
||||||
|
@ -54,8 +66,13 @@ public class TestJSONToAvroProcessor {
|
||||||
long converted = runner.getCounterValue("Converted records");
|
long converted = runner.getCounterValue("Converted records");
|
||||||
long errors = runner.getCounterValue("Conversion errors");
|
long errors = runner.getCounterValue("Conversion errors");
|
||||||
Assert.assertEquals("Should convert 2 rows", 2, converted);
|
Assert.assertEquals("Should convert 2 rows", 2, converted);
|
||||||
Assert.assertEquals("Should reject 1 row", 1, errors);
|
Assert.assertEquals("Should reject 2 rows", 2, errors);
|
||||||
|
|
||||||
runner.assertAllFlowFilesTransferred("success", 1);
|
runner.assertTransferCount("success", 1);
|
||||||
|
runner.assertTransferCount("failure", 1);
|
||||||
|
|
||||||
|
String failureContent = Bytes.toString(runner.getContentAsByteArray(
|
||||||
|
runner.getFlowFilesForRelationship("failure").get(0)));
|
||||||
|
Assert.assertEquals("Should reject an invalid string and double", FAILURE_CONTENT, failureContent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue