NIFI-551 - ConvertJSONToAvro improve error message

- Report failure counts as a log error message
- Send record parsing errors to a separate flowfile which is transferred down the
  failure relationship

Signed-off-by: joewitt <joewitt@apache.org>
This commit is contained in:
ricky 2015-04-28 15:58:09 -04:00 committed by joewitt
parent 8201381c9b
commit 6f32e6e977
2 changed files with 47 additions and 14 deletions

View File

@ -38,6 +38,7 @@ import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession; import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException; import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.io.StreamCallback; import org.apache.nifi.processor.io.StreamCallback;
import org.kitesdk.data.DatasetException; import org.kitesdk.data.DatasetException;
import org.kitesdk.data.DatasetIOException; import org.kitesdk.data.DatasetIOException;
@ -97,22 +98,22 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
} }
@Override @Override
public void onTrigger(ProcessContext context, final ProcessSession session) public void onTrigger(final ProcessContext context, final ProcessSession session)
throws ProcessException { throws ProcessException {
FlowFile flowFile = session.get(); FlowFile successfulRecords = session.get();
if (flowFile == null) { if (successfulRecords == null) {
return; return;
} }
String schemaProperty = context.getProperty(SCHEMA) String schemaProperty = context.getProperty(SCHEMA)
.evaluateAttributeExpressions(flowFile) .evaluateAttributeExpressions(successfulRecords)
.getValue(); .getValue();
final Schema schema; final Schema schema;
try { try {
schema = getSchema(schemaProperty, DefaultConfiguration.get()); schema = getSchema(schemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) { } catch (SchemaNotFoundException e) {
getLogger().error("Cannot find schema: " + schemaProperty); getLogger().error("Cannot find schema: " + schemaProperty);
session.transfer(flowFile, FAILURE); session.transfer(successfulRecords, FAILURE);
return; return;
} }
@ -121,21 +122,31 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
writer.setCodec(CodecFactory.snappyCodec()); writer.setCodec(CodecFactory.snappyCodec());
try { try {
flowFile = session.write(flowFile, new StreamCallback() { successfulRecords = session.write(successfulRecords, new StreamCallback() {
@Override @Override
public void process(InputStream in, OutputStream out) throws IOException { public void process(InputStream in, OutputStream out) throws IOException {
FlowFile failedRecords = session.create();
long written = 0L; long written = 0L;
long errors = 0L; long errors = 0L;
long total = 0L;
try (JSONFileReader<Record> reader = new JSONFileReader<>( try (JSONFileReader<Record> reader = new JSONFileReader<>(
in, schema, Record.class)) { in, schema, Record.class)) {
reader.initialize(); reader.initialize();
try (DataFileWriter<Record> w = writer.create(schema, out)) { try (DataFileWriter<Record> w = writer.create(schema, out)) {
while (reader.hasNext()) { while (reader.hasNext()) {
total += 1;
try { try {
Record record = reader.next(); Record record = reader.next();
w.append(record); w.append(record);
written += 1; written += 1;
} catch (DatasetRecordException e) { } catch (final DatasetRecordException e) {
failedRecords = session.append(failedRecords, new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write((e.getMessage() + " [" +
e.getCause().getMessage() + "]\n").getBytes());
}
});
errors += 1; errors += 1;
} }
} }
@ -144,20 +155,25 @@ public class ConvertJSONToAvro extends AbstractKiteProcessor {
false /* update only if file transfer is successful */); false /* update only if file transfer is successful */);
session.adjustCounter("Conversion errors", errors, session.adjustCounter("Conversion errors", errors,
false /* update only if file transfer is successful */); false /* update only if file transfer is successful */);
if (errors > 0L) {
getLogger().warn("Failed to convert " + errors + '/' + total + " records from JSON to Avro");
} }
} }
session.transfer(failedRecords, FAILURE);
}
}); });
session.transfer(flowFile, SUCCESS); session.transfer(successfulRecords, SUCCESS);
//session.getProvenanceReporter().send(flowFile, target.getUri().toString()); //session.getProvenanceReporter().send(flowFile, target.getUri().toString());
} catch (ProcessException | DatasetIOException e) { } catch (ProcessException | DatasetIOException e) {
getLogger().error("Failed reading or writing", e); getLogger().error("Failed reading or writing", e);
session.transfer(flowFile, FAILURE); session.transfer(successfulRecords, FAILURE);
} catch (DatasetException e) { } catch (DatasetException e) {
getLogger().error("Failed to read FlowFile", e); getLogger().error("Failed to read FlowFile", e);
session.transfer(flowFile, FAILURE); session.transfer(successfulRecords, FAILURE);
} }
} }

View File

@ -18,9 +18,17 @@
*/ */
package org.apache.nifi.processors.kite; package org.apache.nifi.processors.kite;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.List;
import org.apache.avro.Schema; import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder; import org.apache.avro.SchemaBuilder;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners; import org.apache.nifi.util.TestRunners;
import org.junit.Assert; import org.junit.Assert;
@ -38,9 +46,13 @@ public class TestJSONToAvroProcessor {
public static final String JSON_CONTENT = "" public static final String JSON_CONTENT = ""
+ "{\"id\": 1,\"color\": \"green\"}" + "{\"id\": 1,\"color\": \"green\"}"
+ "{\"id\": \"120V\", \"color\": \"blue\"}\n" + // invalid, ID is a string + "{\"id\": \"120V\", \"color\": \"blue\"}\n" // invalid, ID is a string
+ "{\"id\": 10, \"color\": 15.23}\n" + // invalid, color as double
"{\"id\": 2, \"color\": \"grey\", \"price\": 12.95 }"; "{\"id\": 2, \"color\": \"grey\", \"price\": 12.95 }";
public static final String FAILURE_CONTENT = "Cannot convert field id [Cannot convert to long: \"120V\"]\n" +
"Cannot convert field color [Cannot convert to string: 15.23]\n";
@Test @Test
public void testBasicConversion() throws IOException { public void testBasicConversion() throws IOException {
TestRunner runner = TestRunners.newTestRunner(ConvertJSONToAvro.class); TestRunner runner = TestRunners.newTestRunner(ConvertJSONToAvro.class);
@ -54,8 +66,13 @@ public class TestJSONToAvroProcessor {
long converted = runner.getCounterValue("Converted records"); long converted = runner.getCounterValue("Converted records");
long errors = runner.getCounterValue("Conversion errors"); long errors = runner.getCounterValue("Conversion errors");
Assert.assertEquals("Should convert 2 rows", 2, converted); Assert.assertEquals("Should convert 2 rows", 2, converted);
Assert.assertEquals("Should reject 1 row", 1, errors); Assert.assertEquals("Should reject 2 rows", 2, errors);
runner.assertAllFlowFilesTransferred("success", 1); runner.assertTransferCount("success", 1);
runner.assertTransferCount("failure", 1);
String failureContent = Bytes.toString(runner.getContentAsByteArray(
runner.getFlowFilesForRelationship("failure").get(0)));
Assert.assertEquals("Should reject an invalid string and double", FAILURE_CONTENT, failureContent);
} }
} }