NIFI-5264 - Added attribute for validation error message in ValidateCSV

This closes #2769

Signed-off-by: zenfenan <zenfenan@apache.org>
This commit is contained in:
Pierre Villard 2018-06-07 15:03:16 +02:00 committed by zenfenan
parent 49228aa5dc
commit 6e067734d5
2 changed files with 15 additions and 1 deletion

View File

@ -92,7 +92,8 @@ import org.supercsv.prefs.CsvPreference;
@WritesAttributes({ @WritesAttributes({
@WritesAttribute(attribute="count.valid.lines", description="If line by line validation, number of valid lines extracted from the source data"), @WritesAttribute(attribute="count.valid.lines", description="If line by line validation, number of valid lines extracted from the source data"),
@WritesAttribute(attribute="count.invalid.lines", description="If line by line validation, number of invalid lines extracted from the source data"), @WritesAttribute(attribute="count.invalid.lines", description="If line by line validation, number of invalid lines extracted from the source data"),
@WritesAttribute(attribute="count.total.lines", description="If line by line validation, total number of lines in the source data") @WritesAttribute(attribute="count.total.lines", description="If line by line validation, total number of lines in the source data"),
@WritesAttribute(attribute="validation.error.message", description="For flow files routed to invalid, message of the first validation error")
}) })
public class ValidateCsv extends AbstractProcessor { public class ValidateCsv extends AbstractProcessor {
@ -455,6 +456,7 @@ public class ValidateCsv extends AbstractProcessor {
final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0); final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null); final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null); final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
final AtomicReference<String> validationError = new AtomicReference<String>(null);
if(!isWholeFFValidation) { if(!isWholeFFValidation) {
invalidFF.set(session.create(flowFile)); invalidFF.set(session.create(flowFile));
@ -514,6 +516,7 @@ public class ValidateCsv extends AbstractProcessor {
} catch (final SuperCsvException e) { } catch (final SuperCsvException e) {
valid.set(false); valid.set(false);
if(isWholeFFValidation) { if(isWholeFFValidation) {
validationError.set(e.getLocalizedMessage());
logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[]{flowFile}, e); logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[]{flowFile}, e);
break; break;
} else { } else {
@ -528,6 +531,10 @@ public class ValidateCsv extends AbstractProcessor {
if(isFirstLineInvalid.get()) { if(isFirstLineInvalid.get()) {
isFirstLineInvalid.set(false); isFirstLineInvalid.set(false);
} }
if(validationError.get() == null) {
validationError.set(e.getLocalizedMessage());
}
} }
} finally { } finally {
if(!isWholeFFValidation) { if(!isWholeFFValidation) {
@ -554,6 +561,7 @@ public class ValidateCsv extends AbstractProcessor {
session.transfer(flowFile, REL_VALID); session.transfer(flowFile, REL_VALID);
} else { } else {
session.getProvenanceReporter().route(flowFile, REL_INVALID); session.getProvenanceReporter().route(flowFile, REL_INVALID);
session.putAttribute(flowFile, "validation.error.message", validationError.get());
session.transfer(flowFile, REL_INVALID); session.transfer(flowFile, REL_INVALID);
} }
} else { } else {
@ -578,6 +586,7 @@ public class ValidateCsv extends AbstractProcessor {
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)"); session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get()))); session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get())); session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.putAttribute(invalidFF.get(), "validation.error.message", validationError.get());
session.transfer(invalidFF.get(), REL_INVALID); session.transfer(invalidFF.get(), REL_INVALID);
session.remove(flowFile); session.remove(flowFile);
} else { } else {
@ -585,6 +594,7 @@ public class ValidateCsv extends AbstractProcessor {
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid"); session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get())); session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get())); session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
session.putAttribute(invalidFF.get(), "validation.error.message", validationError.get());
session.transfer(invalidFF.get(), REL_INVALID); session.transfer(invalidFF.get(), REL_INVALID);
session.remove(validFF.get()); session.remove(validFF.get());
session.remove(flowFile); session.remove(flowFile);

View File

@ -121,6 +121,8 @@ public class TestValidateCsv {
runner.enqueue("John,22/111954,63.2\r\nBob,01/03/2004,45.0"); runner.enqueue("John,22/111954,63.2\r\nBob,01/03/2004,45.0");
runner.run(); runner.run();
runner.assertTransferCount(ValidateCsv.REL_INVALID, 1); runner.assertTransferCount(ValidateCsv.REL_INVALID, 1);
runner.getFlowFilesForRelationship(ValidateCsv.REL_INVALID).get(0).assertAttributeEquals("validation.error.message",
"'22/111954' could not be parsed as a Date");
} }
@Test @Test
@ -197,6 +199,8 @@ public class TestValidateCsv {
runner.enqueue("test,test,testapache.org"); runner.enqueue("test,test,testapache.org");
runner.run(); runner.run();
runner.assertTransferCount(ValidateCsv.REL_INVALID, 1); runner.assertTransferCount(ValidateCsv.REL_INVALID, 1);
runner.getFlowFilesForRelationship(ValidateCsv.REL_INVALID).get(0).assertAttributeEquals("validation.error.message",
"'testapache.org' does not match the regular expression '[a-z0-9\\._]+@[a-z0-9\\.]+'");
} }
@Test @Test