mirror of https://github.com/apache/nifi.git
NIFI-12674 Modified ValidateCsv to make the schema optional if a header is provided. Added an option to validate CSV data held in a FlowFile attribute instead of the FlowFile content.
This closes #8362. Signed-off-by: Mike Moser <mosermw@apache.org>
This commit is contained in:
parent 5c3499a008
commit 0190374e56
|
@ -36,8 +36,6 @@ import org.apache.nifi.processor.AbstractProcessor;
|
||||||
import org.apache.nifi.processor.ProcessContext;
|
import org.apache.nifi.processor.ProcessContext;
|
||||||
import org.apache.nifi.processor.ProcessSession;
|
import org.apache.nifi.processor.ProcessSession;
|
||||||
import org.apache.nifi.processor.Relationship;
|
import org.apache.nifi.processor.Relationship;
|
||||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
|
||||||
import org.apache.nifi.processor.io.OutputStreamCallback;
|
|
||||||
import org.apache.nifi.processor.util.StandardValidators;
|
import org.apache.nifi.processor.util.StandardValidators;
|
||||||
import org.supercsv.cellprocessor.Optional;
|
import org.supercsv.cellprocessor.Optional;
|
||||||
import org.supercsv.cellprocessor.ParseBigDecimal;
|
import org.supercsv.cellprocessor.ParseBigDecimal;
|
||||||
|
@ -67,14 +65,16 @@ import org.supercsv.io.CsvListReader;
|
||||||
import org.supercsv.prefs.CsvPreference;
|
import org.supercsv.prefs.CsvPreference;
|
||||||
import org.supercsv.util.CsvContext;
|
import org.supercsv.util.CsvContext;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
@ -82,7 +82,7 @@ import java.util.concurrent.atomic.AtomicReference;
|
||||||
@SupportsBatching
|
@SupportsBatching
|
||||||
@InputRequirement(Requirement.INPUT_REQUIRED)
|
@InputRequirement(Requirement.INPUT_REQUIRED)
|
||||||
@Tags({"csv", "schema", "validation"})
|
@Tags({"csv", "schema", "validation"})
|
||||||
@CapabilityDescription("Validates the contents of FlowFiles against a user-specified CSV schema. " +
|
@CapabilityDescription("Validates the contents of FlowFiles or a FlowFile attribute value against a user-specified CSV schema. " +
|
||||||
"Take a look at the additional documentation of this processor for some schema examples.")
|
"Take a look at the additional documentation of this processor for some schema examples.")
|
||||||
@WritesAttributes({
|
@WritesAttributes({
|
||||||
@WritesAttribute(attribute = "count.valid.lines", description = "If line by line validation, number of valid lines extracted from the source data"),
|
@WritesAttribute(attribute = "count.valid.lines", description = "If line by line validation, number of valid lines extracted from the source data"),
|
||||||
|
@ -116,8 +116,8 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
.displayName("Schema")
|
.displayName("Schema")
|
||||||
.description("The schema to be used for validation. Is expected a comma-delimited string representing the cell "
|
.description("The schema to be used for validation. Is expected a comma-delimited string representing the cell "
|
||||||
+ "processors to apply. The following cell processors are allowed in the schema definition: "
|
+ "processors to apply. The following cell processors are allowed in the schema definition: "
|
||||||
+ ALLOWED_OPERATORS + ". Note: cell processors cannot be nested except with Optional.")
|
+ ALLOWED_OPERATORS + ". Note: cell processors cannot be nested except with Optional. Schema is required if Header is false.")
|
||||||
.required(true)
|
.required(false)
|
||||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
.addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
|
.addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
|
||||||
.build();
|
.build();
|
||||||
|
@ -172,6 +172,16 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
public static final PropertyDescriptor CSV_SOURCE_ATTRIBUTE = new PropertyDescriptor.Builder()
|
||||||
|
.name("CSV Source Attribute")
|
||||||
|
.displayName("CSV Source Attribute")
|
||||||
|
.description("The name of the attribute containing CSV data to be validated. If this property is blank, the FlowFile content will be validated.")
|
||||||
|
.required(false)
|
||||||
|
.expressionLanguageSupported(ExpressionLanguageScope.ENVIRONMENT)
|
||||||
|
.addValidator(StandardValidators.ATTRIBUTE_KEY_VALIDATOR)
|
||||||
|
.dependsOn(VALIDATION_STRATEGY, VALIDATE_WHOLE_FLOWFILE.getValue())
|
||||||
|
.build();
|
||||||
|
|
||||||
public static final PropertyDescriptor INCLUDE_ALL_VIOLATIONS = new PropertyDescriptor.Builder()
|
public static final PropertyDescriptor INCLUDE_ALL_VIOLATIONS = new PropertyDescriptor.Builder()
|
||||||
.name("validate-csv-violations")
|
.name("validate-csv-violations")
|
||||||
.displayName("Include all violations")
|
.displayName("Include all violations")
|
||||||
|
@ -187,6 +197,7 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
|
|
||||||
private static final List<PropertyDescriptor> PROPERTIES = List.of(
|
private static final List<PropertyDescriptor> PROPERTIES = List.of(
|
||||||
SCHEMA,
|
SCHEMA,
|
||||||
|
CSV_SOURCE_ATTRIBUTE,
|
||||||
HEADER,
|
HEADER,
|
||||||
DELIMITER_CHARACTER,
|
DELIMITER_CHARACTER,
|
||||||
QUOTE_CHARACTER,
|
QUOTE_CHARACTER,
|
||||||
|
@ -201,7 +212,8 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
.build();
|
.build();
|
||||||
public static final Relationship REL_INVALID = new Relationship.Builder()
|
public static final Relationship REL_INVALID = new Relationship.Builder()
|
||||||
.name("invalid")
|
.name("invalid")
|
||||||
.description("FlowFiles that are not valid according to the specified schema are routed to this relationship")
|
.description("FlowFiles that are not valid according to the specified schema,"
|
||||||
|
+ " or no schema or CSV header can be identified, are routed to this relationship")
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
private static final Set<Relationship> RELATIONSHIPS = Set.of(
|
private static final Set<Relationship> RELATIONSHIPS = Set.of(
|
||||||
|
@ -223,6 +235,7 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
protected Collection<ValidationResult> customValidate(ValidationContext context) {
|
protected Collection<ValidationResult> customValidate(ValidationContext context) {
|
||||||
|
|
||||||
PropertyValue schemaProp = context.getProperty(SCHEMA);
|
PropertyValue schemaProp = context.getProperty(SCHEMA);
|
||||||
|
PropertyValue headerProp = context.getProperty(HEADER);
|
||||||
String schema = schemaProp.getValue();
|
String schema = schemaProp.getValue();
|
||||||
String subject = SCHEMA.getName();
|
String subject = SCHEMA.getName();
|
||||||
|
|
||||||
|
@ -231,7 +244,11 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
}
|
}
|
||||||
// If no Expression Language is present, try parsing the schema
|
// If no Expression Language is present, try parsing the schema
|
||||||
try {
|
try {
|
||||||
|
if (schema != null) {
|
||||||
this.parseSchema(schema);
|
this.parseSchema(schema);
|
||||||
|
} else if (!headerProp.asBoolean()) {
|
||||||
|
throw(new Exception("Schema cannot be empty if Header property is false."));
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
final List<ValidationResult> problems = new ArrayList<>(1);
|
final List<ValidationResult> problems = new ArrayList<>(1);
|
||||||
problems.add(new ValidationResult.Builder().subject(subject)
|
problems.add(new ValidationResult.Builder().subject(subject)
|
||||||
|
@ -449,49 +466,60 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
final CsvPreference csvPref = getPreference(context, flowFile);
|
final CsvPreference csvPref = getPreference(context, flowFile);
|
||||||
final boolean header = context.getProperty(HEADER).asBoolean();
|
final boolean header = context.getProperty(HEADER).asBoolean();
|
||||||
final ComponentLog logger = getLogger();
|
final ComponentLog logger = getLogger();
|
||||||
final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
|
String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
|
||||||
final CellProcessor[] cellProcs = this.parseSchema(schema);
|
CellProcessor[] cellProcs = null;
|
||||||
final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
|
if (schema != null) {
|
||||||
|
cellProcs = this.parseSchema(schema);
|
||||||
|
}
|
||||||
|
final String validationStrategy = context.getProperty(VALIDATION_STRATEGY).getValue();
|
||||||
|
final boolean isWholeFFValidation = !validationStrategy.equals(VALIDATE_LINES_INDIVIDUALLY.getValue());
|
||||||
final boolean includeAllViolations = context.getProperty(INCLUDE_ALL_VIOLATIONS).asBoolean();
|
final boolean includeAllViolations = context.getProperty(INCLUDE_ALL_VIOLATIONS).asBoolean();
|
||||||
|
|
||||||
final AtomicReference<Boolean> valid = new AtomicReference<>(true);
|
boolean valid = true;
|
||||||
|
int okCount = 0;
|
||||||
|
int totalCount = 0;
|
||||||
|
FlowFile invalidFF = null;
|
||||||
|
FlowFile validFF = null;
|
||||||
|
String validationError = null;
|
||||||
final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<>(true);
|
final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<>(true);
|
||||||
final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<>(true);
|
final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<>(true);
|
||||||
final AtomicReference<Integer> okCount = new AtomicReference<>(0);
|
|
||||||
final AtomicReference<Integer> totalCount = new AtomicReference<>(0);
|
|
||||||
final AtomicReference<FlowFile> invalidFF = new AtomicReference<>(null);
|
|
||||||
final AtomicReference<FlowFile> validFF = new AtomicReference<>(null);
|
|
||||||
final AtomicReference<String> validationError = new AtomicReference<>(null);
|
|
||||||
|
|
||||||
if (!isWholeFFValidation) {
|
if (!isWholeFFValidation) {
|
||||||
invalidFF.set(session.create(flowFile));
|
invalidFF = session.create(flowFile);
|
||||||
validFF.set(session.create(flowFile));
|
validFF = session.create(flowFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
session.read(flowFile, new InputStreamCallback() {
|
InputStream stream;
|
||||||
@Override
|
if (context.getProperty(CSV_SOURCE_ATTRIBUTE).isSet() && isWholeFFValidation) {
|
||||||
public void process(final InputStream in) throws IOException {
|
String csvAttribute = flowFile.getAttribute(context.getProperty(CSV_SOURCE_ATTRIBUTE).evaluateAttributeExpressions().getValue());
|
||||||
try (final NifiCsvListReader listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref)) {
|
stream = new ByteArrayInputStream(Objects.requireNonNullElse(csvAttribute, "").getBytes(StandardCharsets.UTF_8));
|
||||||
|
} else {
|
||||||
|
stream = session.read(flowFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
stream: try (final NifiCsvListReader listReader = new NifiCsvListReader(new InputStreamReader(stream), csvPref)) {
|
||||||
|
|
||||||
// handling of header
|
// handling of header
|
||||||
if (header) {
|
if (header) {
|
||||||
|
|
||||||
// read header
|
// read header
|
||||||
listReader.read();
|
List<String> headers = listReader.read();
|
||||||
|
|
||||||
|
if (schema == null) {
|
||||||
|
if (headers != null && !headers.isEmpty()) {
|
||||||
|
String newSchema = "Optional(StrNotNullOrEmpty()),".repeat(headers.size());
|
||||||
|
schema = newSchema.substring(0, newSchema.length() - 1);
|
||||||
|
cellProcs = this.parseSchema(schema);
|
||||||
|
} else {
|
||||||
|
validationError = "No schema or CSV header could be identified.";
|
||||||
|
valid = false;
|
||||||
|
break stream;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!isWholeFFValidation) {
|
if (!isWholeFFValidation) {
|
||||||
invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
|
invalidFF = session.append(invalidFF, out -> out.write(print(listReader.getUntokenizedRow(), csvPref, true)));
|
||||||
@Override
|
validFF = session.append(validFF, out -> out.write(print(listReader.getUntokenizedRow(), csvPref, true)));
|
||||||
public void process(OutputStream out) throws IOException {
|
|
||||||
out.write(print(listReader.getUntokenizedRow(), csvPref, true));
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
|
|
||||||
@Override
|
|
||||||
public void process(OutputStream out) throws IOException {
|
|
||||||
out.write(print(listReader.getUntokenizedRow(), csvPref, true));
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
isFirstLineValid.set(false);
|
isFirstLineValid.set(false);
|
||||||
isFirstLineInvalid.set(false);
|
isFirstLineInvalid.set(false);
|
||||||
}
|
}
|
||||||
|
@ -503,101 +531,89 @@ public class ValidateCsv extends AbstractProcessor {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
// read next row and check if no more row
|
// read next row and check if no more row
|
||||||
stop = listReader.read(includeAllViolations && valid.get(), cellProcs) == null;
|
stop = listReader.read(includeAllViolations && valid, cellProcs) == null;
|
||||||
|
|
||||||
if (!isWholeFFValidation && !stop) {
|
if (!isWholeFFValidation && !stop) {
|
||||||
validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
|
validFF = session.append(validFF, out -> out.write(print(listReader.getUntokenizedRow(), csvPref, isFirstLineValid.get())));
|
||||||
@Override
|
okCount++;
|
||||||
public void process(OutputStream out) throws IOException {
|
|
||||||
out.write(print(listReader.getUntokenizedRow(), csvPref, isFirstLineValid.get()));
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
okCount.set(okCount.get() + 1);
|
|
||||||
|
|
||||||
if (isFirstLineValid.get()) {
|
if (isFirstLineValid.get()) {
|
||||||
isFirstLineValid.set(false);
|
isFirstLineValid.set(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (final SuperCsvException e) {
|
} catch (final SuperCsvException e) {
|
||||||
valid.set(false);
|
valid = false;
|
||||||
if (isWholeFFValidation) {
|
if (isWholeFFValidation) {
|
||||||
validationError.set(e.getLocalizedMessage());
|
validationError = e.getLocalizedMessage();
|
||||||
logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", flowFile, e);
|
logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", flowFile, e);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
// we append the invalid line to the flow file that will be routed to invalid relationship
|
// we append the invalid line to the flow file that will be routed to invalid relationship
|
||||||
invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
|
invalidFF = session.append(invalidFF, out -> out.write(print(listReader.getUntokenizedRow(), csvPref, isFirstLineInvalid.get())));
|
||||||
@Override
|
|
||||||
public void process(OutputStream out) throws IOException {
|
|
||||||
out.write(print(listReader.getUntokenizedRow(), csvPref, isFirstLineInvalid.get()));
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
|
|
||||||
if (isFirstLineInvalid.get()) {
|
if (isFirstLineInvalid.get()) {
|
||||||
isFirstLineInvalid.set(false);
|
isFirstLineInvalid.set(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (validationError.get() == null) {
|
if (validationError == null) {
|
||||||
validationError.set(e.getLocalizedMessage());
|
validationError = e.getLocalizedMessage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (!isWholeFFValidation) {
|
if (!isWholeFFValidation) {
|
||||||
totalCount.set(totalCount.get() + 1);
|
totalCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
valid.set(false);
|
valid = false;
|
||||||
logger.error("Failed to validate {} against schema due to {}", flowFile, e);
|
logger.error("Failed to validate {} against schema due to {}", flowFile, e);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (isWholeFFValidation) {
|
if (isWholeFFValidation) {
|
||||||
if (valid.get()) {
|
if (valid) {
|
||||||
logger.debug("Successfully validated {} against schema; routing to 'valid'", flowFile);
|
logger.debug("Successfully validated {} against schema; routing to 'valid'", flowFile);
|
||||||
session.getProvenanceReporter().route(flowFile, REL_VALID);
|
session.getProvenanceReporter().route(flowFile, REL_VALID);
|
||||||
session.transfer(flowFile, REL_VALID);
|
session.transfer(flowFile, REL_VALID);
|
||||||
} else {
|
} else {
|
||||||
session.getProvenanceReporter().route(flowFile, REL_INVALID);
|
session.getProvenanceReporter().route(flowFile, REL_INVALID);
|
||||||
session.putAttribute(flowFile, "validation.error.message", validationError.get());
|
session.putAttribute(flowFile, "validation.error.message", validationError);
|
||||||
session.transfer(flowFile, REL_INVALID);
|
session.transfer(flowFile, REL_INVALID);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (valid.get()) {
|
if (valid) {
|
||||||
logger.debug("Successfully validated {} against schema; routing to 'valid'", validFF.get());
|
logger.debug("Successfully validated {} against schema; routing to 'valid'", validFF);
|
||||||
session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
|
session.getProvenanceReporter().route(validFF, REL_VALID, "All " + totalCount + " line(s) are valid");
|
||||||
session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
|
session.putAttribute(validFF, "count.valid.lines", Integer.toString(totalCount));
|
||||||
session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
|
session.putAttribute(validFF, "count.total.lines", Integer.toString(totalCount));
|
||||||
session.transfer(validFF.get(), REL_VALID);
|
session.transfer(validFF, REL_VALID);
|
||||||
session.remove(invalidFF.get());
|
session.remove(invalidFF);
|
||||||
session.remove(flowFile);
|
session.remove(flowFile);
|
||||||
} else if (okCount.get() != 0) {
|
} else if (okCount != 0) {
|
||||||
// because of the finally within the 'while' loop
|
// because of the finally within the 'while' loop
|
||||||
totalCount.set(totalCount.get() - 1);
|
totalCount--;
|
||||||
|
|
||||||
logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", okCount.get(), totalCount.get(), flowFile);
|
logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'",
|
||||||
session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
|
okCount, totalCount, flowFile);
|
||||||
session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
|
session.getProvenanceReporter().route(validFF, REL_VALID, okCount + " valid line(s)");
|
||||||
session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
|
session.putAttribute(validFF, "count.total.lines", Integer.toString(totalCount));
|
||||||
session.transfer(validFF.get(), REL_VALID);
|
session.putAttribute(validFF, "count.valid.lines", Integer.toString(okCount));
|
||||||
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
|
session.transfer(validFF, REL_VALID);
|
||||||
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
|
session.getProvenanceReporter().route(invalidFF, REL_INVALID, (totalCount - okCount) + " invalid line(s)");
|
||||||
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
|
session.putAttribute(invalidFF, "count.invalid.lines", Integer.toString((totalCount - okCount)));
|
||||||
session.putAttribute(invalidFF.get(), "validation.error.message", validationError.get());
|
session.putAttribute(invalidFF, "count.total.lines", Integer.toString(totalCount));
|
||||||
session.transfer(invalidFF.get(), REL_INVALID);
|
session.putAttribute(invalidFF, "validation.error.message", validationError);
|
||||||
|
session.transfer(invalidFF, REL_INVALID);
|
||||||
session.remove(flowFile);
|
session.remove(flowFile);
|
||||||
} else {
|
} else {
|
||||||
logger.debug("All lines in {} are invalid; routing to 'invalid'", invalidFF.get());
|
logger.debug("All lines in {} are invalid; routing to 'invalid'", invalidFF);
|
||||||
session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
|
session.getProvenanceReporter().route(invalidFF, REL_INVALID, "All " + totalCount + " line(s) are invalid");
|
||||||
session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
|
session.putAttribute(invalidFF, "count.invalid.lines", Integer.toString(totalCount));
|
||||||
session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
|
session.putAttribute(invalidFF, "count.total.lines", Integer.toString(totalCount));
|
||||||
session.putAttribute(invalidFF.get(), "validation.error.message", validationError.get());
|
session.putAttribute(invalidFF, "validation.error.message", validationError);
|
||||||
session.transfer(invalidFF.get(), REL_INVALID);
|
session.transfer(invalidFF, REL_INVALID);
|
||||||
session.remove(validFF.get());
|
session.remove(validFF);
|
||||||
session.remove(flowFile);
|
session.remove(flowFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,10 +16,14 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.nifi.processors.standard;
|
package org.apache.nifi.processors.standard;
|
||||||
|
|
||||||
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
import org.apache.nifi.util.TestRunner;
|
import org.apache.nifi.util.TestRunner;
|
||||||
import org.apache.nifi.util.TestRunners;
|
import org.apache.nifi.util.TestRunners;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
public class TestValidateCsv {
|
public class TestValidateCsv {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -164,6 +168,106 @@ public class TestValidateCsv {
|
||||||
runner.assertTransferCount(ValidateCsv.REL_INVALID, 1);
|
runner.assertTransferCount(ValidateCsv.REL_INVALID, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNoSchema() {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
runner.setProperty(ValidateCsv.DELIMITER_CHARACTER, ",");
|
||||||
|
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
|
||||||
|
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
|
||||||
|
runner.setProperty(ValidateCsv.HEADER, "true");
|
||||||
|
|
||||||
|
runner.enqueue("bigdecimal,bool,char,integer,long\r\n10.0001,true,c,1,92147483647");
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_VALID, 1);
|
||||||
|
|
||||||
|
runner.clearTransferState();
|
||||||
|
runner.enqueue(new byte[0]);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_INVALID, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidateOnAttribute() {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
runner.setProperty(ValidateCsv.DELIMITER_CHARACTER, ",");
|
||||||
|
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
|
||||||
|
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
|
||||||
|
runner.setProperty(ValidateCsv.HEADER, "true");
|
||||||
|
runner.setProperty(ValidateCsv.CSV_SOURCE_ATTRIBUTE, "CSV_ATTRIBUTE");
|
||||||
|
runner.setProperty(ValidateCsv.VALIDATION_STRATEGY, ValidateCsv.VALIDATE_WHOLE_FLOWFILE.getValue());
|
||||||
|
final Map<String, String> attributeMap = new HashMap<>();
|
||||||
|
attributeMap.put("CSV_ATTRIBUTE", "bigdecimal,bool,char,integer,long\r\n10.0001,true,c,1,92147483647");
|
||||||
|
|
||||||
|
runner.enqueue("FlowFile Random Data", attributeMap);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_VALID, 1);
|
||||||
|
runner.getFlowFilesForRelationship(ValidateCsv.REL_VALID).getFirst().assertContentEquals("FlowFile Random Data");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidateOnAttributeDoesNotExist() {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
runner.setProperty(ValidateCsv.DELIMITER_CHARACTER, ",");
|
||||||
|
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
|
||||||
|
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
|
||||||
|
runner.setProperty(ValidateCsv.HEADER, "true");
|
||||||
|
runner.setProperty(ValidateCsv.CSV_SOURCE_ATTRIBUTE, "CSV_ATTRIBUTE");
|
||||||
|
runner.setProperty(ValidateCsv.SCHEMA, "ParseInt(),ParseInt(),ParseInt()");
|
||||||
|
runner.setProperty(ValidateCsv.VALIDATION_STRATEGY, ValidateCsv.VALIDATE_WHOLE_FLOWFILE.getValue());
|
||||||
|
final Map<String, String> attributeMap = new HashMap<>();
|
||||||
|
attributeMap.put("CSV_ATTRIBUTE_BAD", "bigdecimal,bool,char,integer,long\r\n10.0001,true,c,1,92147483647");
|
||||||
|
|
||||||
|
runner.enqueue("FlowFile Random Data", attributeMap);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_VALID, 1);
|
||||||
|
runner.getFlowFilesForRelationship(ValidateCsv.REL_VALID).getFirst().assertContentEquals("FlowFile Random Data");
|
||||||
|
|
||||||
|
runner.clearTransferState();
|
||||||
|
attributeMap.clear();
|
||||||
|
attributeMap.put("CSV_ATTRIBUTE", "");
|
||||||
|
runner.enqueue("FlowFile Random Data", attributeMap);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_VALID, 1);
|
||||||
|
runner.getFlowFilesForRelationship(ValidateCsv.REL_VALID).getFirst().assertContentEquals("FlowFile Random Data");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidateOnAttributeDoesNotExistNoSchema() {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
runner.setProperty(ValidateCsv.DELIMITER_CHARACTER, ",");
|
||||||
|
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
|
||||||
|
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
|
||||||
|
runner.setProperty(ValidateCsv.HEADER, "true");
|
||||||
|
runner.setProperty(ValidateCsv.CSV_SOURCE_ATTRIBUTE, "CSV_ATTRIBUTE");
|
||||||
|
runner.setProperty(ValidateCsv.VALIDATION_STRATEGY, ValidateCsv.VALIDATE_WHOLE_FLOWFILE.getValue());
|
||||||
|
final Map<String, String> attributeMap = new HashMap<>();
|
||||||
|
attributeMap.put("CSV_ATTRIBUTE_BAD", "bigdecimal,bool,char,integer,long\r\n10.0001,true,c,1,92147483647");
|
||||||
|
|
||||||
|
runner.enqueue("FlowFile Random Data", attributeMap);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_INVALID, 1);
|
||||||
|
MockFlowFile flowfile = runner.getFlowFilesForRelationship(ValidateCsv.REL_INVALID).getFirst();
|
||||||
|
flowfile.assertAttributeEquals("validation.error.message",
|
||||||
|
"No schema or CSV header could be identified.");
|
||||||
|
flowfile.assertContentEquals("FlowFile Random Data");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testValidateEmptyFile() {
|
||||||
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
runner.setProperty(ValidateCsv.DELIMITER_CHARACTER, ",");
|
||||||
|
runner.setProperty(ValidateCsv.END_OF_LINE_CHARACTER, "\r\n");
|
||||||
|
runner.setProperty(ValidateCsv.QUOTE_CHARACTER, "\"");
|
||||||
|
runner.setProperty(ValidateCsv.HEADER, "true");
|
||||||
|
runner.setProperty(ValidateCsv.SCHEMA, "ParseInt(),ParseInt(),ParseInt()");
|
||||||
|
runner.setProperty(ValidateCsv.VALIDATION_STRATEGY, ValidateCsv.VALIDATE_WHOLE_FLOWFILE.getValue());
|
||||||
|
final Map<String, String> attributeMap = new HashMap<>();
|
||||||
|
|
||||||
|
runner.enqueue(new byte[0], attributeMap);
|
||||||
|
runner.run();
|
||||||
|
runner.assertAllFlowFilesTransferred(ValidateCsv.REL_VALID, 1);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEqualsNotNullStrNotNullOrEmpty() {
|
public void testEqualsNotNullStrNotNullOrEmpty() {
|
||||||
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
final TestRunner runner = TestRunners.newTestRunner(new ValidateCsv());
|
||||||
|
|
Loading…
Reference in New Issue