diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateXml.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateXml.java
index ca2f30f69e..b7184d4f06 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateXml.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateXml.java
@@ -21,6 +21,8 @@ import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.SystemResource;
+import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
@@ -38,15 +40,21 @@ import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.security.xml.SafeXMLConfiguration;
import org.xml.sax.SAXException;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
@@ -64,26 +72,40 @@ import java.util.concurrent.atomic.AtomicReference;
@WritesAttribute(attribute = "validatexml.invalid.error", description = "If the flow file is routed to the invalid relationship "
+ "the attribute will contain the error message resulting from the validation failure.")
})
-@CapabilityDescription("Validates the contents of FlowFiles against a user-specified XML Schema file")
+@CapabilityDescription("Validates XML contained in a FlowFile. By default, the XML is contained in the FlowFile content. If the 'XML Source Attribute' property is set, the XML to be validated "
+ + "is contained in the specified attribute. It is not recommended to use attributes to hold large XML documents; doing so could adversely affect system performance. "
+ + "Full schema validation is performed if the processor is configured with the XSD schema details. Otherwise, the only validation performed is "
+ + "to ensure the XML syntax is correct and well-formed, e.g. all opening tags are properly closed.")
+@SystemResourceConsideration(resource = SystemResource.MEMORY, description = "While this processor supports processing XML within attributes, it is strongly discouraged to hold "
+ + "large amounts of data in attributes. In general, attribute values should be as small as possible and hold no more than a couple hundred characters.")
public class ValidateXml extends AbstractProcessor {
public static final String ERROR_ATTRIBUTE_KEY = "validatexml.invalid.error";
public static final PropertyDescriptor SCHEMA_FILE = new PropertyDescriptor.Builder()
.name("Schema File")
- .description("The path to the Schema file that is to be used for validation")
- .required(true)
+ .displayName("Schema File")
+ .description("The file path or URL to the XSD Schema file that is to be used for validation. If this property is blank, only XML syntax/structure will be validated.")
+ .required(false)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.identifiesExternalResource(ResourceCardinality.SINGLE, ResourceType.FILE, ResourceType.URL)
.build();
+ public static final PropertyDescriptor XML_SOURCE_ATTRIBUTE = new PropertyDescriptor.Builder()
+ .name("XML Source Attribute")
+ .displayName("XML Source Attribute")
+ .description("The name of the attribute containing XML to be validated. If this property is blank, the FlowFile content will be validated.")
+ .required(false)
+ .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+ .addValidator(StandardValidators.ATTRIBUTE_KEY_VALIDATOR)
+ .build();
public static final Relationship REL_VALID = new Relationship.Builder()
.name("valid")
- .description("FlowFiles that are successfully validated against the schema are routed to this relationship")
+ .description("FlowFiles that are successfully validated against the schema, if provided, or verified to be well-formed XML are routed to this relationship")
.build();
public static final Relationship REL_INVALID = new Relationship.Builder()
.name("invalid")
- .description("FlowFiles that are not valid according to the specified schema are routed to this relationship")
+ .description("FlowFiles that are not valid according to the specified schema or contain invalid XML are routed to this relationship")
.build();
private static final String SCHEMA_LANGUAGE = "http://www.w3.org/2001/XMLSchema";
@@ -96,6 +118,7 @@ public class ValidateXml extends AbstractProcessor {
protected void init(final ProcessorInitializationContext context) {
final List properties = new ArrayList<>();
properties.add(SCHEMA_FILE);
+ properties.add(XML_SOURCE_ATTRIBUTE);
this.properties = Collections.unmodifiableList(properties);
final Set relationships = new HashSet<>();
@@ -116,13 +139,13 @@ public class ValidateXml extends AbstractProcessor {
@OnScheduled
public void parseSchema(final ProcessContext context) throws SAXException {
- try {
+ if (context.getProperty(SCHEMA_FILE).isSet()) {
final URL url = context.getProperty(SCHEMA_FILE).evaluateAttributeExpressions().asResource().asURL();
final SchemaFactory schemaFactory = SchemaFactory.newInstance(SCHEMA_LANGUAGE);
final Schema schema = schemaFactory.newSchema(url);
- this.schemaRef.set(schema);
- } catch (final SAXException e) {
- throw e;
+ schemaRef.set(schema);
+ } else {
+ schemaRef.set(null);
}
}
@@ -134,35 +157,74 @@ public class ValidateXml extends AbstractProcessor {
}
final Schema schema = schemaRef.get();
- final Validator validator = schema.newValidator();
+ final Validator validator = schema == null ? null : schema.newValidator();
final ComponentLog logger = getLogger();
+ final boolean attributeContainsXML = context.getProperty(XML_SOURCE_ATTRIBUTE).isSet();
for (FlowFile flowFile : flowFiles) {
final AtomicBoolean valid = new AtomicBoolean(true);
- final AtomicReference exception = new AtomicReference(null);
+ final AtomicReference exception = new AtomicReference<>(null);
+ SafeXMLConfiguration safeXMLConfiguration = new SafeXMLConfiguration();
+ safeXMLConfiguration.setValidating(false);
- session.read(flowFile, new InputStreamCallback() {
- @Override
- public void process(final InputStream in) throws IOException {
- try {
- validator.validate(new StreamSource(in));
- } catch (final IllegalArgumentException | SAXException e) {
- valid.set(false);
- exception.set(e);
- }
+ try {
+ DocumentBuilder docBuilder = safeXMLConfiguration.createDocumentBuilder();
+
+ if (attributeContainsXML) {
+ // If XML source attribute is set, validate attribute value
+ String xml = flowFile.getAttribute(context.getProperty(XML_SOURCE_ATTRIBUTE).evaluateAttributeExpressions().getValue());
+ ByteArrayInputStream bais = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
+
+ validate(validator, docBuilder, bais);
+ } else {
+ // If XML source attribute is not set, validate flowfile content
+ session.read(flowFile, new InputStreamCallback() {
+ @Override
+ public void process(final InputStream in) throws IOException {
+ try {
+ validate(validator, docBuilder, in);
+ } catch (final IllegalArgumentException | SAXException e) {
+ valid.set(false);
+ exception.set(e);
+ }
+ }
+ });
}
- });
+ } catch (final IllegalArgumentException | SAXException | ParserConfigurationException | IOException e) {
+ valid.set(false);
+ exception.set(e);
+ }
+ // determine source location of XML for logging purposes
+ String xmlSource = attributeContainsXML ? "attribute '" + context.getProperty(XML_SOURCE_ATTRIBUTE).evaluateAttributeExpressions().getValue() + "'" : "content";
if (valid.get()) {
- logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[]{flowFile});
+ if (context.getProperty(SCHEMA_FILE).isSet()) {
+ logger.debug("Successfully validated XML in {} of {} against schema; routing to 'valid'", xmlSource, flowFile);
+ } else {
+ logger.debug("Successfully validated XML is well-formed in {} of {}; routing to 'valid'", xmlSource, flowFile);
+ }
session.getProvenanceReporter().route(flowFile, REL_VALID);
session.transfer(flowFile, REL_VALID);
} else {
flowFile = session.putAttribute(flowFile, ERROR_ATTRIBUTE_KEY, exception.get().getLocalizedMessage());
- logger.info("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[]{flowFile, exception.get().getLocalizedMessage()});
+ if (context.getProperty(SCHEMA_FILE).isSet()) {
+ logger.info("Failed to validate XML in {} of {} against schema due to {}; routing to 'invalid'", xmlSource, flowFile, exception.get().getLocalizedMessage());
+ } else {
+ logger.info("Failed to validate XML is well-formed in {} of {} due to {}; routing to 'invalid'", xmlSource, flowFile, exception.get().getLocalizedMessage());
+ }
session.getProvenanceReporter().route(flowFile, REL_INVALID);
session.transfer(flowFile, REL_INVALID);
}
}
}
+
+ private void validate(final Validator validator, final DocumentBuilder docBuilder, final InputStream in) throws IllegalArgumentException, SAXException, IOException {
+ if (validator != null) {
+ // If schema is provided, validator will be non-null
+ validator.validate(new StreamSource(in));
+ } else {
+ // Only verify that the XML is well-formed; no schema check
+ docBuilder.parse(in);
+ }
+ }
}
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateXml/additionalDetails.html b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateXml/additionalDetails.html
new file mode 100644
index 0000000000..2b5df57ee2
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/resources/docs/org.apache.nifi.processors.standard.ValidateXml/additionalDetails.html
@@ -0,0 +1,104 @@
+
+
+
+
+
+ ValidateCsv
+
+
+
+
+
+
Usage Information
+
+
+ In order to fully validate XML, a schema must be provided. The ValidateXML processor allows the schema to be specified in the
+ property 'Schema File'. The following example illustrates how an XSD schema and XML data work together.
+
The following are invalid XML data. The resulting validatexml.invalid.error attribute is shown.
+
+ <ns:bundle xmlns:ns="http://namespace/1">
+ <node>Hello World!</node>
+ </ns:bundle>
+ validatexml.invalid.error: cvc-complex-type.2.3: Element 'node' cannot have character [children], because the type's content type is element-only.
+
+
+
+
+ <ns:bundle xmlns:ns="http://namespace/1">
+ <node>
+ <value>Hello World!</value>
+ </node>
+ </ns:bundle>
+ validatexml.invalid.error: cvc-complex-type.2.4.a: Invalid content was found starting with element 'value'. One of '{subNode}' is expected.
+
+
+
+
+
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestValidateXml.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestValidateXml.java
index b41f86982e..3a03b229d9 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestValidateXml.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestValidateXml.java
@@ -18,16 +18,23 @@ package org.apache.nifi.processors.standard;
import java.io.IOException;
import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;
-import org.xml.sax.SAXException;
public class TestValidateXml {
+ private static final String VALID_XML = "Hello" +
+ "World!";
+ private static final String INVALID_XML = "is an invalid";
+ private static final String NONCOMPLIANT_XML = "is good XML, but violates schema";
+
@Test
- public void testValid() throws IOException, SAXException {
+ public void testValid() throws IOException {
+ // Valid XML in FF content, XSD provided
final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
runner.setProperty(ValidateXml.SCHEMA_FILE, "src/test/resources/TestXml/XmlBundle.xsd");
@@ -38,11 +45,20 @@ public class TestValidateXml {
}
@Test
- public void testInvalid() throws IOException, SAXException {
+ public void testInvalid() {
+ // Invalid XML in FF content, XSD provided
final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
runner.setProperty(ValidateXml.SCHEMA_FILE, "src/test/resources/TestXml/XmlBundle.xsd");
- runner.enqueue("is an invalid");
+ runner.enqueue(INVALID_XML);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
+ runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
+
+ runner.clearTransferState();
+ runner.enqueue(NONCOMPLIANT_XML);
+
runner.run();
runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
@@ -50,7 +66,7 @@ public class TestValidateXml {
}
@Test
- public void testValidEL() throws IOException, SAXException {
+ public void testValidEL() throws IOException {
final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
runner.setProperty(ValidateXml.SCHEMA_FILE, "${my.schema}");
runner.setVariable("my.schema", "src/test/resources/TestXml/XmlBundle.xsd");
@@ -62,15 +78,107 @@ public class TestValidateXml {
}
@Test(expected = AssertionError.class)
- public void testInvalidEL() throws IOException, SAXException {
+ public void testInvalidEL() {
final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
runner.setProperty(ValidateXml.SCHEMA_FILE, "${my.schema}");
- runner.enqueue("is an invalid");
+ runner.enqueue(INVALID_XML);
runner.run();
runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
}
+ @Test
+ public void testValidXMLAttributeWithSchema() {
+ // Valid XML in FF attribute, XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+ runner.setProperty(ValidateXml.SCHEMA_FILE, "src/test/resources/TestXml/XmlBundle.xsd");
+ runner.setProperty(ValidateXml.XML_SOURCE_ATTRIBUTE, "xml.attribute");
+ Map attributes = new HashMap<>();
+ attributes.put("xml.attribute", VALID_XML);
+
+ runner.enqueue("XML is in attribute, not content", attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_VALID, 1);
+ }
+
+ @Test
+ public void testInvalidXMLAttributeWithSchema() {
+ // Invalid XML in FF attribute, XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+ runner.setProperty(ValidateXml.SCHEMA_FILE, "src/test/resources/TestXml/XmlBundle.xsd");
+ runner.setProperty(ValidateXml.XML_SOURCE_ATTRIBUTE, "xml.attribute");
+ Map attributes = new HashMap<>();
+ attributes.put("xml.attribute", INVALID_XML);
+
+ runner.enqueue("flowfile content is irrelevant", attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
+ runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
+
+ runner.clearTransferState();
+ attributes.clear();
+ attributes.put("xml.attribute", NONCOMPLIANT_XML);
+
+ runner.enqueue("flowfile content is irrelevant", attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
+ runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
+ }
+
+ @Test
+ public void testValidXMLAttributeStructure() {
+ // Valid XML in FF attribute, no XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+ runner.setProperty(ValidateXml.XML_SOURCE_ATTRIBUTE, "xml.attribute");
+ Map attributes = new HashMap<>();
+ attributes.put("xml.attribute", VALID_XML);
+
+ runner.enqueue("XML is in attribute, not content", attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_VALID, 1);
+ }
+
+ @Test
+ public void testInvalidXMLAttributeStructure() {
+ // Invalid XML in FF attribute, no XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+ runner.setProperty(ValidateXml.XML_SOURCE_ATTRIBUTE, "xml.attribute");
+ Map attributes = new HashMap<>();
+ attributes.put("xml.attribute", INVALID_XML);
+
+ runner.enqueue("XML is in attribute, not content", attributes);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
+ runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
+ }
+
+ @Test
+ public void testValidXMLContentStructure() throws IOException {
+ // Valid XML in FF content, no XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+
+ runner.enqueue(Paths.get("src/test/resources/TestXml/xml-snippet.xml"));
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_VALID, 1);
+ }
+
+ @Test
+ public void testInvalidXMLContentStructure() {
+ // Invalid XML in FF content, no XSD provided
+ final TestRunner runner = TestRunners.newTestRunner(new ValidateXml());
+
+ runner.enqueue(INVALID_XML);
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ValidateXml.REL_INVALID, 1);
+ runner.assertAllFlowFilesContainAttribute(ValidateXml.REL_INVALID, ValidateXml.ERROR_ATTRIBUTE_KEY);
+ }
}