NIFI-9832: Fix disappearing XML element content when the element has attribute (#5896)

- NIFI-9832: Additional test cases for XMLReader
2025-03-01 15:09:11 +00:00 · 2022-04-12 17:41:50 +02:00 · 2022-04-12 17:41:50 +02:00 · 68c6722f76
commit 68c6722f76
parent 27e78c6f0c
9 changed files with 561 additions and 59 deletions
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@ -218,6 +218,7 @@
                        <exclude>src/test/resources/syslog/syslog5424/log_mix.txt</exclude>
                        <exclude>src/test/resources/syslog/syslog5424/log_mix_in_error.txt</exclude>
                        <exclude>src/test/resources/text/testschema</exclude>
                        <exclude>src/test/resources/xml/field_with_sub-element.xml</exclude>
                        <exclude>src/test/resources/xml/people.xml</exclude>
                        <exclude>src/test/resources/xml/people2.xml</exclude>
                        <exclude>src/test/resources/xml/people3.xml</exclude>
@ -236,6 +237,7 @@
                        <exclude>src/test/resources/xml/people_tag_in_characters.xml</exclude>
                        <exclude>src/test/resources/xml/people_with_header_and_comments.xml</exclude>
                        <exclude>src/test/resources/xml/person.xml</exclude>
                        <exclude>src/test/resources/xml/person_record.xml</exclude>
                        <exclude>src/test/resources/xml/testschema</exclude>
                        <exclude>src/test/resources/xml/testschema2</exclude>
                        <exclude>src/test/resources/xml/testschema3</exclude>
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java
@ -97,7 +97,10 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
            .description("If tags with content (e. g. <field>content</field>) are defined as nested records in the schema, " +
                    "the name of the tag will be used as name for the record and the value of this property will be used as name for the field. " +
                    "If tags with content shall be parsed together with attributes (e. g. <field attribute=\"123\">content</field>), " +
-                    "they have to be defined as records. For additional information, see the section of processor usage.")
+                    "they have to be defined as records. In such a case, the name of the tag will be used as the name for the record and  " +
                    "the value of this property will be used as the name for the field holding the original content. The name of the attribute " +
                    "will be used to create a new record field, the content of which will be the value of the attribute. " +
                    "For more information, see the 'Additional Details...' section of the XMLReader controller service's documentation.")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .required(false)
@ -136,7 +139,12 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
    @Override
    protected SchemaAccessStrategy getSchemaAccessStrategy(final String strategy, final SchemaRegistry schemaRegistry, final PropertyContext context) {
-        final RecordSourceFactory<XmlNode> sourceFactory = (variables, contentStream) -> new XmlRecordSource(contentStream, isMultipleRecords(context, variables));
+
        final RecordSourceFactory<XmlNode> sourceFactory = (variables, contentStream) -> {
            String contentFieldName = trim(context.getProperty(CONTENT_FIELD_NAME).evaluateAttributeExpressions(variables).getValue());
            contentFieldName = (contentFieldName == null) ? "value" : contentFieldName;
            return new XmlRecordSource(contentStream, contentFieldName, isMultipleRecords(context, variables));
        };
        final Supplier<SchemaInferenceEngine<XmlNode>> schemaInference = () -> new XmlSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat));
        return SchemaInferenceUtil.getSchemaAccessStrategy(strategy, context, getLogger(), sourceFactory, schemaInference,
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordReader.java
@ -339,8 +339,8 @@ public class XMLRecordReader implements RecordReader {
                if (contentFieldName != null) {
                    recordValues.put(contentFieldName, content.toString());
                } else {
-                    logger.debug("Found content for field that has to be parsed as record but property \"Field Name for Content\" is not set. " +
+                    logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property but " +
-                            "The content will not be added to the record.");
+                            "the property was not set. The content was not added to the record.");
                }
                return new MapRecord(new SimpleRecordSchema(Collections.emptyList()), recordValues);
@ -486,10 +486,13 @@ public class XMLRecordReader implements RecordReader {
                if (field.isPresent()) {
                    Object value = parseStringForType(content.toString(), contentFieldName, field.get().getDataType());
                    recordValues.put(contentFieldName, value);
                } else {
                    logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property " +
                            "but no such field was present in the schema. The content was not added to the record.");
                }
            } else {
-                logger.debug("Found content for field that is defined as record but property \"Field Name for Content\" is not set. " +
+                logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property but " +
-                        "The content will not be added to record.");
+                        "the property was not set. The content was not added to the record.");
            }
        }
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/inference/XmlRecordSource.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/inference/XmlRecordSource.java
@ -35,8 +35,10 @@ import java.util.Map;
 public class XmlRecordSource implements RecordSource<XmlNode> {
    private final XMLEventReader xmlEventReader;
    private final String contentFieldName;
-    public XmlRecordSource(final InputStream in, final boolean ignoreWrapper) throws IOException {
+    public XmlRecordSource(final InputStream in, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
        this.contentFieldName = contentFieldName;
        try {
            final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
@ -125,7 +127,7 @@ public class XmlRecordSource implements RecordSource<XmlNode> {
        } else {
            final String textContent = content.toString().trim();
            if (!textContent.equals("")) {
-                childNodes.put("value", new XmlTextNode("value", textContent));
+                childNodes.put(contentFieldName, new XmlTextNode(contentFieldName, textContent));
            }
            return new XmlContainerNode(nodeName, childNodes);
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.xml.XMLReader/additionalDetails.html
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.xml.XMLReader/additionalDetails.html
@ -286,6 +286,357 @@
        for tags containing attributes and content.
    </p>
    <h2>Example: Tags with Attributes and Schema Inference</h2>
    <p>
        When the record's schema is not provided but inferred based on the data itself, providing a value for the "Field Name for Content" property
        is especially important. (For detailed information on schema inference, see the "Schema Inference" section below.)
        Let's focus on cases where an XML element (called <code>&lt;field_with_attribute&gt;</code> in the examples) has an XML attribute and some content and no sub-elements.
        For the examples below, let's assume that a ConvertRecord processor is used, and it uses an XMLReader controller service and an XMLRecordSetWriter
        controller service. The settings for XMLReader are provided separately for each example. The settings for XMLRecordSetWriter are common
        for all the examples below. This way an XML to XML conversion is executed and comparing the input data with the output highlights
        the schema inference behavior. The same behavior can be observed if a different Writer controller service is used.
        XMLRecordSetWriter was chosen for these examples so that the input and the output are easily comparable.
        The settings of the common XMLRecordSetWriter are the following:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Inherit Record Schema</code></td>
        </tr>
        <tr>
            <td>Suppress Null Values</td>
            <td><code>Never Suppress</code></td>
        </tr>
    </table>
    <h3>XML Attributes and Schema Inference Example 1</h3>
    <p>
        XMLReader settings:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Infer Schema</code></td>
        </tr>
        <tr>
            <td>Expect Records as Array</td>
            <td><code>false</code></td>
        </tr>
        <tr>
            <td>Field Name for Content</td>
            <td>not set</td>
        </tr>
    </table>
    <p>
        Input:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute attr="attr_content"&gt;
                        content of field
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>As mentioned above, the element called "field_with_attribute" has an attribute and some content but no sub-element.</p>
    <p>
        Output:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute&gt;
                        &lt;attr&gt;attr_content&lt;/attr&gt;
                        &lt;value&gt;&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        In the XMLReader's settings, no value is set for the "Field Name for Content" property. In such cases the schema inference logic
        adds a field named "value" to the schema. However, since "Field Name for Content" is not set, the data processing logic is instructed
        not to consider the original content of the parent XML tag (<code>&lt;field_with_attribute&gt;</code>, the content of which is "content of field"
        in the example). So a new field named "value" appears in the schema but no value is assigned to it from the data, thus the field is empty.
        The XML attribute (named "attr") is processed, a field named "attr" is added to the schema and the attribute's value ("attr_content") is assigned to it.
        In a case like this, the parent field's original content is lost and a new field named "value" appears in the schema with no data assigned to it.
        This is to make sure that no data is overwritten in the record if it already contains a field named "value". More on that case in Example 3 and Example 4.
    </p>
    <h3>XML Attributes and Schema Inference Example 2</h3>
    <p>
        In this example, the XMLReader's "Field Name for Content" property is filled with the value "original_content". The input data is the same as
        in the previous example.
    </p>
    <p>
        XMLReader settings:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Infer Schema</code></td>
        </tr>
        <tr>
            <td>Expect Records as Array</td>
            <td><code>false</code></td>
        </tr>
        <tr>
            <td>Field Name for Content</td>
            <td><code>original_content</code></td>
        </tr>
    </table>
    <p>
        Input:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute attr="attr_content"&gt;
                        content of field
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        Output:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute&gt;
                        &lt;attr&gt;attr_content&lt;/attr&gt;
                        &lt;original_content&gt;content of field&lt;/original_content&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        The XMLReader's "Field Name for Content" property contains the value "original_content" (the concrete value is not important, what is important
        is that a value is provided and it does not clash with the name of any sub-element in <code>&lt;field_with_attribute&gt;</code>).
        This explicitly tells the XMLReader controller service to create a field named "original_content" and make the original content of
        the parent XML tag the value of the field named "original_content". Adding the XML attribute named "attr" works just like in the first example.
        Since the <code>&lt;field_with_attribute&gt;</code> element had no child-element with the name "original_content", no data is lost.
    </p>
    <h3>XML Attributes and Schema Inference Example 3</h3>
    <p>
        In this example, XMLReader's "Field Name for Content" property is left empty. In the input data, the <code>&lt;field_with_attribute&gt;</code> element
        has some content and a sub-element named <code>&lt;value&gt;</code>.
    </p>
    <p>
        XMLReader settings:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Infer Schema</code></td>
        </tr>
        <tr>
            <td>Expect Records as Array</td>
            <td><code>false</code></td>
        </tr>
        <tr>
            <td>Field Name for Content</td>
            <td>not set</td>
        </tr>
    </table>
    <p>
        Input:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute attr="attr_content"&gt;
                          content of field&lt;value&gt;123&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        Output:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute&gt;
                        &lt;attr&gt;attr_content&lt;/attr&gt;
                        &lt;value&gt;123&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        The "Field Name for Content" property is not set, and the XML element has a sub-element named "value". The name of the sub-element clashes with the
        default field name added to the schema by the Schema Inference logic (see Example 1). As seen in the output data, the input XML attribute's value
        is added to the record just like in the previous examples. The value of the <code>&lt;value&gt;</code> element is retained, but the content of the
        <code>&lt;field_with_attribute&gt;</code> element that was outside of the sub-element is lost.
    </p>
    <h3>XML Attributes and Schema Inference Example 4</h3>
    <p>
        In this example, XMLReader's "Field Name for Content" property is given the value "value". In the input data, the <code>&lt;field_with_attribute&gt;</code> element
       has some content and a sub-element named <code>&lt;value&gt;</code>. The name of the sub-element clashes with the value of the "Field Name for Content" property.
    </p>
    <p>
        XMLReader settings:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Infer Schema</code></td>
        </tr>
        <tr>
            <td>Expect Records as Array</td>
            <td><code>false</code></td>
        </tr>
        <tr>
            <td>Field Name for Content</td>
            <td><code>value</code></td>
        </tr>
    </table>
    <p>
        Input:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute attr="attr_content"&gt;
                          content of field&lt;value&gt;123&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        Output:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute&gt;
                        &lt;attr&gt;attr_content&lt;/attr&gt;
                        &lt;value&gt;content of field&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        The "Field Name for Content" property's value is "value", and the XML element has a sub-element named "value". The name of the sub-element clashes with the
        value of the "Field Name for Content" property. The value of the <code>&lt;value&gt;</code> element is replaced by the content of the
        <code>&lt;field_with_attribute&gt;</code> element, and the original content of the <code>&lt;value&gt;</code> element is lost.
    </p>
    <h3>XML Attributes and Schema Inference Example 5</h3>
    <p>
        To avoid losing any data, the XMLReader's "Field Name for Content" property needs to be given a value that does not clash with any sub-element's name
        in the input data. In this example the input data is the same as in the previous one, but the "Field Name for Content" property's value is "original_content",
        a value that does not clash with any sub-element name. No data is lost in this case.
    </p>
    <p>
        XMLReader settings:
    </p>
    <table>
        <tr>
            <th>Property Name</th>
            <th>Property Value</th>
        </tr>
        <tr>
            <td>Schema Access Strategy</td>
            <td><code>Infer Schema</code></td>
        </tr>
        <tr>
            <td>Expect Records as Array</td>
            <td><code>false</code></td>
        </tr>
        <tr>
            <td>Field Name for Content</td>
            <td><code>original_content</code></td>
        </tr>
    </table>
    <p>
        Input:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute attr="attr_content"&gt;
                          content of field&lt;value&gt;123&lt;/value&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        Output:
    </p>
    <code>
            <pre>
                &lt;record&gt;
                    &lt;field_with_attribute&gt;
                        &lt;attr&gt;attr_content&lt;/attr&gt;
                        &lt;value&gt;123&lt;/value&gt;
                        &lt;original_content&gt;content of field&lt;/original_content&gt;
                    &lt;/field_with_attribute&gt;
                &lt;/record&gt;</pre>
    </code>
    <p>
        It can be seen in the output data that the attribute has been added to the <code>&lt;field_with_attribute&gt;</code> element as a sub-element,
        the <code>&lt;value&gt;</code> element retained its value, and the original content of the <code>&lt;field_with_attribute&gt;</code> element has been added as a sub-element
        named "original_content". This is because a value was chosen for the "Field Name for Content" property that does not clash with any of
        the existing sub-elements of the input XML element (<code>&lt;field_with_attribute&gt;</code>). No data is lost.
    </p>
    <h2>Example: Array of records</h2>
    <p>
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestInferXmlSchema.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestInferXmlSchema.java
@ -93,7 +93,8 @@ public class TestInferXmlSchema {
    @Test
    public void testStringFieldWithAttributes() throws IOException {
-        final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", true);
+        final String contentFieldName = "contentfield";
        final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", contentFieldName, true);
        assertEquals(3, schema.getFieldCount());
@ -106,12 +107,16 @@ public class TestInferXmlSchema {
        final RecordSchema childSchema = ((RecordDataType) softwareDataType).getChildSchema();
        assertSame(RecordFieldType.BOOLEAN, childSchema.getDataType("favorite").get().getFieldType());
-        assertSame(RecordFieldType.STRING, childSchema.getDataType("value").get().getFieldType());
+        assertSame(RecordFieldType.STRING, childSchema.getDataType(contentFieldName).get().getFieldType());
    }
    /**
     * Convenience overload that delegates to
     * {@code inferSchema(String, String, boolean)}, passing {@code "contentfield"}
     * as the content field name.
     *
     * @param filename      path of the XML file, forwarded unchanged
     * @param ignoreWrapper forwarded unchanged to the three-argument overload
     * @return the schema returned by the three-argument overload
     * @throws IOException propagated from the three-argument overload
     */
    private RecordSchema inferSchema(final String filename, final boolean ignoreWrapper) throws IOException {
        return inferSchema(filename, "contentfield", ignoreWrapper);
    }
    private RecordSchema inferSchema(final String filename, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
        final File file = new File(filename);
-        final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) ->  new XmlRecordSource(in, ignoreWrapper);
+        final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) ->  new XmlRecordSource(in, contentFieldName, ignoreWrapper);
        final SchemaInferenceEngine<XmlNode> schemaInference = new XmlSchemaInference(timeValueInference);
        final InferSchemaAccessStrategy<XmlNode> inferStrategy = new InferSchemaAccessStrategy<>(xmlSourceFactory, schemaInference, Mockito.mock(ComponentLog.class));
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java
@ -17,8 +17,10 @@
 package org.apache.nifi.xml;
 import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.reporting.InitializationException;
 import org.apache.nifi.schema.access.SchemaAccessUtils;
 import org.apache.nifi.schema.inference.SchemaInferenceUtil;
 import org.apache.nifi.util.MockFlowFile;
 import org.apache.nifi.util.TestRunner;
 import org.apache.nifi.util.TestRunners;
@ -31,43 +33,48 @@ import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import static junit.framework.TestCase.assertEquals;
 public class TestXMLReader {
    private XMLReader reader;
    private final String ATTRIBUTE_PREFIX = "attribute_prefix";
    private final String CONTENT_NAME = "content_field";
    private final String EVALUATE_IS_ARRAY = "xml.stream.is.array";
-    public TestRunner setup(String filePath) throws InitializationException, IOException {
+    private TestRunner setup(Map<PropertyDescriptor, String> xmlReaderProperties) throws InitializationException {
        TestRunner runner = TestRunners.newTestRunner(TestXMLReaderProcessor.class);
-        reader = new XMLReader();
+        XMLReader reader = new XMLReader();
        runner.addControllerService("xml_reader", reader);
        runner.setProperty(TestXMLReaderProcessor.XML_READER, "xml_reader");
-        final String outputSchemaText = new String(Files.readAllBytes(Paths.get(filePath)));
+        for (Map.Entry<PropertyDescriptor, String> entry : xmlReaderProperties.entrySet()) {
-        runner.setProperty(reader, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY);
+            runner.setProperty(reader, entry.getKey(), entry.getValue());
-        runner.setProperty(reader, SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
+        }
        runner.enableControllerService(reader);
        return runner;
    }
    @Test
-    public void testRecordFormat() throws IOException, InitializationException {
+    public void testRecordFormatDeterminedBasedOnAttribute() throws IOException, InitializationException {
-        TestRunner runner = setup("src/test/resources/xml/testschema");
+        String outputSchemaPath = "src/test/resources/xml/testschema";
        String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
-        runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_EVALUATE);
+        Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
        xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_EVALUATE.getValue());
        TestRunner runner = setup(xmlReaderProperties);
-        runner.enableControllerService(reader);
+        try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
-
+            runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
-        InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
+            runner.run();
-        runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
+        }
        runner.run();
        List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
@ -76,16 +83,20 @@ public class TestXMLReader {
    }
    @Test
-    public void testRecordFormat2() throws IOException, InitializationException {
+    public void testRecordFormatArray() throws IOException, InitializationException {
-        TestRunner runner = setup("src/test/resources/xml/testschema");
+        String outputSchemaPath = "src/test/resources/xml/testschema";
        String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
-        runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
+        Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
        xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
        TestRunner runner = setup(xmlReaderProperties);
-        runner.enableControllerService(reader);
+        try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
-
+            runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
-        InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
+            runner.run();
-        runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
+        }
        runner.run();
        List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
@ -94,16 +105,20 @@ public class TestXMLReader {
    }
    @Test
-    public void testRecordFormat3() throws IOException, InitializationException {
+    public void testRecordFormatNotArray() throws IOException, InitializationException {
-        TestRunner runner = setup("src/test/resources/xml/testschema");
+        String outputSchemaPath = "src/test/resources/xml/testschema";
        String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
-        runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE);
+        Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
        xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        TestRunner runner = setup(xmlReaderProperties);
-        runner.enableControllerService(reader);
+        try (InputStream is = new FileInputStream("src/test/resources/xml/person.xml")) {
-
+            runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
-        InputStream is = new FileInputStream("src/test/resources/xml/person.xml");
+            runner.run();
-        runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
+        }
        runner.run();
        List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
@ -113,16 +128,20 @@ public class TestXMLReader {
    @Test
    public void testAttributePrefix() throws IOException, InitializationException {
-        TestRunner runner = setup("src/test/resources/xml/testschema");
+        String outputSchemaPath = "src/test/resources/xml/testschema";
        String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
-        runner.setProperty(reader, XMLReader.ATTRIBUTE_PREFIX, "${" + ATTRIBUTE_PREFIX + "}");
+        Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
-        runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
+        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
        xmlReaderProperties.put(XMLReader.ATTRIBUTE_PREFIX, "${" + ATTRIBUTE_PREFIX + "}");
        xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
        TestRunner runner = setup(xmlReaderProperties);
-        runner.enableControllerService(reader);
+        try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
-
+            runner.enqueue(is, Collections.singletonMap(ATTRIBUTE_PREFIX, "ATTR_"));
-        InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
+            runner.run();
-        runner.enqueue(is, Collections.singletonMap(ATTRIBUTE_PREFIX, "ATTR_"));
+        }
        runner.run();
        List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
@ -136,16 +155,20 @@ public class TestXMLReader {
    @Test
    public void testContentField() throws IOException, InitializationException {
-        TestRunner runner = setup("src/test/resources/xml/testschema2");
+        String outputSchemaPath = "src/test/resources/xml/testschema2";
        String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
-        runner.setProperty(reader, XMLReader.CONTENT_FIELD_NAME, "${" + CONTENT_NAME + "}");
+        Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
-        runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
+        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
        xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
        xmlReaderProperties.put(XMLReader.CONTENT_FIELD_NAME, "${" + CONTENT_NAME + "}");
        xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
        TestRunner runner = setup(xmlReaderProperties);
-        runner.enableControllerService(reader);
+        try (InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml")) {
-
+            runner.enqueue(is, Collections.singletonMap(CONTENT_NAME, "CONTENT"));
-        InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
+            runner.run();
-        runner.enqueue(is, Collections.singletonMap(CONTENT_NAME, "CONTENT"));
+        }
        runner.run();
        List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
@ -157,4 +180,103 @@ public class TestXMLReader {
        assertEquals("MapRecord[{ID=P4, NAME=MapRecord[{CONTENT=Elenora Scrivens, ATTR=attr content, INNER=inner content}], AGE=16}]", records.get(3));
        assertEquals("MapRecord[{ID=P5, NAME=MapRecord[{INNER=inner content}]}]", records.get(4));
    }
    /**
     * Reads {@code person_record.xml} as a single record with schema inference enabled and the
     * content field name set to {@code CONTENT_NAME}. The text of the {@code software} element is
     * expected under that field name, next to the element's {@code favorite} attribute.
     */
    @Test
    public void testInferSchema() throws InitializationException, IOException {
        final Map<PropertyDescriptor, String> readerConfig = new HashMap<>();
        readerConfig.put(XMLReader.CONTENT_FIELD_NAME, CONTENT_NAME);
        readerConfig.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        readerConfig.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());

        final TestRunner runner = setup(readerConfig);
        try (InputStream xml = new FileInputStream("src/test/resources/xml/person_record.xml")) {
            runner.enqueue(xml);
            runner.run();
        }

        // Only the first flow file routed to SUCCESS is inspected.
        final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        final String expected = "MapRecord[{software=MapRecord[{" + CONTENT_NAME + "=Apache NiFi, favorite=true}], num=123, name=John Doe}]";
        assertEquals(expected, successFlowFiles.get(0).getContent());
    }
    /**
     * Reads {@code person_record.xml} as a single record with schema inference enabled and no
     * content field name configured. The expected output keeps the {@code favorite} attribute of
     * the {@code software} element but carries no entry for the element's text.
     */
    @Test
    public void testInferSchemaContentFieldNameNotSet() throws InitializationException, IOException {
        final Map<PropertyDescriptor, String> readerConfig = new HashMap<>();
        readerConfig.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        readerConfig.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());

        final TestRunner runner = setup(readerConfig);
        try (InputStream xml = new FileInputStream("src/test/resources/xml/person_record.xml")) {
            runner.enqueue(xml);
            runner.run();
        }

        // Only the first flow file routed to SUCCESS is inspected.
        final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        final String expected = "MapRecord[{software=MapRecord[{favorite=true}], num=123, name=John Doe}]";
        assertEquals(expected, successFlowFiles.get(0).getContent());
    }
    /**
     * Reads {@code field_with_sub-element.xml} as a single record with schema inference enabled
     * and no content field name configured. The expected output contains the {@code attr}
     * attribute and the {@code value} sub-element of {@code field_with_attribute}, but no entry
     * for the element's own text.
     */
    @Test
    public void testInferSchemaContentFieldNameNotSetSubElementExists() throws InitializationException, IOException {
        final Map<PropertyDescriptor, String> readerConfig = new HashMap<>();
        readerConfig.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        readerConfig.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());

        final TestRunner runner = setup(readerConfig);
        try (InputStream xml = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
            runner.enqueue(xml);
            runner.run();
        }

        // Only the first flow file routed to SUCCESS is inspected.
        final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        final String expected = "MapRecord[{field_with_attribute=MapRecord[{attr=attr_content, value=123}]}]";
        assertEquals(expected, successFlowFiles.get(0).getContent());
    }
    /**
     * Reads {@code field_with_sub-element.xml} as a single record with schema inference enabled
     * and the content field name set to {@code "value"}, which is also the name of a sub-element
     * in the fixture. The expected output holds the element's text under {@code value}; the
     * sub-element's {@code 123} does not appear.
     */
    @Test
    public void testInferSchemaContentFieldNameSetSubElementExistsNameClash() throws InitializationException, IOException {
        final Map<PropertyDescriptor, String> readerConfig = new HashMap<>();
        readerConfig.put(XMLReader.CONTENT_FIELD_NAME, "value");
        readerConfig.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        readerConfig.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());

        final TestRunner runner = setup(readerConfig);
        try (InputStream xml = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
            runner.enqueue(xml);
            runner.run();
        }

        // Only the first flow file routed to SUCCESS is inspected.
        final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        final String expected = "MapRecord[{field_with_attribute=MapRecord[{attr=attr_content, value=content of field}]}]";
        assertEquals(expected, successFlowFiles.get(0).getContent());
    }
    /**
     * Reads {@code field_with_sub-element.xml} as a single record with schema inference enabled
     * and the content field name set to {@code CONTENT_NAME}. The expected output holds the
     * element's text under that field name together with the {@code attr} attribute and the
     * {@code value} sub-element.
     */
    @Test
    public void testInferSchemaContentFieldNameSetSubElementExistsNoNameClash() throws InitializationException, IOException {
        final Map<PropertyDescriptor, String> readerConfig = new HashMap<>();
        readerConfig.put(XMLReader.CONTENT_FIELD_NAME, CONTENT_NAME);
        readerConfig.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
        readerConfig.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());

        final TestRunner runner = setup(readerConfig);
        try (InputStream xml = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
            runner.enqueue(xml);
            runner.run();
        }

        // Only the first flow file routed to SUCCESS is inspected.
        final List<MockFlowFile> successFlowFiles = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
        final String expected = "MapRecord[{field_with_attribute=MapRecord[{" + CONTENT_NAME
                + "=content of field, attr=attr_content, value=123}]}]";
        assertEquals(expected, successFlowFiles.get(0).getContent());
    }
 }
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/xml/field_with_sub-element.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/xml/field_with_sub-element.xml
@ -0,0 +1,4 @@
 <record>
    <field_with_attribute attr="attr_content">content of field<value>123</value>
    </field_with_attribute>
 </record>
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/xml/person_record.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/xml/person_record.xml
@ -0,0 +1,5 @@
 <record>
    <num>123</num>
    <name>John Doe</name>
    <software favorite="true">Apache NiFi</software>
 </record>