mirror of
https://github.com/apache/nifi.git
synced 2025-03-01 15:09:11 +00:00
NIFI-9832: Fix disappearing XML element content when the element has attribute (#5896)
- NIFI-9832: Additional test cases for XMLReader
This commit is contained in:
parent
27e78c6f0c
commit
68c6722f76
@ -218,6 +218,7 @@
|
|||||||
<exclude>src/test/resources/syslog/syslog5424/log_mix.txt</exclude>
|
<exclude>src/test/resources/syslog/syslog5424/log_mix.txt</exclude>
|
||||||
<exclude>src/test/resources/syslog/syslog5424/log_mix_in_error.txt</exclude>
|
<exclude>src/test/resources/syslog/syslog5424/log_mix_in_error.txt</exclude>
|
||||||
<exclude>src/test/resources/text/testschema</exclude>
|
<exclude>src/test/resources/text/testschema</exclude>
|
||||||
|
<exclude>src/test/resources/xml/field_with_sub-element.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/people.xml</exclude>
|
<exclude>src/test/resources/xml/people.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/people2.xml</exclude>
|
<exclude>src/test/resources/xml/people2.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/people3.xml</exclude>
|
<exclude>src/test/resources/xml/people3.xml</exclude>
|
||||||
@ -236,6 +237,7 @@
|
|||||||
<exclude>src/test/resources/xml/people_tag_in_characters.xml</exclude>
|
<exclude>src/test/resources/xml/people_tag_in_characters.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/people_with_header_and_comments.xml</exclude>
|
<exclude>src/test/resources/xml/people_with_header_and_comments.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/person.xml</exclude>
|
<exclude>src/test/resources/xml/person.xml</exclude>
|
||||||
|
<exclude>src/test/resources/xml/person_record.xml</exclude>
|
||||||
<exclude>src/test/resources/xml/testschema</exclude>
|
<exclude>src/test/resources/xml/testschema</exclude>
|
||||||
<exclude>src/test/resources/xml/testschema2</exclude>
|
<exclude>src/test/resources/xml/testschema2</exclude>
|
||||||
<exclude>src/test/resources/xml/testschema3</exclude>
|
<exclude>src/test/resources/xml/testschema3</exclude>
|
||||||
|
@ -97,7 +97,10 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||||||
.description("If tags with content (e. g. <field>content</field>) are defined as nested records in the schema, " +
|
.description("If tags with content (e. g. <field>content</field>) are defined as nested records in the schema, " +
|
||||||
"the name of the tag will be used as name for the record and the value of this property will be used as name for the field. " +
|
"the name of the tag will be used as name for the record and the value of this property will be used as name for the field. " +
|
||||||
"If tags with content shall be parsed together with attributes (e. g. <field attribute=\"123\">content</field>), " +
|
"If tags with content shall be parsed together with attributes (e. g. <field attribute=\"123\">content</field>), " +
|
||||||
"they have to be defined as records. For additional information, see the section of processor usage.")
|
"they have to be defined as records. In such a case, the name of the tag will be used as the name for the record and " +
|
||||||
|
"the value of this property will be used as the name for the field holding the original content. The name of the attribute " +
|
||||||
|
"will be used to create a new record field, the content of which will be the value of the attribute. " +
|
||||||
|
"For more information, see the 'Additional Details...' section of the XMLReader controller service's documentation.")
|
||||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||||
.required(false)
|
.required(false)
|
||||||
@ -136,7 +139,12 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected SchemaAccessStrategy getSchemaAccessStrategy(final String strategy, final SchemaRegistry schemaRegistry, final PropertyContext context) {
|
protected SchemaAccessStrategy getSchemaAccessStrategy(final String strategy, final SchemaRegistry schemaRegistry, final PropertyContext context) {
|
||||||
final RecordSourceFactory<XmlNode> sourceFactory = (variables, contentStream) -> new XmlRecordSource(contentStream, isMultipleRecords(context, variables));
|
|
||||||
|
final RecordSourceFactory<XmlNode> sourceFactory = (variables, contentStream) -> {
|
||||||
|
String contentFieldName = trim(context.getProperty(CONTENT_FIELD_NAME).evaluateAttributeExpressions(variables).getValue());
|
||||||
|
contentFieldName = (contentFieldName == null) ? "value" : contentFieldName;
|
||||||
|
return new XmlRecordSource(contentStream, contentFieldName, isMultipleRecords(context, variables));
|
||||||
|
};
|
||||||
final Supplier<SchemaInferenceEngine<XmlNode>> schemaInference = () -> new XmlSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat));
|
final Supplier<SchemaInferenceEngine<XmlNode>> schemaInference = () -> new XmlSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat));
|
||||||
|
|
||||||
return SchemaInferenceUtil.getSchemaAccessStrategy(strategy, context, getLogger(), sourceFactory, schemaInference,
|
return SchemaInferenceUtil.getSchemaAccessStrategy(strategy, context, getLogger(), sourceFactory, schemaInference,
|
||||||
|
@ -339,8 +339,8 @@ public class XMLRecordReader implements RecordReader {
|
|||||||
if (contentFieldName != null) {
|
if (contentFieldName != null) {
|
||||||
recordValues.put(contentFieldName, content.toString());
|
recordValues.put(contentFieldName, content.toString());
|
||||||
} else {
|
} else {
|
||||||
logger.debug("Found content for field that has to be parsed as record but property \"Field Name for Content\" is not set. " +
|
logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property but " +
|
||||||
"The content will not be added to the record.");
|
"the property was not set. The content was not added to the record.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new MapRecord(new SimpleRecordSchema(Collections.emptyList()), recordValues);
|
return new MapRecord(new SimpleRecordSchema(Collections.emptyList()), recordValues);
|
||||||
@ -486,10 +486,13 @@ public class XMLRecordReader implements RecordReader {
|
|||||||
if (field.isPresent()) {
|
if (field.isPresent()) {
|
||||||
Object value = parseStringForType(content.toString(), contentFieldName, field.get().getDataType());
|
Object value = parseStringForType(content.toString(), contentFieldName, field.get().getDataType());
|
||||||
recordValues.put(contentFieldName, value);
|
recordValues.put(contentFieldName, value);
|
||||||
|
} else {
|
||||||
|
logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property " +
|
||||||
|
"but no such field was present in the schema. The content was not added to the record.");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.debug("Found content for field that is defined as record but property \"Field Name for Content\" is not set. " +
|
logger.debug("Found content for a field that was supposed to be named with the value of the \"Field Name for Content\" property but " +
|
||||||
"The content will not be added to record.");
|
"the property was not set. The content was not added to the record.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,8 +35,10 @@ import java.util.Map;
|
|||||||
public class XmlRecordSource implements RecordSource<XmlNode> {
|
public class XmlRecordSource implements RecordSource<XmlNode> {
|
||||||
|
|
||||||
private final XMLEventReader xmlEventReader;
|
private final XMLEventReader xmlEventReader;
|
||||||
|
private final String contentFieldName;
|
||||||
|
|
||||||
public XmlRecordSource(final InputStream in, final boolean ignoreWrapper) throws IOException {
|
public XmlRecordSource(final InputStream in, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
|
||||||
|
this.contentFieldName = contentFieldName;
|
||||||
try {
|
try {
|
||||||
final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
|
final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
|
||||||
|
|
||||||
@ -125,7 +127,7 @@ public class XmlRecordSource implements RecordSource<XmlNode> {
|
|||||||
} else {
|
} else {
|
||||||
final String textContent = content.toString().trim();
|
final String textContent = content.toString().trim();
|
||||||
if (!textContent.equals("")) {
|
if (!textContent.equals("")) {
|
||||||
childNodes.put("value", new XmlTextNode("value", textContent));
|
childNodes.put(contentFieldName, new XmlTextNode(contentFieldName, textContent));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new XmlContainerNode(nodeName, childNodes);
|
return new XmlContainerNode(nodeName, childNodes);
|
||||||
|
@ -286,6 +286,357 @@
|
|||||||
for tags containing attributes and content.
|
for tags containing attributes and content.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<h2>Example: Tags with Attributes and Schema Inference</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
When the record's schema is not provided but inferred based on the data itself, providing a value for the "Field Name for Content" property
|
||||||
|
is especially important. (For detailed information on schema inference, see the "Schema Inference" section below.)
|
||||||
|
Let's focus on cases where an XML element (called <code><field_with_attribute></code> in the examples) has an XML attribute and some content and no sub-elements.
|
||||||
|
For the examples below, let's assume that a ConvertRecord processor is used, and it uses an XMLReader controller service and an XMLRecordSetWriter
|
||||||
|
controller service. The settings for XMLReader are provided separately for each example. The settings for XMLRecordSetWriter are common
|
||||||
|
for all the examples below. This way an XML to XML conversion is executed and comparing the input data with the output highlights
|
||||||
|
the schema inference behavior. The same behavior can be observed if a different Writer controller service is used.
|
||||||
|
XMLRecordSetWriter was chosen for these examples so that the input and the output are easily comparable.
|
||||||
|
The settings of the common XMLRecordSetWriter are the following:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Inherit Record Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Suppress Null Values</td>
|
||||||
|
<td><code>Never Suppress</code></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h3>XML Attributes and Schema Inference Example 1</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
XMLReader settings:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Infer Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Expect Records as Array</td>
|
||||||
|
<td><code>false</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Field Name for Content</td>
|
||||||
|
<td>not set</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Input:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">
|
||||||
|
content of field
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>As mentioned above, the element called "field_with_attribute" has an attribute and some content but no sub-element.</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Output:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute>
|
||||||
|
<attr>attr_content</attr>
|
||||||
|
<value></value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
In the XMLReader's settings, no value is set for the "Field Name for Content" property. In such cases the schema inference logic
|
||||||
|
adds a field named "value" to the schema. However, since "Field Name for Content" is not set, the data processing logic is instructed
|
||||||
|
not to consider the original content of the parent XML tags (<code><field_with_attribute></code> the content of which is "content of field"
|
||||||
|
in the example). So a new field named "value" appears in the schema but no value is assigned to it from the data, thus the field is empty.
|
||||||
|
The XML attribute (named "attr") is processed, a field named "attr" is added to the schema and the attribute's value ("attr_content") is assigned to it.
|
||||||
|
In a case like this, the parent field's original content is lost and a new field named "value" appears in the schema with no data assigned to it.
|
||||||
|
This is to make sure that no data is overwritten in the record if it already contains a field named "value". More on that case in Example 3 and Example 4.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h3>XML Attributes and Schema Inference Example 2</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
In this example, the XMLReader's "Field Name for Content" property is filled with the value "original_content". The input data is the same as
|
||||||
|
in the previous example.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
XMLReader settings:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Infer Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Expect Records as Array</td>
|
||||||
|
<td><code>false</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Field Name for Content</td>
|
||||||
|
<td><code>original_content</code></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Input:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">
|
||||||
|
content of field
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Output:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute>
|
||||||
|
<attr>attr_content</attr>
|
||||||
|
<original_content>content of field</original_content>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The XMLReader's "Field Name for Content" property contains the value "original_content" (the concrete value is not important; what is important
|
||||||
|
is that a value is provided and it does not clash with the name of any sub-element in <code><field_with_attribute></code>).
|
||||||
|
This explicitly tells the XMLReader controller service to create a field named "original_content" and make the original content of
|
||||||
|
the parent XML tag the value of the field named "original_content". Adding the XML attribute named "attr" works just like in the first example.
|
||||||
|
Since the <code><field_with_attribute></code> element had no child-element with the name "original_content", no data is lost.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h3>XML Attributes and Schema Inference Example 3</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
In this example, XMLReader's "Field Name for Content" property is left empty. In the input data, the <code><field_with_attribute></code> element
|
||||||
|
has some content and a sub-element named <code><value></code>.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
XMLReader settings:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Infer Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Expect Records as Array</td>
|
||||||
|
<td><code>false</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Field Name for Content</td>
|
||||||
|
<td>not set</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Input:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">
|
||||||
|
content of field<value>123</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Output:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute>
|
||||||
|
<attr>attr_content</attr>
|
||||||
|
<value>123</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The "Field Name for Content" property is not set, and the XML element has a sub-element named "value". The name of the sub-element clashes with the
|
||||||
|
default field name added to the schema by the Schema Inference logic (see Example 1). As seen in the output data, the input XML attribute's value
|
||||||
|
is added to the record just like in the previous examples. The value of the <code><value></code> element is retained, but the content of the
|
||||||
|
<code><field_with_attribute></code> element that was outside of the sub-element is lost.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h3>XML Attributes and Schema Inference Example 4</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
In this example, XMLReader's "Field Name for Content" property is given the value "value". In the input data, the <code><field_with_attribute></code> element
|
||||||
|
has some content and a sub-element named <code><value></code>. The name of the sub-element clashes with the value of the "Field Name for Content" property.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
XMLReader settings:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Infer Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Expect Records as Array</td>
|
||||||
|
<td><code>false</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Field Name for Content</td>
|
||||||
|
<td><code>value</code></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Input:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">
|
||||||
|
content of field<value>123</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Output:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute>
|
||||||
|
<attr>attr_content</attr>
|
||||||
|
<value>content of field</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The "Field Name for Content" property's value is "value", and the XML element has a sub-element named "value". The name of the sub-element clashes with the
|
||||||
|
value of the "Field Name for Content" property. The value of the <code><value></code> element is replaced by the content of the
|
||||||
|
<code><field_with_attribute></code> element, and the original content of the <code><value></code> element is lost.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h3>XML Attributes and Schema Inference Example 5</h3>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
To avoid losing any data, the XMLReader's "Field Name for Content" property needs to be given a value that does not clash with any sub-element's name
|
||||||
|
in the input data. In this example the input data is the same as in the previous one, but the "Field Name for Content" property's value is "original_content",
|
||||||
|
a value that does not clash with any sub-element name. No data is lost in this case.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
XMLReader settings:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Property Name</th>
|
||||||
|
<th>Property Value</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Schema Access Strategy</td>
|
||||||
|
<td><code>Infer Schema</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Expect Records as Array</td>
|
||||||
|
<td><code>false</code></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Field Name for Content</td>
|
||||||
|
<td><code>original_content</code></td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Input:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">
|
||||||
|
content of field<value>123</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Output:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
<record>
|
||||||
|
<field_with_attribute>
|
||||||
|
<attr>attr_content</attr>
|
||||||
|
<value>123</value>
|
||||||
|
<original_content>content of field</original_content>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record></pre>
|
||||||
|
</code>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
It can be seen in the output data that the attribute has been added to the <code><field_with_attribute></code> element as a sub-element,
|
||||||
|
the <code><value></code> element retained its value, and the original content of the <code><field_with_attribute></code> element has been added as a sub-element
|
||||||
|
named "original_content". This is because a value was chosen for the "Field Name for Content" property that does not clash with any of
|
||||||
|
the existing sub-elements of the input XML element (<code><field_with_attribute></code>). No data is lost.
|
||||||
|
</p>
|
||||||
|
|
||||||
<h2>Example: Array of records</h2>
|
<h2>Example: Array of records</h2>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
@ -93,7 +93,8 @@ public class TestInferXmlSchema {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStringFieldWithAttributes() throws IOException {
|
public void testStringFieldWithAttributes() throws IOException {
|
||||||
final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", true);
|
final String contentFieldName = "contentfield";
|
||||||
|
final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", contentFieldName, true);
|
||||||
|
|
||||||
assertEquals(3, schema.getFieldCount());
|
assertEquals(3, schema.getFieldCount());
|
||||||
|
|
||||||
@ -106,12 +107,16 @@ public class TestInferXmlSchema {
|
|||||||
|
|
||||||
final RecordSchema childSchema = ((RecordDataType) softwareDataType).getChildSchema();
|
final RecordSchema childSchema = ((RecordDataType) softwareDataType).getChildSchema();
|
||||||
assertSame(RecordFieldType.BOOLEAN, childSchema.getDataType("favorite").get().getFieldType());
|
assertSame(RecordFieldType.BOOLEAN, childSchema.getDataType("favorite").get().getFieldType());
|
||||||
assertSame(RecordFieldType.STRING, childSchema.getDataType("value").get().getFieldType());
|
assertSame(RecordFieldType.STRING, childSchema.getDataType(contentFieldName).get().getFieldType());
|
||||||
}
|
}
|
||||||
|
|
||||||
private RecordSchema inferSchema(final String filename, final boolean ignoreWrapper) throws IOException {
|
private RecordSchema inferSchema(final String filename, final boolean ignoreWrapper) throws IOException {
|
||||||
|
return inferSchema(filename, "contentfield", ignoreWrapper);
|
||||||
|
}
|
||||||
|
|
||||||
|
private RecordSchema inferSchema(final String filename, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
|
||||||
final File file = new File(filename);
|
final File file = new File(filename);
|
||||||
final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) -> new XmlRecordSource(in, ignoreWrapper);
|
final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) -> new XmlRecordSource(in, contentFieldName, ignoreWrapper);
|
||||||
final SchemaInferenceEngine<XmlNode> schemaInference = new XmlSchemaInference(timeValueInference);
|
final SchemaInferenceEngine<XmlNode> schemaInference = new XmlSchemaInference(timeValueInference);
|
||||||
final InferSchemaAccessStrategy<XmlNode> inferStrategy = new InferSchemaAccessStrategy<>(xmlSourceFactory, schemaInference, Mockito.mock(ComponentLog.class));
|
final InferSchemaAccessStrategy<XmlNode> inferStrategy = new InferSchemaAccessStrategy<>(xmlSourceFactory, schemaInference, Mockito.mock(ComponentLog.class));
|
||||||
|
|
||||||
|
@ -17,8 +17,10 @@
|
|||||||
|
|
||||||
package org.apache.nifi.xml;
|
package org.apache.nifi.xml;
|
||||||
|
|
||||||
|
import org.apache.nifi.components.PropertyDescriptor;
|
||||||
import org.apache.nifi.reporting.InitializationException;
|
import org.apache.nifi.reporting.InitializationException;
|
||||||
import org.apache.nifi.schema.access.SchemaAccessUtils;
|
import org.apache.nifi.schema.access.SchemaAccessUtils;
|
||||||
|
import org.apache.nifi.schema.inference.SchemaInferenceUtil;
|
||||||
import org.apache.nifi.util.MockFlowFile;
|
import org.apache.nifi.util.MockFlowFile;
|
||||||
import org.apache.nifi.util.TestRunner;
|
import org.apache.nifi.util.TestRunner;
|
||||||
import org.apache.nifi.util.TestRunners;
|
import org.apache.nifi.util.TestRunners;
|
||||||
@ -31,43 +33,48 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import static junit.framework.TestCase.assertEquals;
|
import static junit.framework.TestCase.assertEquals;
|
||||||
|
|
||||||
public class TestXMLReader {
|
public class TestXMLReader {
|
||||||
|
|
||||||
private XMLReader reader;
|
|
||||||
|
|
||||||
private final String ATTRIBUTE_PREFIX = "attribute_prefix";
|
private final String ATTRIBUTE_PREFIX = "attribute_prefix";
|
||||||
private final String CONTENT_NAME = "content_field";
|
private final String CONTENT_NAME = "content_field";
|
||||||
private final String EVALUATE_IS_ARRAY = "xml.stream.is.array";
|
private final String EVALUATE_IS_ARRAY = "xml.stream.is.array";
|
||||||
|
|
||||||
public TestRunner setup(String filePath) throws InitializationException, IOException {
|
private TestRunner setup(Map<PropertyDescriptor, String> xmlReaderProperties) throws InitializationException {
|
||||||
|
|
||||||
TestRunner runner = TestRunners.newTestRunner(TestXMLReaderProcessor.class);
|
TestRunner runner = TestRunners.newTestRunner(TestXMLReaderProcessor.class);
|
||||||
reader = new XMLReader();
|
XMLReader reader = new XMLReader();
|
||||||
|
|
||||||
runner.addControllerService("xml_reader", reader);
|
runner.addControllerService("xml_reader", reader);
|
||||||
runner.setProperty(TestXMLReaderProcessor.XML_READER, "xml_reader");
|
runner.setProperty(TestXMLReaderProcessor.XML_READER, "xml_reader");
|
||||||
|
|
||||||
final String outputSchemaText = new String(Files.readAllBytes(Paths.get(filePath)));
|
for (Map.Entry<PropertyDescriptor, String> entry : xmlReaderProperties.entrySet()) {
|
||||||
runner.setProperty(reader, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY);
|
runner.setProperty(reader, entry.getKey(), entry.getValue());
|
||||||
runner.setProperty(reader, SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
}
|
||||||
|
|
||||||
|
runner.enableControllerService(reader);
|
||||||
return runner;
|
return runner;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRecordFormat() throws IOException, InitializationException {
|
public void testRecordFormatDeterminedBasedOnAttribute() throws IOException, InitializationException {
|
||||||
TestRunner runner = setup("src/test/resources/xml/testschema");
|
String outputSchemaPath = "src/test/resources/xml/testschema";
|
||||||
|
String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
|
||||||
|
|
||||||
runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_EVALUATE);
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_EVALUATE.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
runner.enableControllerService(reader);
|
try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
|
||||||
|
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
||||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
runner.run();
|
||||||
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
}
|
||||||
runner.run();
|
|
||||||
|
|
||||||
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
||||||
List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
|
List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
|
||||||
@ -76,16 +83,20 @@ public class TestXMLReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRecordFormat2() throws IOException, InitializationException {
|
public void testRecordFormatArray() throws IOException, InitializationException {
|
||||||
TestRunner runner = setup("src/test/resources/xml/testschema");
|
String outputSchemaPath = "src/test/resources/xml/testschema";
|
||||||
|
String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
|
||||||
|
|
||||||
runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
runner.enableControllerService(reader);
|
try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
|
||||||
|
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
||||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
runner.run();
|
||||||
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
}
|
||||||
runner.run();
|
|
||||||
|
|
||||||
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
||||||
List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
|
List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n"));
|
||||||
@ -94,16 +105,20 @@ public class TestXMLReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRecordFormat3() throws IOException, InitializationException {
|
public void testRecordFormatNotArray() throws IOException, InitializationException {
|
||||||
TestRunner runner = setup("src/test/resources/xml/testschema");
|
String outputSchemaPath = "src/test/resources/xml/testschema";
|
||||||
|
String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
|
||||||
|
|
||||||
runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE);
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
runner.enableControllerService(reader);
|
try (InputStream is = new FileInputStream("src/test/resources/xml/person.xml")) {
|
||||||
|
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
||||||
InputStream is = new FileInputStream("src/test/resources/xml/person.xml");
|
runner.run();
|
||||||
runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true"));
|
}
|
||||||
runner.run();
|
|
||||||
|
|
||||||
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
||||||
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
||||||
@ -113,16 +128,20 @@ public class TestXMLReader {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAttributePrefix() throws IOException, InitializationException {
|
public void testAttributePrefix() throws IOException, InitializationException {
|
||||||
TestRunner runner = setup("src/test/resources/xml/testschema");
|
String outputSchemaPath = "src/test/resources/xml/testschema";
|
||||||
|
String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
|
||||||
|
|
||||||
runner.setProperty(reader, XMLReader.ATTRIBUTE_PREFIX, "${" + ATTRIBUTE_PREFIX + "}");
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
||||||
|
xmlReaderProperties.put(XMLReader.ATTRIBUTE_PREFIX, "${" + ATTRIBUTE_PREFIX + "}");
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
runner.enableControllerService(reader);
|
try (InputStream is = new FileInputStream("src/test/resources/xml/people.xml")) {
|
||||||
|
runner.enqueue(is, Collections.singletonMap(ATTRIBUTE_PREFIX, "ATTR_"));
|
||||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
runner.run();
|
||||||
runner.enqueue(is, Collections.singletonMap(ATTRIBUTE_PREFIX, "ATTR_"));
|
}
|
||||||
runner.run();
|
|
||||||
|
|
||||||
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
||||||
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
||||||
@ -136,16 +155,20 @@ public class TestXMLReader {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testContentField() throws IOException, InitializationException {
|
public void testContentField() throws IOException, InitializationException {
|
||||||
TestRunner runner = setup("src/test/resources/xml/testschema2");
|
String outputSchemaPath = "src/test/resources/xml/testschema2";
|
||||||
|
String outputSchemaText = new String(Files.readAllBytes(Paths.get(outputSchemaPath)));
|
||||||
|
|
||||||
runner.setProperty(reader, XMLReader.CONTENT_FIELD_NAME, "${" + CONTENT_NAME + "}");
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
runner.setProperty(reader, XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY);
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText);
|
||||||
|
xmlReaderProperties.put(XMLReader.CONTENT_FIELD_NAME, "${" + CONTENT_NAME + "}");
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_ARRAY.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
runner.enableControllerService(reader);
|
try (InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml")) {
|
||||||
|
runner.enqueue(is, Collections.singletonMap(CONTENT_NAME, "CONTENT"));
|
||||||
InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
|
runner.run();
|
||||||
runner.enqueue(is, Collections.singletonMap(CONTENT_NAME, "CONTENT"));
|
}
|
||||||
runner.run();
|
|
||||||
|
|
||||||
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS);
|
||||||
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n"));
|
||||||
@ -157,4 +180,103 @@ public class TestXMLReader {
|
|||||||
assertEquals("MapRecord[{ID=P4, NAME=MapRecord[{CONTENT=Elenora Scrivens, ATTR=attr content, INNER=inner content}], AGE=16}]", records.get(3));
|
assertEquals("MapRecord[{ID=P4, NAME=MapRecord[{CONTENT=Elenora Scrivens, ATTR=attr content, INNER=inner content}], AGE=16}]", records.get(3));
|
||||||
assertEquals("MapRecord[{ID=P5, NAME=MapRecord[{INNER=inner content}]}]", records.get(4));
|
assertEquals("MapRecord[{ID=P5, NAME=MapRecord[{INNER=inner content}]}]", records.get(4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInferSchema() throws InitializationException, IOException {
|
||||||
|
String expectedContent = "MapRecord[{software=MapRecord[{" + CONTENT_NAME + "=Apache NiFi, favorite=true}], num=123, name=John Doe}]";
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.CONTENT_FIELD_NAME, CONTENT_NAME);
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
|
try (InputStream is = new FileInputStream("src/test/resources/xml/person_record.xml")) {
|
||||||
|
runner.enqueue(is);
|
||||||
|
runner.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||||
|
String actualContent = out.getContent();
|
||||||
|
assertEquals(expectedContent, actualContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInferSchemaContentFieldNameNotSet() throws InitializationException, IOException {
|
||||||
|
String expectedContent = "MapRecord[{software=MapRecord[{favorite=true}], num=123, name=John Doe}]";
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
|
try (InputStream is = new FileInputStream("src/test/resources/xml/person_record.xml")) {
|
||||||
|
runner.enqueue(is);
|
||||||
|
runner.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||||
|
String actualContent = out.getContent();
|
||||||
|
assertEquals(expectedContent, actualContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInferSchemaContentFieldNameNotSetSubElementExists() throws InitializationException, IOException {
|
||||||
|
String expectedContent = "MapRecord[{field_with_attribute=MapRecord[{attr=attr_content, value=123}]}]";
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
|
try (InputStream is = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
|
||||||
|
runner.enqueue(is);
|
||||||
|
runner.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||||
|
String actualContent = out.getContent();
|
||||||
|
assertEquals(expectedContent, actualContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInferSchemaContentFieldNameSetSubElementExistsNameClash() throws InitializationException, IOException {
|
||||||
|
String expectedContent = "MapRecord[{field_with_attribute=MapRecord[{attr=attr_content, value=content of field}]}]";
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.CONTENT_FIELD_NAME, "value");
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
|
try (InputStream is = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
|
||||||
|
runner.enqueue(is);
|
||||||
|
runner.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||||
|
String actualContent = out.getContent();
|
||||||
|
assertEquals(expectedContent, actualContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInferSchemaContentFieldNameSetSubElementExistsNoNameClash() throws InitializationException, IOException {
|
||||||
|
String expectedContent = "MapRecord[{field_with_attribute=MapRecord[{" +CONTENT_NAME + "=content of field, " +
|
||||||
|
"attr=attr_content, value=123}]}]";
|
||||||
|
|
||||||
|
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||||
|
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||||
|
xmlReaderProperties.put(XMLReader.CONTENT_FIELD_NAME, CONTENT_NAME);
|
||||||
|
TestRunner runner = setup(xmlReaderProperties);
|
||||||
|
|
||||||
|
try (InputStream is = new FileInputStream("src/test/resources/xml/field_with_sub-element.xml")) {
|
||||||
|
runner.enqueue(is);
|
||||||
|
runner.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||||
|
String actualContent = out.getContent();
|
||||||
|
assertEquals(expectedContent, actualContent);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
<record>
|
||||||
|
<field_with_attribute attr="attr_content">content of field<value>123</value>
|
||||||
|
</field_with_attribute>
|
||||||
|
</record>
|
@ -0,0 +1,5 @@
|
|||||||
|
<record>
|
||||||
|
<num>123</num>
|
||||||
|
<name>John Doe</name>
|
||||||
|
<software favorite="true">Apache NiFi</software>
|
||||||
|
</record>
|
Loading…
x
Reference in New Issue
Block a user