mirror of https://github.com/apache/nifi.git
NIFI-9918: 'Parse XML Attributes' property added to XMLReader
NIFI-9918: Fixed review findings NIFI-9918: Example added to XMLReader's additionalDetails.html on the new property. Minor documentation fixes. Signed-off-by: Nathan Gough <thenatog@gmail.com> This closes #5964.
This commit is contained in:
parent
6ae1590aef
commit
6ee3d32ca3
|
@ -29,6 +29,7 @@ import org.apache.nifi.logging.ComponentLog;
|
|||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.schema.access.SchemaAccessStrategy;
|
||||
import org.apache.nifi.schema.access.SchemaAccessUtils;
|
||||
import org.apache.nifi.schema.access.SchemaNotFoundException;
|
||||
import org.apache.nifi.schema.inference.SchemaInferenceEngine;
|
||||
import org.apache.nifi.schema.inference.RecordSourceFactory;
|
||||
|
@ -106,12 +107,27 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||
.required(false)
|
||||
.build();
|
||||
|
||||
/**
 * Optional boolean property ("true"/"false", default "true") that controls whether XML attributes
 * are turned into record fields. It declares a dependency on the 'Schema Access Strategy' property
 * having the 'Infer Schema' value and does not support Expression Language.
 * The configured value is read once in {@code onEnabled} and handed to both the schema-inference
 * record source and the record reader.
 */
public static final PropertyDescriptor PARSE_XML_ATTRIBUTES = new PropertyDescriptor.Builder()
|
||||
.name("parse_xml_attributes")
|
||||
.displayName("Parse XML Attributes")
|
||||
.description("When 'Schema Access Strategy' is 'Infer Schema' and this property is 'true' then XML attributes are parsed and " +
|
||||
"added to the record as new fields. When the schema is inferred but this property is 'false', " +
|
||||
"XML attributes and their values are ignored.")
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.required(false)
|
||||
|
||||
.dependsOn(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, INFER_SCHEMA)
|
||||
.build();
|
||||
|
||||
private volatile boolean parseXmlAttributes;
|
||||
private volatile String dateFormat;
|
||||
private volatile String timeFormat;
|
||||
private volatile String timestampFormat;
|
||||
|
||||
@OnEnabled
|
||||
public void onEnabled(final ConfigurationContext context) {
|
||||
this.parseXmlAttributes = context.getProperty(PARSE_XML_ATTRIBUTES).asBoolean();
|
||||
this.dateFormat = context.getProperty(DateTimeUtils.DATE_FORMAT).getValue();
|
||||
this.timeFormat = context.getProperty(DateTimeUtils.TIME_FORMAT).getValue();
|
||||
this.timestampFormat = context.getProperty(DateTimeUtils.TIMESTAMP_FORMAT).getValue();
|
||||
|
@ -120,6 +136,7 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
final List<PropertyDescriptor> properties = new ArrayList<>(super.getSupportedPropertyDescriptors());
|
||||
properties.add(PARSE_XML_ATTRIBUTES);
|
||||
properties.add(SchemaInferenceUtil.SCHEMA_CACHE);
|
||||
properties.add(RECORD_FORMAT);
|
||||
properties.add(ATTRIBUTE_PREFIX);
|
||||
|
@ -143,7 +160,7 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||
final RecordSourceFactory<XmlNode> sourceFactory = (variables, contentStream) -> {
|
||||
String contentFieldName = trim(context.getProperty(CONTENT_FIELD_NAME).evaluateAttributeExpressions(variables).getValue());
|
||||
contentFieldName = (contentFieldName == null) ? "value" : contentFieldName;
|
||||
return new XmlRecordSource(contentStream, contentFieldName, isMultipleRecords(context, variables));
|
||||
return new XmlRecordSource(contentStream, contentFieldName, isMultipleRecords(context, variables), parseXmlAttributes);
|
||||
};
|
||||
final Supplier<SchemaInferenceEngine<XmlNode>> schemaInference = () -> new XmlSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat));
|
||||
|
||||
|
@ -179,7 +196,7 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact
|
|||
final String contentFieldName = trim(context.getProperty(CONTENT_FIELD_NAME).evaluateAttributeExpressions(variables).getValue());
|
||||
final boolean isArray = isMultipleRecords(context, variables);
|
||||
|
||||
return new XMLRecordReader(in, schema, isArray, attributePrefix, contentFieldName, dateFormat, timeFormat, timestampFormat, logger);
|
||||
return new XMLRecordReader(in, schema, isArray, parseXmlAttributes, attributePrefix, contentFieldName, dateFormat, timeFormat, timestampFormat, logger);
|
||||
}
|
||||
|
||||
private String trim(final String value) {
|
||||
|
|
|
@ -57,6 +57,7 @@ public class XMLRecordReader implements RecordReader {
|
|||
|
||||
private final ComponentLog logger;
|
||||
private final RecordSchema schema;
|
||||
private final boolean parseXmlAttributes;
|
||||
private final String attributePrefix;
|
||||
private final String contentFieldName;
|
||||
|
||||
|
@ -68,9 +69,11 @@ public class XMLRecordReader implements RecordReader {
|
|||
private final Supplier<DateFormat> LAZY_TIME_FORMAT;
|
||||
private final Supplier<DateFormat> LAZY_TIMESTAMP_FORMAT;
|
||||
|
||||
public XMLRecordReader(InputStream in, RecordSchema schema, boolean isArray, String attributePrefix, String contentFieldName,
|
||||
public XMLRecordReader(final InputStream in, final RecordSchema schema, final boolean isArray,
|
||||
final boolean parseXmlAttributes, final String attributePrefix, final String contentFieldName,
|
||||
final String dateFormat, final String timeFormat, final String timestampFormat, final ComponentLog logger) throws MalformedRecordException {
|
||||
this.schema = schema;
|
||||
this.parseXmlAttributes = parseXmlAttributes;
|
||||
this.attributePrefix = attributePrefix;
|
||||
this.contentFieldName = contentFieldName;
|
||||
this.logger = logger;
|
||||
|
@ -248,23 +251,10 @@ public class XMLRecordReader implements RecordReader {
|
|||
}
|
||||
|
||||
private Object parseUnknownField(StartElement startElement, boolean dropUnknown, RecordSchema schema) throws XMLStreamException {
|
||||
// parse attributes
|
||||
final Map<String, Object> recordValues = new HashMap<>();
|
||||
final Iterator iterator = startElement.getAttributes();
|
||||
while (iterator.hasNext()) {
|
||||
final Attribute attribute = (Attribute) iterator.next();
|
||||
final String attributeName = attribute.getName().toString();
|
||||
|
||||
if (dropUnknown) {
|
||||
if (schema != null) {
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
recordValues.put(attributePrefix == null ? attributeName : attributePrefix + attributeName, attribute.getValue());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
recordValues.put(attributePrefix == null ? attributeName : attributePrefix + attributeName, attribute.getValue());
|
||||
}
|
||||
if (parseXmlAttributes) {
|
||||
parseAttributesForUnknownField(startElement, schema, dropUnknown, recordValues);
|
||||
}
|
||||
|
||||
// parse fields
|
||||
|
@ -351,54 +341,33 @@ public class XMLRecordReader implements RecordReader {
|
|||
}
|
||||
}
|
||||
|
||||
private Record parseRecord(StartElement startElement, RecordSchema schema, boolean coerceTypes, boolean dropUnknown) throws XMLStreamException, MalformedRecordException {
|
||||
final Map<String, Object> recordValues = new HashMap<>();
|
||||
|
||||
// parse attributes
|
||||
private void parseAttributesForUnknownField(StartElement startElement, RecordSchema schema, boolean dropUnknown, Map<String, Object> recordValues) {
|
||||
final Iterator iterator = startElement.getAttributes();
|
||||
while (iterator.hasNext()) {
|
||||
final Attribute attribute = (Attribute) iterator.next();
|
||||
final String attributeName = attribute.getName().toString();
|
||||
|
||||
final String targetFieldName = attributePrefix == null ? attributeName : attributePrefix + attributeName;
|
||||
final String fieldName = ((attributePrefix == null) ? attributeName : (attributePrefix + attributeName));
|
||||
|
||||
if (dropUnknown) {
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
|
||||
// dropUnknown == true && coerceTypes == true
|
||||
if (coerceTypes) {
|
||||
final Object value;
|
||||
final DataType dataType = field.get().getDataType();
|
||||
if ((value = parseStringForType(attribute.getValue(), attributeName, dataType)) != null) {
|
||||
recordValues.put(targetFieldName, value);
|
||||
}
|
||||
|
||||
// dropUnknown == true && coerceTypes == false
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
if (schema != null) {
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
recordValues.put(fieldName, attribute.getValue());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// dropUnknown == false && coerceTypes == true
|
||||
if (coerceTypes) {
|
||||
final Object value;
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
if ((value = parseStringForType(attribute.getValue(), attributeName, field.get().getDataType())) != null) {
|
||||
recordValues.put(targetFieldName, value);
|
||||
}
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
}
|
||||
|
||||
// dropUnknown == false && coerceTypes == false
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
}
|
||||
recordValues.put(fieldName, attribute.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Record parseRecord(StartElement startElement, RecordSchema schema, boolean coerceTypes, boolean dropUnknown) throws XMLStreamException, MalformedRecordException {
|
||||
final Map<String, Object> recordValues = new HashMap<>();
|
||||
|
||||
// parse attributes
|
||||
if (parseXmlAttributes) {
|
||||
parseAttributesForRecord(startElement, schema, coerceTypes, dropUnknown, recordValues);
|
||||
}
|
||||
|
||||
// parse fields
|
||||
StringBuilder content = new StringBuilder();
|
||||
|
@ -506,6 +475,53 @@ public class XMLRecordReader implements RecordReader {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Copies the XML attributes of the given start element into {@code recordValues}.
 * <p>
 * Each attribute is stored under its name, prepended with {@code attributePrefix} when a prefix is
 * configured. The schema lookup, however, is done with the un-prefixed attribute name.
 * <ul>
 *   <li>{@code dropUnknown}: attributes that have no field in {@code schema} are skipped.</li>
 *   <li>{@code coerceTypes}: when the attribute has a schema field, its value is converted with
 *       {@code parseStringForType} to that field's data type; a {@code null} conversion result
 *       means the attribute is not stored. Attributes without a schema field (only possible when
 *       {@code dropUnknown} is false) are stored as the raw string.</li>
 *   <li>Otherwise the raw attribute string is stored.</li>
 * </ul>
 * NOTE(review): {@code schema} is dereferenced without a null check whenever {@code dropUnknown}
 * or {@code coerceTypes} is true - presumably callers always pass a non-null schema; confirm.
 *
 * @param startElement the element whose attributes are read
 * @param schema the record schema used for field lookup and type coercion
 * @param coerceTypes whether attribute values are converted to the schema field's data type
 * @param dropUnknown whether attributes missing from the schema are discarded
 * @param recordValues the map that receives the attribute values (mutated in place)
 */
private void parseAttributesForRecord(StartElement startElement, RecordSchema schema, boolean coerceTypes, boolean dropUnknown, Map<String, Object> recordValues) {
|
||||
final Iterator iterator = startElement.getAttributes();
|
||||
while (iterator.hasNext()) {
|
||||
final Attribute attribute = (Attribute) iterator.next();
|
||||
final String attributeName = attribute.getName().toString();
|
||||
|
||||
final String targetFieldName = attributePrefix == null ? attributeName : attributePrefix + attributeName;
|
||||
|
||||
if (dropUnknown) {
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
|
||||
// dropUnknown == true && coerceTypes == true
|
||||
if (coerceTypes) {
|
||||
final Object value;
|
||||
final DataType dataType = field.get().getDataType();
|
||||
if ((value = parseStringForType(attribute.getValue(), attributeName, dataType)) != null) {
|
||||
recordValues.put(targetFieldName, value);
|
||||
}
|
||||
|
||||
// dropUnknown == true && coerceTypes == false
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
// dropUnknown == false && coerceTypes == true
|
||||
if (coerceTypes) {
|
||||
final Object value;
|
||||
final Optional<RecordField> field = schema.getField(attributeName);
|
||||
if (field.isPresent()){
|
||||
if ((value = parseStringForType(attribute.getValue(), attributeName, field.get().getDataType())) != null) {
|
||||
recordValues.put(targetFieldName, value);
|
||||
}
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
}
|
||||
|
||||
// dropUnknown == false && coerceTypes == false
|
||||
} else {
|
||||
recordValues.put(targetFieldName, attribute.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void putUnknownTypeInMap(Map<String, Object> values, String fieldName, Object fieldValue) {
|
||||
final Object oldValues = values.get(fieldName);
|
||||
|
||||
|
|
|
@ -39,9 +39,11 @@ public class XmlRecordSource implements RecordSource<XmlNode> {
|
|||
|
||||
private final XMLEventReader xmlEventReader;
|
||||
private final String contentFieldName;
|
||||
private final boolean parseXmlAttributes;
|
||||
|
||||
public XmlRecordSource(final InputStream in, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
|
||||
public XmlRecordSource(final InputStream in, final String contentFieldName, final boolean ignoreWrapper, final boolean parseXmlAttributes) throws IOException {
|
||||
this.contentFieldName = contentFieldName;
|
||||
this.parseXmlAttributes = parseXmlAttributes;
|
||||
try {
|
||||
final XMLEventReaderProvider provider = new StandardXMLEventReaderProvider();
|
||||
xmlEventReader = provider.getEventReader(new StreamSource(in));
|
||||
|
@ -75,11 +77,8 @@ public class XmlRecordSource implements RecordSource<XmlNode> {
|
|||
final StringBuilder content = new StringBuilder();
|
||||
final Map<String, XmlNode> childNodes = new LinkedHashMap<>();
|
||||
|
||||
final Iterator<?> attributeIterator = startElement.getAttributes();
|
||||
while (attributeIterator.hasNext()) {
|
||||
final Attribute attribute = (Attribute) attributeIterator.next();
|
||||
final String attributeName = attribute.getName().getLocalPart();
|
||||
childNodes.put(attributeName, new XmlTextNode(attributeName, attribute.getValue()));
|
||||
if (parseXmlAttributes) {
|
||||
addXmlAttributesToChildNodes(startElement, childNodes);
|
||||
}
|
||||
|
||||
while (xmlEventReader.hasNext()) {
|
||||
|
@ -144,4 +143,13 @@ public class XmlRecordSource implements RecordSource<XmlNode> {
|
|||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Adds one {@code XmlTextNode} per XML attribute of the given start element to {@code childNodes}.
 * The map key and the node name are the local part of the attribute's qualified name; the node
 * value is the attribute's string value. Entries are added with {@code Map.put}, so an existing
 * entry with the same key is replaced.
 *
 * @param startElement the element whose attributes are read
 * @param childNodes the map that receives the attribute nodes (mutated in place)
 */
private void addXmlAttributesToChildNodes(StartElement startElement, Map<String, XmlNode> childNodes) {
|
||||
final Iterator<?> attributeIterator = startElement.getAttributes();
|
||||
while (attributeIterator.hasNext()) {
|
||||
final Attribute attribute = (Attribute) attributeIterator.next();
|
||||
final String attributeName = attribute.getName().getLocalPart();
|
||||
childNodes.put(attributeName, new XmlTextNode(attributeName, attribute.getValue()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,8 +36,7 @@
|
|||
<record>
|
||||
<field1>content</field1>
|
||||
<field2>content</field2>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -56,8 +55,7 @@
|
|||
<field1>content</field1>
|
||||
<field2>content</field2>
|
||||
</record>
|
||||
</root>
|
||||
</pre>
|
||||
</root></pre>
|
||||
</code>
|
||||
|
||||
<h2>Example: Simple Fields</h2>
|
||||
|
@ -73,8 +71,7 @@
|
|||
<record>
|
||||
<simple_field>content</simple_field>
|
||||
</record>
|
||||
</root>
|
||||
</pre>
|
||||
</root></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -91,8 +88,7 @@
|
|||
"fields": [
|
||||
{ "name": "simple_field", "type": "string" }
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
<h2>Example: Arrays with Simple Fields</h2>
|
||||
|
@ -109,8 +105,7 @@
|
|||
<array_field>content</array_field>
|
||||
<array_field>content</array_field>
|
||||
<simple_field>content</simple_field>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -129,8 +124,7 @@
|
|||
},
|
||||
{ "name": "simple_field", "type": "string" }
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -151,8 +145,7 @@
|
|||
<pre>
|
||||
<record>
|
||||
<field_with_attribute attr="attr_content">content of field</field_with_attribute>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -207,8 +200,7 @@
|
|||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -225,8 +217,7 @@
|
|||
RecordField "prefix_attr" = "attr_content",
|
||||
RecordField "field_name_for_content" = "content of field"
|
||||
)
|
||||
)
|
||||
</pre>
|
||||
)</pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -235,6 +226,11 @@
|
|||
the property "Field Name for Content" is set.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
It is possible that the schema is not provided explicitly, but schema inference is used.
|
||||
For details on XML attributes and schema inference, see "Example: Tags with Attributes and Schema Inference" below.
|
||||
</p>
|
||||
|
||||
<h2>Example: Tags within tags</h2>
|
||||
|
||||
<p>
|
||||
|
@ -248,8 +244,7 @@
|
|||
<embedded_field>embedded content</embedded_field>
|
||||
<another_embedded_field>another embedded content</another_embedded_field>
|
||||
</field_with_embedded_fields>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -277,8 +272,7 @@
|
|||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -317,6 +311,11 @@
|
|||
|
||||
<h3>XML Attributes and Schema Inference Example 1</h3>
|
||||
|
||||
<p>
|
||||
The simplest case is when XML attributes are ignored completely during schema inference. To achieve this, the "Parse XML Attributes" property in
|
||||
XMLReader is set to "false".
|
||||
</p>
|
||||
|
||||
<p>
|
||||
XMLReader settings:
|
||||
</p>
|
||||
|
@ -330,6 +329,78 @@
|
|||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>false</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Field Name for Content</td>
|
||||
<td>not set</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
Input:
|
||||
</p>
|
||||
|
||||
<code>
|
||||
<pre>
|
||||
<record>
|
||||
<field_with_attribute attr="attr_content">
|
||||
content of field
|
||||
</field_with_attribute>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
Output:
|
||||
</p>
|
||||
|
||||
<code>
|
||||
<pre>
|
||||
<record>
|
||||
<field_with_attribute>
|
||||
content of field
|
||||
</field_with_attribute>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
If "Parse XML Attributes" is "false", the XML attribute is not parsed. Its name does not appear in the inferred schema and its value
|
||||
is ignored. The reader behaves as if the XML attribute were not there.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Important note: "Field Name for Content" was not set in this example. This could lead to data loss if "field_with_attribute" had child elements,
|
||||
similarly to what is described in "XML Attributes and Schema Inference Example 2" and
|
||||
"XML Attributes and Schema Inference Example 4". To avoid that, "Field Name for Content" needs to be assigned a value that is
|
||||
different from any existing XML tags in the data, like in "XML Attributes and Schema Inference Example 6".
|
||||
|
||||
</p>
|
||||
|
||||
<h3>XML Attributes and Schema Inference Example 2</h3>
|
||||
|
||||
<p>
|
||||
XMLReader settings:
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Property Name</th>
|
||||
<th>Property Value</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>true</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
|
@ -376,10 +447,10 @@
|
|||
in the example). So a new field named "value" appears in the schema but no value is assigned to it from the data, thus the field is empty.
|
||||
The XML attribute (named "attr") is processed, a field named "attr" is added to the schema and the attribute's value ("attr_content") is assigned to it.
|
||||
In a case like this, the parent field's original content is lost and a new field named "value" appears in the schema with no data assigned to it.
|
||||
This is to make sure that no data is overwritten in the record if it already contains a field named "value". More on that case in Example 3 and Example 4.
|
||||
This is to make sure that no data is overwritten in the record if it already contains a field named "value". More on that case in Example 4 and Example 5.
|
||||
</p>
|
||||
|
||||
<h3>XML Attributes and Schema Inference Example 2</h3>
|
||||
<h3>XML Attributes and Schema Inference Example 3</h3>
|
||||
|
||||
<p>
|
||||
In this example, the XMLReader's "Field Name for Content" property is filled with the value "original_content". The input data is the same as
|
||||
|
@ -399,6 +470,10 @@
|
|||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>true</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
|
@ -444,7 +519,7 @@
|
|||
Since the <code><field_with_attribute></code> element had no child-element with the name "original_content", no data is lost.
|
||||
</p>
|
||||
|
||||
<h3>XML Attributes and Schema Inference Example 3</h3>
|
||||
<h3>XML Attributes and Schema Inference Example 4</h3>
|
||||
|
||||
<p>
|
||||
In this example, XMLReader's "Field Name for Content" property is left empty. In the input data, the <code><field_with_attribute></code> element
|
||||
|
@ -464,6 +539,10 @@
|
|||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>true</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
|
@ -503,16 +582,16 @@
|
|||
|
||||
<p>
|
||||
The "Field Name for Content" property is not set, and the XML element has a sub-element named "value". The name of the sub-element clashes with the
|
||||
default field name added to the schema by the Schema Inference logic (see Example 1). As seen in the output data, the input XML attribute's value
|
||||
default field name added to the schema by the Schema Inference logic (see Example 2). As seen in the output data, the input XML attribute's value
|
||||
is added to the record just like in the previous examples. The value of the <code><value></code> element is retained, but the content of the
|
||||
<code><field_with_attribute></code> that was outside of the sub-element, is lost.
|
||||
</p>
|
||||
|
||||
<h3>XML Attributes and Schema Inference Example 4</h3>
|
||||
<h3>XML Attributes and Schema Inference Example 5</h3>
|
||||
|
||||
<p>
|
||||
In this example, XMLReader's "Field Name for Content" property is given the value "value". In the input data, the <code><field_with_attribute></code> element
|
||||
has some content and a sub-element named <code><value></code>. The name of the sub-element clashes with the value of the "Field Name for Content" property.
|
||||
has some content and a sub-element named <code><value></code>. The name of the sub-element clashes with the value of the "Field Name for Content" property.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
@ -528,6 +607,10 @@
|
|||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>true</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
|
@ -571,7 +654,7 @@
|
|||
<code><field_with_attribute></code> element, and the original content of the <code><value></code> element is lost.
|
||||
</p>
|
||||
|
||||
<h3>XML Attributes and Schema Inference Example 5</h3>
|
||||
<h3>XML Attributes and Schema Inference Example 6</h3>
|
||||
|
||||
<p>
|
||||
To avoid losing any data, the XMLReader's "Field Name for Content" property needs to be given a value that does not clash with any sub-element's name
|
||||
|
@ -592,6 +675,10 @@
|
|||
<td>Schema Access Strategy</td>
|
||||
<td><code>Infer Schema</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parse XML Attributes</td>
|
||||
<td><code>true</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Expect Records as Array</td>
|
||||
<td><code>false</code></td>
|
||||
|
@ -656,8 +743,7 @@
|
|||
<embedded_field>embedded content 2</embedded_field>
|
||||
<another_embedded_field>another embedded content 2</another_embedded_field>
|
||||
</array_field>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -686,8 +772,7 @@
|
|||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
<h2>Example: Array in record</h2>
|
||||
|
@ -704,8 +789,7 @@
|
|||
<element>content 2</element>
|
||||
</field_enclosing_array>
|
||||
<field_without_array> content 3</field_without_array>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -737,8 +821,7 @@
|
|||
},
|
||||
{ "name": "field_without_array", "type": "string" }
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
|
||||
|
@ -757,8 +840,7 @@
|
|||
...
|
||||
</map_field>
|
||||
<simple_field>content</simple_field>
|
||||
</record>
|
||||
</pre>
|
||||
</record></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
|
@ -777,8 +859,7 @@
|
|||
},
|
||||
{ "name": "simple_field", "type": "string" }
|
||||
]
|
||||
}
|
||||
</pre>
|
||||
}</pre>
|
||||
</code>
|
||||
|
||||
|
||||
|
@ -794,21 +875,22 @@
|
|||
<p>
|
||||
A common concern when inferring schemas is how to handle the condition of two values that have different types. For example, consider a FlowFile with the following two records:
|
||||
</p>
|
||||
<code><pre>
|
||||
<root>
|
||||
<record>
|
||||
<name>John</name>
|
||||
<age>8</age>
|
||||
<values>N/A</values>
|
||||
</record>
|
||||
<record>
|
||||
<name>Jane</name>
|
||||
<age>Ten</age>
|
||||
<values>8</values>
|
||||
<values>Ten</values>
|
||||
</record>
|
||||
</root>
|
||||
</pre></code>
|
||||
<code>
|
||||
<pre>
|
||||
<root>
|
||||
<record>
|
||||
<name>John</name>
|
||||
<age>8</age>
|
||||
<values>N/A</values>
|
||||
</record>
|
||||
<record>
|
||||
<name>Jane</name>
|
||||
<age>Ten</age>
|
||||
<values>8</values>
|
||||
<values>Ten</values>
|
||||
</record>
|
||||
</root></pre>
|
||||
</code>
|
||||
|
||||
<p>
|
||||
It is clear that the "name" field will be inferred as a STRING type. However, how should we handle the "age" field? Should the field be a CHOICE between INT and STRING? Should we
|
||||
|
@ -876,7 +958,5 @@
|
|||
will typically only be inferred once, regardless of how many Processors handle the data.
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -48,7 +48,7 @@ public class TestInferXmlSchema {
|
|||
|
||||
@Test
|
||||
public void testFlatXml() throws IOException {
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/person.xml", false);
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/person.xml", false, true);
|
||||
|
||||
assertEquals(7, schema.getFieldCount());
|
||||
|
||||
|
@ -64,7 +64,7 @@ public class TestInferXmlSchema {
|
|||
|
||||
@Test
|
||||
public void testFieldsFromAllRecordsIncluded() throws IOException {
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/people_nested.xml", true);
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/people_nested.xml", true, true);
|
||||
|
||||
assertEquals(8, schema.getFieldCount());
|
||||
|
||||
|
@ -94,7 +94,7 @@ public class TestInferXmlSchema {
|
|||
@Test
|
||||
public void testStringFieldWithAttributes() throws IOException {
|
||||
final String contentFieldName = "contentfield";
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", contentFieldName, true);
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", contentFieldName, true, true);
|
||||
|
||||
assertEquals(3, schema.getFieldCount());
|
||||
|
||||
|
@ -110,13 +110,24 @@ public class TestInferXmlSchema {
|
|||
assertSame(RecordFieldType.STRING, childSchema.getDataType(contentFieldName).get().getFieldType());
|
||||
}
|
||||
|
||||
private RecordSchema inferSchema(final String filename, final boolean ignoreWrapper) throws IOException {
|
||||
return inferSchema(filename, "contentfield", ignoreWrapper);
|
||||
// Infers the schema of TextNodeWithAttribute.xml with ignoreWrapper = true and attribute parsing
// disabled, and expects exactly three top-level fields: "num" (INT), "name" (STRING) and
// "software" (STRING).
@Test
|
||||
public void testStringFieldWithAttributesIgnored() throws IOException {
|
||||
final RecordSchema schema = inferSchema("src/test/resources/xml/TextNodeWithAttribute.xml", true, false);
|
||||
|
||||
assertEquals(3, schema.getFieldCount());
|
||||
|
||||
assertSame(RecordFieldType.INT, schema.getDataType("num").get().getFieldType());
|
||||
assertSame(RecordFieldType.STRING, schema.getDataType("name").get().getFieldType());
|
||||
assertSame(RecordFieldType.STRING, schema.getDataType("software").get().getFieldType());
|
||||
}
|
||||
|
||||
private RecordSchema inferSchema(final String filename, final String contentFieldName, final boolean ignoreWrapper) throws IOException {
|
||||
// Convenience overload: infers the schema of the given file using the fixed content field name
// "contentfield" and forwards the wrapper and attribute-parsing flags unchanged.
private RecordSchema inferSchema(final String filename, final boolean ignoreWrapper, final boolean parseXMLAttributes) throws IOException {
|
||||
return inferSchema(filename, "contentfield", ignoreWrapper, parseXMLAttributes);
|
||||
}
|
||||
|
||||
private RecordSchema inferSchema(final String filename, final String contentFieldName, final boolean ignoreWrapper, final boolean parseXMLAttributes) throws IOException {
|
||||
final File file = new File(filename);
|
||||
final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) -> new XmlRecordSource(in, contentFieldName, ignoreWrapper);
|
||||
final RecordSourceFactory<XmlNode> xmlSourceFactory = (var, in) -> new XmlRecordSource(in, contentFieldName, ignoreWrapper, parseXMLAttributes);
|
||||
final SchemaInferenceEngine<XmlNode> schemaInference = new XmlSchemaInference(timeValueInference);
|
||||
final InferSchemaAccessStrategy<XmlNode> inferStrategy = new InferSchemaAccessStrategy<>(xmlSourceFactory, schemaInference, Mockito.mock(ComponentLog.class));
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static junit.framework.TestCase.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestXMLReader {
|
||||
|
||||
|
@ -279,4 +279,23 @@ public class TestXMLReader {
|
|||
String actualContent = out.getContent();
|
||||
assertEquals(expectedContent, actualContent);
|
||||
}
|
||||
|
||||
// Runs the reader with an inferred schema, single-record format and "Parse XML Attributes" set to
// "false" against person_record.xml, and expects the resulting record to contain only the
// "software", "num" and "name" fields.
// NOTE(review): the FileInputStream opened below is not explicitly closed in this method.
@Test
|
||||
public void testInferSchemaIgnoreAttributes() throws InitializationException, IOException {
|
||||
String expectedContent = "MapRecord[{software=Apache NiFi, num=123, name=John Doe}]";
|
||||
|
||||
Map<PropertyDescriptor, String> xmlReaderProperties = new HashMap<>();
|
||||
xmlReaderProperties.put(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaInferenceUtil.INFER_SCHEMA.getValue());
|
||||
xmlReaderProperties.put(XMLReader.RECORD_FORMAT, XMLReader.RECORD_SINGLE.getValue());
|
||||
xmlReaderProperties.put(XMLReader.PARSE_XML_ATTRIBUTES, "false");
|
||||
TestRunner runner = setup(xmlReaderProperties);
|
||||
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/person_record.xml");
|
||||
runner.enqueue(is);
|
||||
runner.run();
|
||||
|
||||
MockFlowFile out = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS).get(0);
|
||||
String actualContent = out.getContent();
|
||||
assertEquals(expectedContent, actualContent);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSingleRecord() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/person.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), false,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), false, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
assertNull(reader.nextRecord());
|
||||
|
@ -64,7 +64,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testMap() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_map.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForMap(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForMap(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord();
|
||||
|
@ -85,7 +85,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testMapWithRecords() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_map2.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForRecordMap(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForRecordMap(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord();
|
||||
|
@ -104,7 +104,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testTagInCharactersSimpleField() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, null}, reader.nextRecord().getValues());
|
||||
|
@ -116,7 +116,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testTagInCharactersRecord() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaWithNestedRecord3(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaWithNestedRecord3(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(true, true);
|
||||
|
@ -158,7 +158,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testTagInCharactersCoerceTrueDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaWithNestedRecord3(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaWithNestedRecord3(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(true, false);
|
||||
|
@ -205,7 +205,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testTagInCharactersCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
|
@ -252,7 +252,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecord() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
|
@ -264,8 +264,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecord2() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema2(), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema2(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertNull(reader.nextRecord(true, true).getValue("AGE"));
|
||||
assertNull(reader.nextRecord(false, true).getValue("AGE"));
|
||||
|
@ -276,8 +276,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecord3() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertEquals(Integer.class, reader.nextRecord(true, true).getValue("AGE").getClass());
|
||||
assertEquals(String.class, reader.nextRecord(false, true).getValue("AGE").getClass());
|
||||
|
@ -288,7 +288,7 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.remove(2);
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertEquals(Integer.class, reader.nextRecord(true, false).getValue("AGE").getClass());
|
||||
|
@ -298,8 +298,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecordCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_no_attributes.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", "42", "USA"}, reader.nextRecord(false, false).getValues());
|
||||
assertArrayEquals(new Object[] {"Ainslie Fletcher", "33", "UK"}, reader.nextRecord(false, false).getValues());
|
||||
|
@ -312,7 +312,7 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.STRING.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord();
|
||||
|
@ -337,7 +337,7 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.STRING.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
"ATTR_", "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord();
|
||||
|
@ -360,8 +360,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecordWithAttribute3() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(Collections.emptyList()),
|
||||
true, null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(Collections.emptyList()), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(true, true);
|
||||
assertEquals(null, first.getAsString("ID"));
|
||||
|
@ -382,7 +382,7 @@ public class TestXMLRecordReader {
|
|||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.INT.getDataType()));
|
||||
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertEquals(Integer.class, reader.nextRecord(true, true).getValue("ID").getClass());
|
||||
|
@ -395,7 +395,7 @@ public class TestXMLRecordReader {
|
|||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.INT.getDataType()));
|
||||
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertEquals(Integer.class, reader.nextRecord(true, false).getValue("ID").getClass());
|
||||
|
@ -404,17 +404,14 @@ public class TestXMLRecordReader {
|
|||
|
||||
/**
 * Reads people2.xml with the simple record fields plus an "ID" field declared as
 * DECIMAL(38, 10), fetches the first record via {@code nextRecord(true, false)}
 * and checks that its "ID" value is a {@link BigDecimal}.
 *
 * NOTE(review): the two consecutive {@code reader} declarations below look like the
 * before/after lines of a single change (the second passes one more boolean
 * argument) - confirm which one belongs in the file.
 * NOTE(review): the two booleans given to nextRecord are presumably
 * coerceTypes / dropUnknownFields - verify against XMLRecordReader.
 */
@Test
|
||||
public void testSimpleRecordWithAttribute6() throws IOException, MalformedRecordException {
|
||||
// given
|
||||
final InputStream is = new FileInputStream("src/test/resources/xml/people2.xml");
|
||||
final List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.DECIMAL.getDecimalDataType(38, 10)));
|
||||
final XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
final XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
// when
|
||||
final Record record = reader.nextRecord(true, false);
|
||||
|
||||
// then
|
||||
assertEquals(BigDecimal.class, record.getValue("ID").getClass());
|
||||
}
|
||||
|
||||
|
@ -423,7 +420,7 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ID", RecordFieldType.STRING.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
|
@ -462,8 +459,8 @@ public class TestXMLRecordReader {
|
|||
final DataType recordType2 = RecordFieldType.RECORD.getRecordDataType(new SimpleRecordSchema(nestedFields2));
|
||||
fields.add(new RecordField("AGE", recordType2));
|
||||
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(true, true);
|
||||
assertTrue(first.getValue("NAME") instanceof Record);
|
||||
|
@ -489,7 +486,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleTypeWithAttributeAsRecordCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people3.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
|
@ -518,7 +515,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecordWithHeader() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_with_header_and_comments.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, null, dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
|
@ -530,7 +527,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testSimpleRecordWithHeaderNoValidation() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_with_header_and_comments.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, null, null, dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, null, dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
assertArrayEquals(new Object[] {"Ainslie Fletcher", 33, "UK"}, reader.nextRecord().getValues());
|
||||
|
@ -541,7 +539,8 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testInvalidXml() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_invalid.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
int count = 0;
|
||||
|
||||
/*
|
||||
|
@ -566,8 +565,8 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields2();
|
||||
fields.add(new RecordField("AGE", RecordFieldType.CHOICE.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record record = reader.nextRecord();
|
||||
assertTrue(record.getValue("AGE") instanceof String);
|
||||
|
@ -579,8 +578,8 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people_nested.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ADDRESS", RecordFieldType.CHOICE.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record record = reader.nextRecord();
|
||||
assertTrue(record.getValue("ADDRESS") instanceof Record);
|
||||
|
@ -593,7 +592,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testNameSpaces() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_namespace.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
|
@ -605,7 +604,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testCData() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_cdata.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", 42, "USA"}, reader.nextRecord().getValues());
|
||||
|
@ -620,8 +619,8 @@ public class TestXMLRecordReader {
|
|||
List<RecordField> fields = getSimpleRecordFields2();
|
||||
final DataType recordType = RecordFieldType.RECORD.getRecordDataType(getNestedSchema());
|
||||
fields.add(new RecordField("AGE", recordType));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {"Cleve Butler", "USA", null}, reader.nextRecord().getValues());
|
||||
assertArrayEquals(new Object[] {"Ainslie Fletcher", "UK", null}, reader.nextRecord().getValues());
|
||||
|
@ -634,8 +633,8 @@ public class TestXMLRecordReader {
|
|||
InputStream is = new FileInputStream("src/test/resources/xml/people_nested.xml");
|
||||
List<RecordField> fields = getSimpleRecordFields();
|
||||
fields.add(new RecordField("ADDRESS", RecordFieldType.STRING.getDataType()));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertNull(reader.nextRecord().getValue("ADDRESS"));
|
||||
assertNull(reader.nextRecord().getValue("ADDRESS"));
|
||||
|
@ -646,7 +645,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testParseEmptyFields() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_empty.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {null, null, null}, reader.nextRecord().getValues());
|
||||
|
@ -656,7 +655,7 @@ public class TestXMLRecordReader {
|
|||
@Test
|
||||
public void testParseEmptyFieldsCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_empty.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertArrayEquals(new Object[] {null, null, null}, reader.nextRecord(false, false).getValues());
|
||||
|
@ -667,27 +666,29 @@ public class TestXMLRecordReader {
|
|||
// Expects that constructing an XMLRecordReader over a zero-length stream throws
// MalformedRecordException; the third constructor argument is false here.
// NOTE(review): the two lambda lines below look like the before/after versions of the
// same constructor call (the second adds a boolean argument) - confirm which one belongs.
public void testEmptyStreamAsSingleRecord() {
|
||||
InputStream is = new ByteArrayInputStream(new byte[0]);
|
||||
assertThrows(MalformedRecordException.class,
|
||||
() -> new XMLRecordReader(is, getSimpleSchema(), false, null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class)));
|
||||
() -> new XMLRecordReader(is, getSimpleSchema(), false, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class)));
|
||||
}
|
||||
|
||||
/**
 * Expects that constructing an XMLRecordReader over a zero-length stream throws
 * MalformedRecordException; the third constructor argument is true here.
 *
 * NOTE(review): the two lambda lines below look like the before/after versions of the
 * same constructor call (the second adds a boolean argument) - confirm which one belongs.
 */
@Test
|
||||
public void testEmptyStreamAsArray() {
|
||||
InputStream is = new ByteArrayInputStream(new byte[0]);
|
||||
assertThrows(MalformedRecordException.class,
|
||||
() -> new XMLRecordReader(is, getSimpleSchema(), true, null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class)));
|
||||
() -> new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class)));
|
||||
}
|
||||
|
||||
/**
 * Expects that constructing an XMLRecordReader over a stream that contains only an
 * XML declaration (no elements) throws MalformedRecordException.
 *
 * NOTE(review): the two assertThrows lines below look like the before/after versions of
 * the same call (the second adds a boolean constructor argument) - confirm which one belongs.
 */
@Test
|
||||
public void testEmptyStreamWIthXmlHeader() {
|
||||
InputStream is = new ByteArrayInputStream(("<?xml version=\"1.0\" encoding=\"utf-8\"?>").getBytes());
|
||||
assertThrows(MalformedRecordException.class, () -> new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
assertThrows(MalformedRecordException.class, () -> new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseEmptyArray() throws IOException, MalformedRecordException {
|
||||
InputStream is = new ByteArrayInputStream("<root></root>".getBytes());
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSimpleSchema(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
assertNull(reader.nextRecord());
|
||||
|
@ -697,7 +698,7 @@ public class TestXMLRecordReader {
|
|||
public void testNestedRecord() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_nested.xml");
|
||||
RecordSchema schema = getSchemaWithNestedRecord();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Object[] valuesFirstRecord = reader.nextRecord().getValues();
|
||||
|
@ -721,7 +722,7 @@ public class TestXMLRecordReader {
|
|||
public void testNestedRecordCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_nested.xml");
|
||||
RecordSchema schema = getSchemaWithNestedRecord();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
|
@ -767,7 +768,7 @@ public class TestXMLRecordReader {
|
|||
|
||||
// Fields "AGE" and "ADDRESS/CITY" are not defined here
|
||||
RecordSchema schema = getSchemaWithNestedRecord2();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord(true, true);
|
||||
|
@ -809,7 +810,7 @@ public class TestXMLRecordReader {
|
|||
|
||||
// Fields "AGE" and "ADDRESS/CITY" are not defined here
|
||||
RecordSchema schema = getSchemaWithNestedRecord2();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord(false, true);
|
||||
|
@ -851,7 +852,7 @@ public class TestXMLRecordReader {
|
|||
|
||||
// Fields "AGE" and "ADDRESS/CITY" are not defined here
|
||||
RecordSchema schema = getSchemaWithNestedRecord2();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord(true, false);
|
||||
|
@ -901,7 +902,7 @@ public class TestXMLRecordReader {
|
|||
|
||||
// Fields "AGE" and "ADDRESS/CITY" are not defined here
|
||||
RecordSchema schema = getSchemaWithNestedRecord2();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord(false, false);
|
||||
|
@ -950,7 +951,7 @@ public class TestXMLRecordReader {
|
|||
public void testSimpleArray() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_array_simple.xml");
|
||||
RecordSchema schema = getSchemaWithSimpleArray();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord();
|
||||
|
@ -986,7 +987,7 @@ public class TestXMLRecordReader {
|
|||
public void testSimpleArrayCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_array_simple.xml");
|
||||
RecordSchema schema = getSchemaWithSimpleArray();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
|
@ -1021,8 +1022,8 @@ public class TestXMLRecordReader {
|
|||
public void testNestedArrayInNestedRecord() throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_array.xml");
|
||||
RecordSchema schema = getSchemaWithNestedArray();
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, null,
|
||||
"CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, schema, true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record firstRecord = reader.nextRecord();
|
||||
Object[] valuesFirstRecord = firstRecord.getValues();
|
||||
|
@ -1059,7 +1060,7 @@ public class TestXMLRecordReader {
|
|||
public void testDeeplyNestedArraysAndRecords() throws IOException, MalformedRecordException {
|
||||
// test records in nested arrays
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_complex1.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(true, true);
|
||||
|
@ -1098,7 +1099,7 @@ public class TestXMLRecordReader {
|
|||
public void testDeeplyNestedArraysAndRecords2() throws IOException, MalformedRecordException {
|
||||
// test multiply nested arrays and records (recursion)
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_complex2.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData2(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData2(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord();
|
||||
|
@ -1154,7 +1155,7 @@ public class TestXMLRecordReader {
|
|||
public void testDeeplyNestedArraysAndRecordsCoerceFalseDropTrue() throws IOException, MalformedRecordException {
|
||||
// test multiply nested arrays and records (recursion)
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_complex2.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData2(), true,
|
||||
XMLRecordReader reader = new XMLRecordReader(is, getSchemaForComplexData2(), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, true);
|
||||
|
@ -1210,8 +1211,8 @@ public class TestXMLRecordReader {
|
|||
public void testDeeplyNestedArraysAndRecordsCoerceFalseDropFalse() throws IOException, MalformedRecordException {
|
||||
// test multiply nested arrays and records (recursion)
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people_complex2.xml");
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(Collections.emptyList()),
|
||||
true, null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(Collections.emptyList()), true, true,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
Record first = reader.nextRecord(false, false);
|
||||
assertEquals("1", first.getValue("ID"));
|
||||
|
@ -1267,6 +1268,110 @@ public class TestXMLRecordReader {
|
|||
.getValue("ID"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleTypeWithAttributesIgnored1() throws IOException, MalformedRecordException {
|
||||
boolean parseXMLAttributes = false;
|
||||
boolean coerceTypes = true;
|
||||
boolean dropUnknownFields = true;
|
||||
|
||||
List<Record> records = simpleTypeWithAttributesIgnored(parseXMLAttributes, coerceTypes, dropUnknownFields);
|
||||
|
||||
Record first = records.get(0);
|
||||
Record second = records.get(1);
|
||||
|
||||
assertTrue(first.getValue("NAME") instanceof String);
|
||||
assertEquals("Cleve Butler", first.getValue("NAME"));
|
||||
assertTrue(first.getValue("AGE") instanceof Integer);
|
||||
assertEquals(42, first.getValue("AGE"));
|
||||
assertEquals(2, first.toMap().size());
|
||||
|
||||
assertTrue(second.getValue("NAME") instanceof String);
|
||||
assertEquals("Ainslie Fletcher", second.getValue("NAME"));
|
||||
assertTrue(second.getValue("AGE") instanceof Integer);
|
||||
assertEquals(33, second.getValue("AGE"));
|
||||
assertEquals(2, second.toMap().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleTypeWithAttributesIgnored2() throws IOException, MalformedRecordException {
|
||||
boolean parseXMLAttributes = false;
|
||||
boolean coerceTypes = false;
|
||||
boolean dropUnknownFields = true;
|
||||
|
||||
List<Record> records = simpleTypeWithAttributesIgnored(parseXMLAttributes, coerceTypes, dropUnknownFields);
|
||||
|
||||
Record first = records.get(0);
|
||||
Record second = records.get(1);
|
||||
|
||||
assertTrue(first.getValue("NAME") instanceof String);
|
||||
assertEquals("Cleve Butler", first.getValue("NAME"));
|
||||
assertTrue(first.getValue("AGE") instanceof String);
|
||||
assertEquals("42", first.getValue("AGE"));
|
||||
assertEquals(2, first.toMap().size());
|
||||
|
||||
assertTrue(second.getValue("NAME") instanceof String);
|
||||
assertEquals("Ainslie Fletcher", second.getValue("NAME"));
|
||||
assertTrue(second.getValue("AGE") instanceof String);
|
||||
assertEquals("33", second.getValue("AGE"));
|
||||
assertEquals(2, second.toMap().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleTypeWithAttributesIgnored3() throws IOException, MalformedRecordException {
|
||||
boolean parseXMLAttributes = false;
|
||||
boolean coerceTypes = true;
|
||||
boolean dropUnknownFields = false;
|
||||
|
||||
List<Record> records = simpleTypeWithAttributesIgnored(parseXMLAttributes, coerceTypes, dropUnknownFields);
|
||||
|
||||
Record first = records.get(0);
|
||||
Record second = records.get(1);
|
||||
|
||||
assertTrue(first.getValue("NAME") instanceof String);
|
||||
assertEquals("Cleve Butler", first.getValue("NAME"));
|
||||
assertTrue(first.getValue("AGE") instanceof Integer);
|
||||
assertEquals(42, first.getValue("AGE"));
|
||||
assertTrue(first.getValue("COUNTRY") instanceof String);
|
||||
assertEquals("USA", first.getValue("COUNTRY"));
|
||||
assertEquals(3, first.toMap().size());
|
||||
|
||||
assertTrue(second.getValue("NAME") instanceof String);
|
||||
assertEquals("Ainslie Fletcher", second.getValue("NAME"));
|
||||
assertTrue(second.getValue("AGE") instanceof Integer);
|
||||
assertEquals(33, second.getValue("AGE"));
|
||||
assertTrue(second.getValue("COUNTRY") instanceof String);
|
||||
assertEquals("UK", second.getValue("COUNTRY"));
|
||||
assertEquals(3, second.toMap().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleTypeWithAttributesIgnored4() throws IOException, MalformedRecordException {
|
||||
boolean parseXMLAttributes = false;
|
||||
boolean coerceTypes = false;
|
||||
boolean dropUnknownFields = false;
|
||||
|
||||
List<Record> records = simpleTypeWithAttributesIgnored(parseXMLAttributes, coerceTypes, dropUnknownFields);
|
||||
|
||||
Record first = records.get(0);
|
||||
Record second = records.get(1);
|
||||
|
||||
assertTrue(first.getValue("NAME") instanceof String);
|
||||
assertEquals("Cleve Butler", first.getValue("NAME"));
|
||||
assertTrue(first.getValue("AGE") instanceof String);
|
||||
assertEquals("42", first.getValue("AGE"));
|
||||
assertTrue(first.getValue("COUNTRY") instanceof String);
|
||||
assertEquals("USA", first.getValue("COUNTRY"));
|
||||
assertEquals(3, first.toMap().size());
|
||||
|
||||
assertTrue(second.getValue("NAME") instanceof String);
|
||||
assertEquals("Ainslie Fletcher", second.getValue("NAME"));
|
||||
assertTrue(second.getValue("AGE") instanceof String);
|
||||
assertEquals("33", second.getValue("AGE"));
|
||||
assertTrue(second.getValue("COUNTRY") instanceof String);
|
||||
assertEquals("UK", second.getValue("COUNTRY"));
|
||||
assertEquals(3, second.toMap().size());
|
||||
}
|
||||
|
||||
private List<RecordField> getSimpleRecordFields() {
|
||||
final List<RecordField> fields = new ArrayList<>();
|
||||
fields.add(new RecordField("NAME", RecordFieldType.STRING.getDataType()));
|
||||
|
@ -1449,4 +1554,20 @@ public class TestXMLRecordReader {
|
|||
}};
|
||||
return new SimpleRecordSchema(fields);
|
||||
}
|
||||
|
||||
private List<Record> simpleTypeWithAttributesIgnored(boolean parseXMLAttributes, boolean coerceTypes, boolean dropunknownFields) throws IOException, MalformedRecordException {
|
||||
InputStream is = new FileInputStream("src/test/resources/xml/people3.xml");
|
||||
|
||||
final List<RecordField> fields = new ArrayList<>();
|
||||
fields.add(new RecordField("NAME", RecordFieldType.STRING.getDataType()));
|
||||
fields.add(new RecordField("AGE", RecordFieldType.INT.getDataType()));
|
||||
|
||||
XMLRecordReader reader = new XMLRecordReader(is, new SimpleRecordSchema(fields), true, parseXMLAttributes,
|
||||
null, "CONTENT", dateFormat, timeFormat, timestampFormat, Mockito.mock(ComponentLog.class));
|
||||
|
||||
List<Record> records = new ArrayList<>(2);
|
||||
records.add(reader.nextRecord(coerceTypes, dropunknownFields));
|
||||
records.add(reader.nextRecord(coerceTypes, dropunknownFields));
|
||||
return records;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue