NIFI-7925: ValidateRecord reports false positive for avro arrays with null elements

This commit is contained in:
Denes Arvay 2020-10-17 21:31:37 +02:00 committed by markap14
parent 55cb8d73cb
commit d05d0c6240
8 changed files with 125 additions and 38 deletions

BIN
flow.xml.gz Normal file

Binary file not shown.

View File

@ -288,17 +288,30 @@ public enum RecordFieldType {
/**
* Returns a Data Type that represents an "ARRAY" type with the given element type.
* The returned array data type can't contain null elements.
*
* @param elementType the type of the arrays in the element
* @return a DataType that represents an Array with the given element type, or <code>null</code> if this RecordFieldType
* is not the ARRAY type.
*/
public DataType getArrayDataType(final DataType elementType) {
return getArrayDataType(elementType, ArrayDataType.DEFAULT_NULLABLE);
}
/**
* Returns a Data Type that represents an "ARRAY" type with the given element type.
*
* @param elementType the type of the arrays in the element
* @param elementsNullable indicates whether the array can contain null elements
* @return a DataType that represents an Array with the given element type, or <code>null</code> if this RecordFieldType
* is not the ARRAY type.
*/
public DataType getArrayDataType(final DataType elementType, final boolean elementsNullable) {
if (this != ARRAY) {
return null;
}
return new ArrayDataType(elementType);
return new ArrayDataType(elementType, elementsNullable);
}
@ -341,17 +354,30 @@ public enum RecordFieldType {
/**
* Returns a Data Type that represents a "MAP" type with the given value type.
* The returned map data type can't contain null values.
*
* @param valueDataType the type of the values in the map
* @return a DataType that represents a Map with the given value type, or <code>null</code> if this RecordFieldType
* is not the MAP type.
*/
public DataType getMapDataType(final DataType valueDataType) {
return getMapDataType(valueDataType, MapDataType.DEFAULT_NULLABLE);
}
/**
* Returns a Data Type that represents a "MAP" type with the given value type.
*
* @param valueDataType the type of the values in the map
* @param valuesNullable indicates whether the map can contain null values
* @return a DataType that represents a Map with the given value type, or <code>null</code> if this RecordFieldType
* is not the MAP type.
*/
public DataType getMapDataType(final DataType valueDataType, boolean valuesNullable) {
if (this != MAP) {
return null;
}
return new MapDataType(valueDataType);
return new MapDataType(valueDataType, valuesNullable);
}
/**

View File

@ -23,11 +23,20 @@ import org.apache.nifi.serialization.record.RecordFieldType;
import java.util.Objects;
public class ArrayDataType extends DataType {
public static final boolean DEFAULT_NULLABLE = false;
private final DataType elementType;
private final boolean elementsNullable;
public ArrayDataType(final DataType elementType) {
this(elementType, DEFAULT_NULLABLE);
}
public ArrayDataType(final DataType elementType, boolean elementsNullable) {
super(RecordFieldType.ARRAY, null);
this.elementType = elementType;
this.elementsNullable = elementsNullable;
}
public DataType getElementType() {
@ -39,25 +48,23 @@ public class ArrayDataType extends DataType {
return RecordFieldType.ARRAY;
}
@Override
public int hashCode() {
return 31 + 41 * getFieldType().hashCode() + 41 * (elementType == null ? 0 : elementType.hashCode());
public boolean isElementsNullable() {
return elementsNullable;
}
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof ArrayDataType)) {
return false;
}
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ArrayDataType)) return false;
if (!super.equals(o)) return false;
ArrayDataType that = (ArrayDataType) o;
return isElementsNullable() == that.isElementsNullable()
&& Objects.equals(getElementType(), that.getElementType());
}
final ArrayDataType other = (ArrayDataType) obj;
return Objects.equals(elementType, other.elementType);
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), getElementType(), isElementsNullable());
}
@Override

View File

@ -23,41 +23,48 @@ import org.apache.nifi.serialization.record.RecordFieldType;
import java.util.Objects;
public class MapDataType extends DataType {
public static final boolean DEFAULT_NULLABLE = false;
private final DataType valueType;
private final boolean valuesNullable;
public MapDataType(final DataType elementType) {
this(elementType, DEFAULT_NULLABLE);
}
public MapDataType(final DataType elementType, boolean valuesNullable) {
super(RecordFieldType.MAP, null);
this.valueType = elementType;
this.valuesNullable = valuesNullable;
}
public DataType getValueType() {
return valueType;
}
public boolean isValuesNullable() {
return valuesNullable;
}
@Override
public RecordFieldType getFieldType() {
return RecordFieldType.MAP;
}
@Override
public int hashCode() {
return 31 + 41 * getFieldType().hashCode() + 41 * (valueType == null ? 0 : valueType.hashCode());
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MapDataType)) return false;
if (!super.equals(o)) return false;
MapDataType that = (MapDataType) o;
return valuesNullable == that.valuesNullable
&& Objects.equals(getValueType(), that.getValueType());
}
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof MapDataType)) {
return false;
}
final MapDataType other = (MapDataType) obj;
return Objects.equals(valueType, other.valueType);
public int hashCode() {
return Objects.hash(super.hashCode(), getValueType(), valuesNullable);
}
@Override

View File

@ -354,7 +354,9 @@ public class AvroTypeUtil {
switch (avroType) {
case ARRAY:
return RecordFieldType.ARRAY.getArrayDataType(determineDataType(avroSchema.getElementType(), knownRecordTypes));
final DataType elementType = determineDataType(avroSchema.getElementType(), knownRecordTypes);
final boolean elementsNullable = isNullable(avroSchema.getElementType());
return RecordFieldType.ARRAY.getArrayDataType(elementType, elementsNullable);
case BYTES:
case FIXED:
return RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType());
@ -403,7 +405,8 @@ public class AvroTypeUtil {
case MAP:
final Schema valueSchema = avroSchema.getValueType();
final DataType valueType = determineDataType(valueSchema, knownRecordTypes);
return RecordFieldType.MAP.getMapDataType(valueType);
final boolean valuesNullable = isNullable(valueSchema);
return RecordFieldType.MAP.getMapDataType(valueType, valuesNullable);
case UNION: {
final List<Schema> nonNullSubSchemas = getNonNullSubSchemas(avroSchema);

View File

@ -69,7 +69,7 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
if (validationContext.isStrictTypeChecking()) {
if (!isTypeCorrect(rawValue, dataType)) {
result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
"Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + dataType));
"Value is of type " + classNameOrNull(rawValue) + " but was expected to be of type " + dataType));
continue;
}
@ -78,7 +78,7 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
// but will be false if the value is "123" and should be an Array or Record.
if (!DataTypeUtils.isCompatibleDataType(rawValue, dataType)) {
result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
"Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + dataType));
"Value is of type " + classNameOrNull(rawValue) + " but was expected to be of type " + dataType));
continue;
}
@ -140,7 +140,7 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
if (canonicalDataType == null) {
result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
"Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + dataType));
"Value is of type " + classNameOrNull(rawValue) + " but was expected to be of type " + dataType));
return null;
}
@ -157,7 +157,7 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
if (canonicalDataType.getFieldType() == RecordFieldType.RECORD) {
if (!(rawValue instanceof Record)) { // sanity check
result.addValidationError(new StandardValidationError(concat(fieldPrefix, field), rawValue, ValidationErrorType.INVALID_FIELD,
"Value is of type " + rawValue.getClass().getName() + " but was expected to be of type " + expectedDataType));
"Value is of type " + classNameOrNull(rawValue) + " but was expected to be of type " + expectedDataType));
return;
}
@ -189,6 +189,9 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
final Object[] array = (Object[]) value;
for (final Object arrayVal : array) {
if (arrayVal == null && arrayDataType.isElementsNullable()) {
continue;
}
if (!isTypeCorrect(arrayVal, elementType)) {
return false;
}
@ -202,6 +205,9 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
final Map<?, ?> map = (Map<?, ?>) value;
for (final Object mapValue : map.values()) {
if (mapValue == null && mapDataType.isValuesNullable()) {
continue;
}
if (!isTypeCorrect(mapValue, valueDataType)) {
return false;
}
@ -287,4 +293,8 @@ public class StandardSchemaValidator implements RecordSchemaValidator {
private String concat(final String fieldPrefix, final RecordField field) {
return fieldPrefix + "/" + field.getFieldName();
}
private String classNameOrNull(Object value) {
return value == null ? "null" : value.getClass().getName();
}
}

View File

@ -640,4 +640,38 @@ public class TestValidateRecord {
+ "The following 1 fields had values whose type did not match the schema: [/id]");
}
@Test
public void testValidationForNullElementArrayAndMap() throws Exception {
AvroReader avroReader = new AvroReader();
runner.addControllerService("reader", avroReader);
runner.enableControllerService(avroReader);
final MockRecordWriter validWriter = new MockRecordWriter("valid", false);
runner.addControllerService("writer", validWriter);
runner.enableControllerService(validWriter);
final MockRecordWriter invalidWriter = new MockRecordWriter("invalid", true);
runner.addControllerService("invalid-writer", invalidWriter);
runner.enableControllerService(invalidWriter);
runner.setProperty(ValidateRecord.RECORD_READER, "reader");
runner.setProperty(ValidateRecord.RECORD_WRITER, "writer");
runner.setProperty(ValidateRecord.INVALID_RECORD_WRITER, "invalid-writer");
runner.setProperty(ValidateRecord.ALLOW_EXTRA_FIELDS, "false");
runner.setProperty(ValidateRecord.MAX_VALIDATION_DETAILS_LENGTH, "150");
runner.setProperty(ValidateRecord.VALIDATION_DETAILS_ATTRIBUTE_NAME, "valDetails");
runner.enqueue(Paths.get("src/test/resources/TestValidateRecord/array-and-map-with-null-element.avro"));
runner.run();
runner.assertTransferCount(ValidateRecord.REL_INVALID, 0);
runner.assertTransferCount(ValidateRecord.REL_FAILURE, 0);
runner.assertTransferCount(ValidateRecord.REL_VALID, 1);
final MockFlowFile validFlowFile = runner.getFlowFilesForRelationship(ValidateRecord.REL_VALID).get(0);
validFlowFile.assertAttributeEquals("record.count", "1");
validFlowFile.assertContentEquals("valid\n[text, null],{key=null}\n");
}
}