diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java index 024f1b508d..3eb26e30f3 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java @@ -76,7 +76,6 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Function; -import java.util.stream.Collectors; public class AvroTypeUtil { private static final Logger logger = LoggerFactory.getLogger(AvroTypeUtil.class); @@ -900,10 +899,19 @@ public class AvroTypeUtil { private static Object convertUnionFieldValue(final Object originalValue, final Schema fieldSchema, final Function conversion, final String fieldName) { boolean foundNonNull = false; + // It is an extremely common case to have a UNION type because a field can be NULL or some other type. In this situation, + // we will have two possible types, and one of them will be null. When this happens, we can be much more efficient by simply + // determining the non-null type and converting to that. + final List schemaTypes = fieldSchema.getTypes(); + if (schemaTypes.size() == 2 && (schemaTypes.get(0).getType() == Type.NULL || schemaTypes.get(1).getType() == Type.NULL)) { + final Schema nonNullType = schemaTypes.get(0).getType() == Type.NULL ? schemaTypes.get(1) : schemaTypes.get(0); + return conversion.apply(nonNullType); + } + Optional mostSuitableType = DataTypeUtils.findMostSuitableType( originalValue, - fieldSchema.getTypes().stream().filter(schema -> schema.getType() != Type.NULL).collect(Collectors.toList()), - subSchema -> AvroTypeUtil.determineDataType(subSchema) + getNonNullSubSchemas(fieldSchema), + AvroTypeUtil::determineDataType ); if (mostSuitableType.isPresent()) { return conversion.apply(mostSuitableType.get());