NIFI-8512: When converting to/from Avro UNION type, we can be more efficient when the UNION consists of a Null type and one other type by determinine the non-null type and just using that. Also eliminated a call to List.stream() and related .collect() call by using an existing method that performs the logic without the very expensive call to stream()

Signed-off-by: Matthew Burgess <mattyb149@apache.org>

This closes #5051
This commit is contained in:
Mark Payne 2021-05-03 17:49:36 -04:00 committed by Matthew Burgess
parent 30fc26647e
commit 2895bac2c0
No known key found for this signature in database
GPG Key ID: 05D3DEB8126DAD24
1 changed files with 11 additions and 3 deletions

View File

@ -76,7 +76,6 @@ import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
public class AvroTypeUtil {
private static final Logger logger = LoggerFactory.getLogger(AvroTypeUtil.class);
@ -900,10 +899,19 @@ public class AvroTypeUtil {
private static Object convertUnionFieldValue(final Object originalValue, final Schema fieldSchema, final Function<Schema, Object> conversion, final String fieldName) {
boolean foundNonNull = false;
// It is an extremely common case to have a UNION type because a field can be NULL or some other type. In this situation,
// we will have two possible types, and one of them will be null. When this happens, we can be much more efficient by simply
// determining the non-null type and converting to that.
final List<Schema> schemaTypes = fieldSchema.getTypes();
if (schemaTypes.size() == 2 && (schemaTypes.get(0).getType() == Type.NULL || schemaTypes.get(1).getType() == Type.NULL)) {
final Schema nonNullType = schemaTypes.get(0).getType() == Type.NULL ? schemaTypes.get(1) : schemaTypes.get(0);
return conversion.apply(nonNullType);
}
Optional<Schema> mostSuitableType = DataTypeUtils.findMostSuitableType(
originalValue,
fieldSchema.getTypes().stream().filter(schema -> schema.getType() != Type.NULL).collect(Collectors.toList()),
subSchema -> AvroTypeUtil.determineDataType(subSchema)
getNonNullSubSchemas(fieldSchema),
AvroTypeUtil::determineDataType
);
if (mostSuitableType.isPresent()) {
return conversion.apply(mostSuitableType.get());