mirror of https://github.com/apache/nifi.git
NIFI-8512: When converting to/from Avro UNION type, we can be more efficient when the UNION consists of a Null type and one other type by determinine the non-null type and just using that. Also eliminated a call to List.stream() and related .collect() call by using an existing method that performs the logic without the very expensive call to stream()
Signed-off-by: Matthew Burgess <mattyb149@apache.org> This closes #5051
This commit is contained in:
parent
30fc26647e
commit
2895bac2c0
|
@ -76,7 +76,6 @@ import java.util.Optional;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class AvroTypeUtil {
|
||||
private static final Logger logger = LoggerFactory.getLogger(AvroTypeUtil.class);
|
||||
|
@ -900,10 +899,19 @@ public class AvroTypeUtil {
|
|||
private static Object convertUnionFieldValue(final Object originalValue, final Schema fieldSchema, final Function<Schema, Object> conversion, final String fieldName) {
|
||||
boolean foundNonNull = false;
|
||||
|
||||
// It is an extremely common case to have a UNION type because a field can be NULL or some other type. In this situation,
|
||||
// we will have two possible types, and one of them will be null. When this happens, we can be much more efficient by simply
|
||||
// determining the non-null type and converting to that.
|
||||
final List<Schema> schemaTypes = fieldSchema.getTypes();
|
||||
if (schemaTypes.size() == 2 && (schemaTypes.get(0).getType() == Type.NULL || schemaTypes.get(1).getType() == Type.NULL)) {
|
||||
final Schema nonNullType = schemaTypes.get(0).getType() == Type.NULL ? schemaTypes.get(1) : schemaTypes.get(0);
|
||||
return conversion.apply(nonNullType);
|
||||
}
|
||||
|
||||
Optional<Schema> mostSuitableType = DataTypeUtils.findMostSuitableType(
|
||||
originalValue,
|
||||
fieldSchema.getTypes().stream().filter(schema -> schema.getType() != Type.NULL).collect(Collectors.toList()),
|
||||
subSchema -> AvroTypeUtil.determineDataType(subSchema)
|
||||
getNonNullSubSchemas(fieldSchema),
|
||||
AvroTypeUtil::determineDataType
|
||||
);
|
||||
if (mostSuitableType.isPresent()) {
|
||||
return conversion.apply(mostSuitableType.get());
|
||||
|
|
Loading…
Reference in New Issue