NIFI-4647: Fix support for strings in unions for ConvertAvroToORC

Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>

This closes #2644.
This commit is contained in:
Matthew Burgess 2018-04-18 14:49:16 -04:00 committed by Pierre Villard
parent aa196bc01f
commit 090e748726
2 changed files with 26 additions and 1 deletions

View File

@ -67,9 +67,15 @@ public class NiFiOrcUtils {
if (o != null) {
if (typeInfo instanceof UnionTypeInfo) {
OrcUnion union = new OrcUnion();
// Avro uses Utf8 and GenericData.EnumSymbol objects instead of Strings. This is handled in other places in the method, but here
// we need to determine the union types from the objects, so choose String.class if the object is one of those Avro classes
Class clazzToCompareTo = o.getClass();
if (o instanceof org.apache.avro.util.Utf8 || o instanceof GenericData.EnumSymbol) {
clazzToCompareTo = String.class;
}
// Need to find which of the union types correspond to the primitive object
TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
ObjectInspectorFactory.getReflectionObjectInspector(o.getClass(), ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
ObjectInspectorFactory.getReflectionObjectInspector(clazzToCompareTo, ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
List<TypeInfo> unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
int index = 0;

View File

@ -20,7 +20,9 @@ package org.apache.nifi.util.orc;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@ -267,6 +269,23 @@ public class TestNiFiOrcUtils {
+ " STORED AS ORC", ddl);
}
/**
 * Checks that Avro string-like values ({@link Utf8} and {@link GenericData.EnumSymbol})
 * can be converted against a {@code uniontype<bigint,string>}: the result must be a
 * {@link UnionObject} whose wrapped value is a {@code Text}.
 */
@Test
public void test_convertToORCObject() {
    final Schema enumSchema = SchemaBuilder.enumeration("myEnum").symbols("x","y","z");
    final TypeInfo unionType = TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>");
    final List<Object> avroValues = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(enumSchema, "x"));

    for (final Object avroValue : avroValues) {
        final Object converted = NiFiOrcUtils.convertToORCObject(unionType, avroValue);
        // The union wrapper itself must be produced ...
        assertTrue(converted instanceof UnionObject);
        // ... and the value inside it must have been converted to Text.
        final Object wrapped = ((UnionObject) converted).getObject();
        assertTrue(wrapped instanceof Text);
    }
}
/**
 * Converting a value whose type is not a member of the target union must be rejected
 * with an {@link IllegalArgumentException}.
 *
 * NOTE(review): the union is declared with valid Hive type names only (bigint, double).
 * The previous declaration used "long", which does not appear to be a Hive type name, so
 * the expected exception was presumably raised while parsing the type string and not by
 * the conversion itself -- confirm against TypeInfoUtils.
 */
@Test(expected = IllegalArgumentException.class)
public void test_convertToORCObjectBadUnion() {
    // A String value has no matching member in uniontype<bigint,double>.
    NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,double>"), "Hello");
}
//////////////////
// Helper methods