mirror of https://github.com/apache/nifi.git
NIFI-11739 - Add ability to ignore missing fields in PutIceberg
Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com>
This closes #7421.
This commit is contained in:
parent 2c79b5f8e2
commit 3f7b1de6b8
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.nifi.processors.iceberg;
+
+import org.apache.nifi.components.DescribedValue;
+
+public enum UnmatchedColumnBehavior implements DescribedValue {
+    IGNORE_UNMATCHED_COLUMN("Ignore Unmatched Columns",
+            "Any column in the database that does not have a field in the document will be assumed to not be required. No notification will be logged"),
+
+    WARNING_UNMATCHED_COLUMN("Warn on Unmatched Columns",
+            "Any column in the database that does not have a field in the document will be assumed to not be required. A warning will be logged"),
+
+    FAIL_UNMATCHED_COLUMN("Fail on Unmatched Columns",
+            "A flow will fail if any column in the database that does not have a field in the document. An error will be logged");
+
+
+    private final String displayName;
+    private final String description;
+
+    UnmatchedColumnBehavior(final String displayName, final String description) {
+        this.displayName = displayName;
+        this.description = description;
+    }
+
+    @Override
+    public String getValue() {
+        return name();
+    }
+
+    @Override
+    public String getDisplayName() {
+        return displayName;
+    }
+
+    @Override
+    public String getDescription() {
+        return description;
+    }
+}
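For illustration only, not part of the commit: because getValue() above returns name(), the value NiFi stores for the property is the enum constant name, and it round-trips back to the enum with valueOf(). The enum and its methods come from the file above; the demo class itself is hypothetical.

    // Hypothetical demo class; assumes the UnmatchedColumnBehavior enum above is on the classpath.
    import org.apache.nifi.processors.iceberg.UnmatchedColumnBehavior;

    public class UnmatchedColumnBehaviorDemo {
        public static void main(String[] args) {
            // getValue() returns name(), so the persisted property value is the constant name
            String stored = UnmatchedColumnBehavior.WARNING_UNMATCHED_COLUMN.getValue();

            // reading the stored value back recovers the enum constant
            UnmatchedColumnBehavior behavior = UnmatchedColumnBehavior.valueOf(stored);

            System.out.println(behavior.getDisplayName());   // prints: Warn on Unmatched Columns
            System.out.println(behavior.getDescription());
        }
    }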
@@ -21,9 +21,11 @@ import org.apache.commons.lang3.Validate;
 import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
+import org.apache.nifi.serialization.SimpleRecordSchema;
 import org.apache.nifi.serialization.record.DataType;
 import org.apache.nifi.serialization.record.Record;
 import org.apache.nifi.serialization.record.RecordField;
+import org.apache.nifi.serialization.record.RecordFieldType;
 import org.apache.nifi.serialization.record.RecordSchema;
 import org.apache.nifi.serialization.record.util.DataTypeUtils;

@@ -265,9 +267,16 @@ public class GenericDataConverters {

             for (DataConverter<?, ?> converter : converters) {
                 final Optional<RecordField> recordField = recordSchema.getField(converter.getSourceFieldName());
-                final RecordField field = recordField.get();
-                // creates a record field accessor for every data converter
-                getters.put(converter.getTargetFieldName(), createFieldGetter(field.getDataType(), field.getFieldName(), field.isNullable()));
+                if (recordField.isEmpty()) {
+                    final Types.NestedField missingField = schema.field(converter.getTargetFieldName());
+                    if (missingField != null) {
+                        getters.put(converter.getTargetFieldName(), createFieldGetter(convertSchemaTypeToDataType(missingField.type()), missingField.name(), missingField.isOptional()));
+                    }
+                } else {
+                    final RecordField field = recordField.get();
+                    // creates a record field accessor for every data converter
+                    getters.put(converter.getTargetFieldName(), createFieldGetter(field.getDataType(), field.getFieldName(), field.isNullable()));
+                }
             }
         }

@@ -290,4 +299,54 @@ public class GenericDataConverters {
             return converter.convert((S) getters.get(converter.getTargetFieldName()).getFieldOrNull(record));
         }
     }
+
+    public static DataType convertSchemaTypeToDataType(Type schemaType) {
+        switch (schemaType.typeId()) {
+            case BOOLEAN:
+                return RecordFieldType.BOOLEAN.getDataType();
+            case INTEGER:
+                return RecordFieldType.INT.getDataType();
+            case LONG:
+                return RecordFieldType.LONG.getDataType();
+            case FLOAT:
+                return RecordFieldType.FLOAT.getDataType();
+            case DOUBLE:
+                return RecordFieldType.DOUBLE.getDataType();
+            case DATE:
+                return RecordFieldType.DATE.getDataType();
+            case TIME:
+                return RecordFieldType.TIME.getDataType();
+            case TIMESTAMP:
+                return RecordFieldType.TIMESTAMP.getDataType();
+            case STRING:
+                return RecordFieldType.STRING.getDataType();
+            case UUID:
+                return RecordFieldType.UUID.getDataType();
+            case FIXED:
+            case BINARY:
+                return RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType());
+            case DECIMAL:
+                return RecordFieldType.DECIMAL.getDataType();
+            case STRUCT:
+                // Build a record type from the struct type
+                Types.StructType structType = schemaType.asStructType();
+                List<Types.NestedField> fields = structType.fields();
+                List<RecordField> recordFields = new ArrayList<>(fields.size());
+                for (Types.NestedField field : fields) {
+                    DataType dataType = convertSchemaTypeToDataType(field.type());
+                    recordFields.add(new RecordField(field.name(), dataType, field.isOptional()));
+                }
+                RecordSchema recordSchema = new SimpleRecordSchema(recordFields);
+                return RecordFieldType.RECORD.getRecordDataType(recordSchema);
+            case LIST:
+                // Build a list type from the elements
+                Types.ListType listType = schemaType.asListType();
+                return RecordFieldType.ARRAY.getArrayDataType(convertSchemaTypeToDataType(listType.elementType()), listType.isElementOptional());
+            case MAP:
+                // Build a map type from the elements
+                Types.MapType mapType = schemaType.asMapType();
+                return RecordFieldType.MAP.getMapDataType(convertSchemaTypeToDataType(mapType.valueType()), mapType.isValueOptional());
+        }
+        throw new IllegalArgumentException("Invalid or unsupported type: " + schemaType);
+    }
 }
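For illustration only, not part of the commit: a hypothetical sketch of what the new convertSchemaTypeToDataType() helper returns for a couple of Iceberg types. The method and the Iceberg/NiFi types are taken from the hunk above; the demo class is made up, and the package of GenericDataConverters is assumed from the converter imports elsewhere in this diff.

    import org.apache.iceberg.types.Types;
    import org.apache.nifi.processors.iceberg.converter.GenericDataConverters;
    import org.apache.nifi.serialization.record.DataType;

    public class SchemaTypeMappingDemo {
        public static void main(String[] args) {
            // Iceberg list<string> maps to a NiFi ARRAY data type with STRING elements
            DataType listType = GenericDataConverters.convertSchemaTypeToDataType(
                    Types.ListType.ofOptional(1, Types.StringType.get()));
            System.out.println(listType);

            // Iceberg decimal(10, 2) maps to the NiFi DECIMAL data type
            DataType decimalType = GenericDataConverters.convertSchemaTypeToDataType(
                    Types.DecimalType.of(10, 2));
            System.out.println(decimalType);
        }
    }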
@@ -25,6 +25,8 @@ import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.schema.SchemaWithPartnerVisitor;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processors.iceberg.UnmatchedColumnBehavior;
 import org.apache.nifi.serialization.record.DataType;
 import org.apache.nifi.serialization.record.Record;
 import org.apache.nifi.serialization.record.RecordField;

@@ -46,19 +48,26 @@ import java.util.stream.Collectors;
 public class IcebergRecordConverter {

     private final DataConverter<Record, GenericRecord> converter;
+    public final UnmatchedColumnBehavior unmatchedColumnBehavior;
+    public ComponentLog logger;

     public GenericRecord convert(Record record) {
         return converter.convert(record);
     }

     @SuppressWarnings("unchecked")
-    public IcebergRecordConverter(Schema schema, RecordSchema recordSchema, FileFormat fileFormat) {
-        this.converter = (DataConverter<Record, GenericRecord>) IcebergSchemaVisitor.visit(schema, new RecordDataType(recordSchema), fileFormat);
+    public IcebergRecordConverter(Schema schema, RecordSchema recordSchema, FileFormat fileFormat, UnmatchedColumnBehavior unmatchedColumnBehavior, ComponentLog logger) {
+        this.converter = (DataConverter<Record, GenericRecord>) IcebergSchemaVisitor.visit(schema, new RecordDataType(recordSchema), fileFormat, unmatchedColumnBehavior, logger);
+        this.unmatchedColumnBehavior = unmatchedColumnBehavior;
+        this.logger = logger;
     }

     private static class IcebergSchemaVisitor extends SchemaWithPartnerVisitor<DataType, DataConverter<?, ?>> {

-        public static DataConverter<?, ?> visit(Schema schema, RecordDataType recordDataType, FileFormat fileFormat) {
-            return visit(schema, new RecordTypeWithFieldNameMapper(schema, recordDataType), new IcebergSchemaVisitor(), new IcebergPartnerAccessors(schema, fileFormat));
+        public static DataConverter<?, ?> visit(Schema schema, RecordDataType recordDataType, FileFormat fileFormat, UnmatchedColumnBehavior unmatchedColumnBehavior, ComponentLog logger) {
+            return visit(schema, new RecordTypeWithFieldNameMapper(schema, recordDataType), new IcebergSchemaVisitor(),
+                    new IcebergPartnerAccessors(schema, fileFormat, unmatchedColumnBehavior, logger));
         }

         @Override

@@ -123,8 +132,10 @@ public class IcebergRecordConverter {
             // set NiFi schema field names (sourceFieldName) in the data converters
             for (DataConverter<?, ?> converter : converters) {
                 final Optional<String> mappedFieldName = recordType.getNameMapping(converter.getTargetFieldName());
-                final Optional<RecordField> recordField = recordSchema.getField(mappedFieldName.get());
-                converter.setSourceFieldName(recordField.get().getFieldName());
+                if (mappedFieldName.isPresent()) {
+                    final Optional<RecordField> recordField = recordSchema.getField(mappedFieldName.get());
+                    converter.setSourceFieldName(recordField.get().getFieldName());
+                }
             }

             return new GenericDataConverters.RecordConverter(converters, recordSchema, type);

@@ -144,10 +155,14 @@ public class IcebergRecordConverter {
     public static class IcebergPartnerAccessors implements SchemaWithPartnerVisitor.PartnerAccessors<DataType> {
         private final Schema schema;
         private final FileFormat fileFormat;
+        private final UnmatchedColumnBehavior unmatchedColumnBehavior;
+        private final ComponentLog logger;

-        IcebergPartnerAccessors(Schema schema, FileFormat fileFormat) {
+        IcebergPartnerAccessors(Schema schema, FileFormat fileFormat, UnmatchedColumnBehavior unmatchedColumnBehavior, ComponentLog logger) {
             this.schema = schema;
             this.fileFormat = fileFormat;
+            this.unmatchedColumnBehavior = unmatchedColumnBehavior;
+            this.logger = logger;
         }

         @Override

@@ -156,8 +171,25 @@ public class IcebergRecordConverter {
             final RecordTypeWithFieldNameMapper recordType = (RecordTypeWithFieldNameMapper) dataType;

             final Optional<String> mappedFieldName = recordType.getNameMapping(name);
-            Validate.isTrue(mappedFieldName.isPresent(), String.format("Cannot find field with name '%s' in the record schema", name));
+            if (UnmatchedColumnBehavior.FAIL_UNMATCHED_COLUMN.equals(unmatchedColumnBehavior)) {
+                Validate.isTrue(mappedFieldName.isPresent(), String.format("Cannot find field with name '%s' in the record schema", name));
+            }
+            if (mappedFieldName.isEmpty()) {
+                if (UnmatchedColumnBehavior.WARNING_UNMATCHED_COLUMN.equals(unmatchedColumnBehavior)) {
+                    if (logger != null) {
+                        logger.warn("Cannot find field with name '" + name + "' in the record schema, using the target schema for datatype and a null value");
+                    }
+                }
+                // If the field is missing, use the expected type from the schema (converted to a DataType)
+                final Types.NestedField schemaField = schema.findField(fieldId);
+                final Type schemaFieldType = schemaField.type();
+                if (schemaField.isRequired()) {
+                    // Iceberg requires a non-null value for required fields
+                    throw new IllegalArgumentException("Iceberg requires a non-null value for required fields, field: "
+                            + schemaField.name() + ", type: " + schemaFieldType);
+                }
+                return GenericDataConverters.convertSchemaTypeToDataType(schemaFieldType);
+            }
             final Optional<RecordField> recordField = recordType.getChildSchema().getField(mappedFieldName.get());
             final RecordField field = recordField.get();
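For illustration only, not part of the commit: a hypothetical sketch of how the converter behaves when the incoming NiFi record has no field for an Iceberg column. The constructor signature and the classes come from this diff; the schema, values, and demo class are made up.

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import org.apache.iceberg.FileFormat;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.types.Types;
    import org.apache.nifi.mock.MockComponentLogger;
    import org.apache.nifi.processors.iceberg.UnmatchedColumnBehavior;
    import org.apache.nifi.processors.iceberg.converter.IcebergRecordConverter;
    import org.apache.nifi.serialization.SimpleRecordSchema;
    import org.apache.nifi.serialization.record.MapRecord;
    import org.apache.nifi.serialization.record.RecordField;
    import org.apache.nifi.serialization.record.RecordFieldType;

    public class MissingFieldDemo {
        public static void main(String[] args) {
            // Iceberg table with an optional "age" column that the incoming record does not provide
            Schema icebergSchema = new Schema(
                    Types.NestedField.optional(1, "name", Types.StringType.get()),
                    Types.NestedField.optional(2, "age", Types.IntegerType.get()));

            SimpleRecordSchema nifiSchema = new SimpleRecordSchema(
                    List.of(new RecordField("name", RecordFieldType.STRING.getDataType())));
            Map<String, Object> values = new HashMap<>();
            values.put("name", "john");
            MapRecord record = new MapRecord(nifiSchema, values);

            // IGNORE_UNMATCHED_COLUMN: the unmatched optional column is written as null, silently
            IcebergRecordConverter converter = new IcebergRecordConverter(
                    icebergSchema, nifiSchema, FileFormat.PARQUET,
                    UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, new MockComponentLogger());
            System.out.println(converter.convert(record));

            // WARNING_UNMATCHED_COLUMN behaves the same but logs a warning per unmatched column;
            // FAIL_UNMATCHED_COLUMN makes the constructor throw IllegalArgumentException instead.
        }
    }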
@@ -108,6 +108,14 @@ public class PutIceberg extends AbstractIcebergProcessor {
             .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
             .build();

+    static final PropertyDescriptor UNMATCHED_COLUMN_BEHAVIOR = new PropertyDescriptor.Builder()
+            .name("unmatched-column-behavior")
+            .displayName("Unmatched Column Behavior")
+            .description("If an incoming record does not have a field mapping for all of the database table's columns, this property specifies how to handle the situation.")
+            .allowableValues(UnmatchedColumnBehavior.class)
+            .defaultValue(UnmatchedColumnBehavior.FAIL_UNMATCHED_COLUMN.getValue())
+            .required(true)
+            .build();
+
     static final PropertyDescriptor FILE_FORMAT = new PropertyDescriptor.Builder()
             .name("file-format")
             .displayName("File Format")

@@ -178,6 +186,7 @@ public class PutIceberg extends AbstractIcebergProcessor {
             CATALOG,
             CATALOG_NAMESPACE,
             TABLE_NAME,
+            UNMATCHED_COLUMN_BEHAVIOR,
             FILE_FORMAT,
             MAXIMUM_FILE_SIZE,
             KERBEROS_USER_SERVICE,

@@ -256,8 +265,10 @@ public class PutIceberg extends AbstractIcebergProcessor {
         final FileFormat format = getFileFormat(table.properties(), fileFormat);
         final IcebergTaskWriterFactory taskWriterFactory = new IcebergTaskWriterFactory(table, flowFile.getId(), format, maximumFileSize);
         taskWriter = taskWriterFactory.create();
+        final UnmatchedColumnBehavior unmatchedColumnBehavior =
+                UnmatchedColumnBehavior.valueOf(context.getProperty(UNMATCHED_COLUMN_BEHAVIOR).getValue());

-        final IcebergRecordConverter recordConverter = new IcebergRecordConverter(table.schema(), reader.getSchema(), format);
+        final IcebergRecordConverter recordConverter = new IcebergRecordConverter(table.schema(), reader.getSchema(), format, unmatchedColumnBehavior, getLogger());

         Record record;
         while ((record = reader.nextRecord()) != null) {

@@ -353,5 +364,4 @@ public class PutIceberg extends AbstractIcebergProcessor {
                 .retry(3)
                 .run(file -> table.io().deleteFile(file.path().toString()));
     }
-
 }
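For illustration only, not part of the commit: a hypothetical snippet showing how the new property could be configured from a unit test, using the TestRunner API the tests below already rely on. The string overload of setProperty is used because the descriptor is package-private; everything except the property key and the enum constant is made up.

    import org.apache.nifi.processors.iceberg.PutIceberg;
    import org.apache.nifi.processors.iceberg.UnmatchedColumnBehavior;
    import org.apache.nifi.util.TestRunner;
    import org.apache.nifi.util.TestRunners;

    public class UnmatchedColumnBehaviorPropertyDemo {
        public static void main(String[] args) {
            TestRunner runner = TestRunners.newTestRunner(new PutIceberg());

            // Tolerate records that are missing table columns and log a warning instead of failing
            runner.setProperty("unmatched-column-behavior",
                    UnmatchedColumnBehavior.WARNING_UNMATCHED_COLUMN.getValue());

            // Catalog, namespace, table name and record reader still need to be configured,
            // as shown in the catalog tests further down in this diff.
        }
    }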
@@ -32,6 +32,8 @@ import org.apache.iceberg.io.TaskWriter;
 import org.apache.iceberg.io.WriteResult;
 import org.apache.iceberg.types.Types;
 import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.mock.MockComponentLogger;
 import org.apache.nifi.processor.ProcessContext;
 import org.apache.nifi.processors.iceberg.catalog.IcebergCatalogFactory;
 import org.apache.nifi.processors.iceberg.catalog.TestHadoopCatalogService;

@@ -79,10 +81,12 @@ public class TestDataFileActions {
     );

     private PutIceberg icebergProcessor;
+    private ComponentLog logger;

     @BeforeEach
     public void setUp() {
         icebergProcessor = new PutIceberg();
+        logger = new MockComponentLogger();
     }

     @DisabledOnOs(WINDOWS)

@@ -103,7 +107,7 @@ public class TestDataFileActions {
         IcebergTaskWriterFactory taskWriterFactory = new IcebergTaskWriterFactory(table, new Random().nextLong(), FileFormat.PARQUET, null);
         TaskWriter<Record> taskWriter = taskWriterFactory.create();

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(table.schema(), abortSchema, FileFormat.PARQUET);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(table.schema(), abortSchema, FileFormat.PARQUET, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);

         for (MapRecord record : recordList) {
             taskWriter.write(recordConverter.convert(record));
@@ -37,6 +37,8 @@ import org.apache.iceberg.orc.ORC;
 import org.apache.iceberg.parquet.Parquet;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.mock.MockComponentLogger;
 import org.apache.nifi.processors.iceberg.converter.ArrayElementGetter;
 import org.apache.nifi.processors.iceberg.converter.IcebergRecordConverter;
 import org.apache.nifi.processors.iceberg.converter.RecordFieldGetter;

@@ -57,6 +59,7 @@ import org.junit.jupiter.api.condition.DisabledOnOs;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;

 import java.io.File;
 import java.io.IOException;
 import java.math.BigDecimal;

@@ -92,9 +95,12 @@ public class TestIcebergRecordConverter {

     private OutputFile tempFile;

+    private ComponentLog logger;
+
     @BeforeEach
     public void setUp() throws Exception {
         tempFile = Files.localOutput(createTempFile("test", null));
+        logger = new MockComponentLogger();
     }

     @AfterEach
@@ -145,6 +151,24 @@ public class TestIcebergRecordConverter {
             Types.NestedField.optional(14, "choice", Types.IntegerType.get())
     );

+    private static final Schema PRIMITIVES_SCHEMA_WITH_REQUIRED_FIELDS = new Schema(
+            Types.NestedField.optional(0, "string", Types.StringType.get()),
+            Types.NestedField.required(1, "integer", Types.IntegerType.get()),
+            Types.NestedField.required(2, "float", Types.FloatType.get()),
+            Types.NestedField.required(3, "long", Types.LongType.get()),
+            Types.NestedField.optional(4, "double", Types.DoubleType.get()),
+            Types.NestedField.optional(5, "decimal", Types.DecimalType.of(10, 2)),
+            Types.NestedField.optional(6, "boolean", Types.BooleanType.get()),
+            Types.NestedField.optional(7, "fixed", Types.FixedType.ofLength(5)),
+            Types.NestedField.optional(8, "binary", Types.BinaryType.get()),
+            Types.NestedField.optional(9, "date", Types.DateType.get()),
+            Types.NestedField.optional(10, "time", Types.TimeType.get()),
+            Types.NestedField.optional(11, "timestamp", Types.TimestampType.withZone()),
+            Types.NestedField.optional(12, "timestampTz", Types.TimestampType.withoutZone()),
+            Types.NestedField.optional(13, "uuid", Types.UUIDType.get()),
+            Types.NestedField.optional(14, "choice", Types.IntegerType.get())
+    );
+
     private static final Schema COMPATIBLE_PRIMITIVES_SCHEMA = new Schema(
             Types.NestedField.optional(0, "string", Types.StringType.get()),
             Types.NestedField.optional(1, "integer", Types.IntegerType.get()),
@@ -240,6 +264,24 @@ public class TestIcebergRecordConverter {
         return new SimpleRecordSchema(fields);
     }

+    private static RecordSchema getPrimitivesSchemaMissingFields() {
+        List<RecordField> fields = new ArrayList<>();
+        fields.add(new RecordField("string", RecordFieldType.STRING.getDataType()));
+        fields.add(new RecordField("double", RecordFieldType.DOUBLE.getDataType()));
+        fields.add(new RecordField("decimal", RecordFieldType.DECIMAL.getDecimalDataType(10, 2)));
+        fields.add(new RecordField("boolean", RecordFieldType.BOOLEAN.getDataType()));
+        fields.add(new RecordField("fixed", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType())));
+        fields.add(new RecordField("binary", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType())));
+        fields.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
+        fields.add(new RecordField("time", RecordFieldType.TIME.getDataType()));
+        fields.add(new RecordField("timestamp", RecordFieldType.TIMESTAMP.getDataType()));
+        fields.add(new RecordField("timestampTz", RecordFieldType.TIMESTAMP.getDataType()));
+        fields.add(new RecordField("uuid", RecordFieldType.UUID.getDataType()));
+        fields.add(new RecordField("choice", RecordFieldType.CHOICE.getChoiceDataType(RecordFieldType.STRING.getDataType(), RecordFieldType.INT.getDataType())));
+
+        return new SimpleRecordSchema(fields);
+    }
+
     private static RecordSchema getPrimitivesAsCompatiblesSchema() {
         List<RecordField> fields = new ArrayList<>();
         fields.add(new RecordField("string", RecordFieldType.INT.getDataType()));
@@ -370,6 +412,29 @@ public class TestIcebergRecordConverter {
         return new MapRecord(getPrimitivesSchema(), values);
     }

+    private static Record setupPrimitivesTestRecordMissingFields() {
+        LocalDate localDate = LocalDate.of(2017, 4, 4);
+        LocalTime localTime = LocalTime.of(14, 20, 33);
+        LocalDateTime localDateTime = LocalDateTime.of(2017, 4, 4, 14, 20, 33, 789000000);
+        OffsetDateTime offsetDateTime = OffsetDateTime.of(localDateTime, ZoneOffset.ofHours(-5));
+
+        Map<String, Object> values = new HashMap<>();
+        values.put("string", "Test String");
+        values.put("double", 3.14159D);
+        values.put("decimal", new BigDecimal("12345678.12"));
+        values.put("boolean", true);
+        values.put("fixed", "hello".getBytes());
+        values.put("binary", "hello".getBytes());
+        values.put("date", localDate);
+        values.put("time", Time.valueOf(localTime));
+        values.put("timestamp", Timestamp.from(offsetDateTime.toInstant()));
+        values.put("timestampTz", Timestamp.valueOf(localDateTime));
+        values.put("uuid", UUID.fromString("0000-00-00-00-000000"));
+        values.put("choice", "10");
+
+        return new MapRecord(getPrimitivesSchemaMissingFields(), values);
+    }
+
     private static Record setupCompatiblePrimitivesTestRecord() {

         Map<String, Object> values = new HashMap<>();
@@ -439,7 +504,7 @@ public class TestIcebergRecordConverter {
         RecordSchema nifiSchema = getPrimitivesSchema();
         Record record = setupPrimitivesTestRecord();

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(PRIMITIVES_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(PRIMITIVES_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, PRIMITIVES_SCHEMA, genericRecord, tempFile);
@@ -472,6 +537,54 @@ public class TestIcebergRecordConverter {
         } else {
             assertEquals(UUID.fromString("0000-00-00-00-000000"), resultRecord.get(13, UUID.class));
         }
+    }
+
+    @DisabledOnOs(WINDOWS)
+    @ParameterizedTest
+    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
+    public void testPrimitivesIgnoreMissingFields(FileFormat format) throws IOException {
+        RecordSchema nifiSchema = getPrimitivesSchemaMissingFields();
+        Record record = setupPrimitivesTestRecordMissingFields();
+        MockComponentLogger mockComponentLogger = new MockComponentLogger();
+
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(PRIMITIVES_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, mockComponentLogger);
+        GenericRecord genericRecord = recordConverter.convert(record);
+
+        writeTo(format, PRIMITIVES_SCHEMA, genericRecord, tempFile);
+
+        List<GenericRecord> results = readFrom(format, PRIMITIVES_SCHEMA, tempFile.toInputFile());
+
+        assertEquals(results.size(), 1);
+        GenericRecord resultRecord = results.get(0);
+
+        LocalDateTime localDateTime = LocalDateTime.of(2017, 4, 4, 14, 20, 33, 789000000);
+        OffsetDateTime offsetDateTime = OffsetDateTime.of(localDateTime, ZoneOffset.ofHours(-5));
+
+        assertEquals("Test String", resultRecord.get(0, String.class));
+        assertNull(resultRecord.get(1, Integer.class));
+        assertNull(resultRecord.get(2, Float.class));
+        assertNull(resultRecord.get(3, Long.class));
+        assertEquals(Double.valueOf(3.14159D), resultRecord.get(4, Double.class));
+        assertEquals(new BigDecimal("12345678.12"), resultRecord.get(5, BigDecimal.class));
+        assertEquals(Boolean.TRUE, resultRecord.get(6, Boolean.class));
+        assertArrayEquals(new byte[]{104, 101, 108, 108, 111}, resultRecord.get(7, byte[].class));
+        assertArrayEquals(new byte[]{104, 101, 108, 108, 111}, resultRecord.get(8, ByteBuffer.class).array());
+        assertEquals(LocalDate.of(2017, 4, 4), resultRecord.get(9, LocalDate.class));
+        assertEquals(LocalTime.of(14, 20, 33), resultRecord.get(10, LocalTime.class));
+        assertEquals(offsetDateTime.withOffsetSameInstant(ZoneOffset.UTC), resultRecord.get(11, OffsetDateTime.class));
+        assertEquals(LocalDateTime.of(2017, 4, 4, 14, 20, 33, 789000000), resultRecord.get(12, LocalDateTime.class));
+        assertEquals(Integer.valueOf(10), resultRecord.get(14, Integer.class));
+
+        if (format.equals(FileFormat.PARQUET)) {
+            // Parquet uses a conversion to the byte values of numeric characters such as "0" -> byte value 0
+            UUID uuid = UUID.fromString("0000-00-00-00-000000");
+            ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[16]);
+            byteBuffer.putLong(uuid.getMostSignificantBits());
+            byteBuffer.putLong(uuid.getLeastSignificantBits());
+            assertArrayEquals(byteBuffer.array(), resultRecord.get(13, byte[].class));
+        } else {
+            assertEquals(UUID.fromString("0000-00-00-00-000000"), resultRecord.get(13, UUID.class));
+        }

         // Test null values
         for (String fieldName : record.getRawFieldNames()) {
@@ -504,11 +617,81 @@ public class TestIcebergRecordConverter {
     @DisabledOnOs(WINDOWS)
     @ParameterizedTest
     @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testCompatiblePrimitives(FileFormat format) throws IOException {
+    public void testPrimitivesMissingRequiredFields(FileFormat format) {
+        RecordSchema nifiSchema = getPrimitivesSchemaMissingFields();
+        MockComponentLogger mockComponentLogger = new MockComponentLogger();
+
+        assertThrows(IllegalArgumentException.class,
+                () -> new IcebergRecordConverter(PRIMITIVES_SCHEMA_WITH_REQUIRED_FIELDS, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, mockComponentLogger));
+    }
+
+    @DisabledOnOs(WINDOWS)
+    @ParameterizedTest
+    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
+    public void testPrimitivesWarnMissingFields(FileFormat format) throws IOException {
+        RecordSchema nifiSchema = getPrimitivesSchemaMissingFields();
+        Record record = setupPrimitivesTestRecordMissingFields();
+        MockComponentLogger mockComponentLogger = new MockComponentLogger();
+
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(PRIMITIVES_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.WARNING_UNMATCHED_COLUMN, mockComponentLogger);
+        GenericRecord genericRecord = recordConverter.convert(record);
+
+        writeTo(format, PRIMITIVES_SCHEMA, genericRecord, tempFile);
+
+        List<GenericRecord> results = readFrom(format, PRIMITIVES_SCHEMA, tempFile.toInputFile());
+
+        assertEquals(results.size(), 1);
+        GenericRecord resultRecord = results.get(0);
+
+        LocalDateTime localDateTime = LocalDateTime.of(2017, 4, 4, 14, 20, 33, 789000000);
+        OffsetDateTime offsetDateTime = OffsetDateTime.of(localDateTime, ZoneOffset.ofHours(-5));
+
+        assertEquals("Test String", resultRecord.get(0, String.class));
+        assertNull(resultRecord.get(1, Integer.class));
+        assertNull(resultRecord.get(2, Float.class));
+        assertNull(resultRecord.get(3, Long.class));
+        assertEquals(Double.valueOf(3.14159D), resultRecord.get(4, Double.class));
+        assertEquals(new BigDecimal("12345678.12"), resultRecord.get(5, BigDecimal.class));
+        assertEquals(Boolean.TRUE, resultRecord.get(6, Boolean.class));
+        assertArrayEquals(new byte[]{104, 101, 108, 108, 111}, resultRecord.get(7, byte[].class));
+        assertArrayEquals(new byte[]{104, 101, 108, 108, 111}, resultRecord.get(8, ByteBuffer.class).array());
+        assertEquals(LocalDate.of(2017, 4, 4), resultRecord.get(9, LocalDate.class));
+        assertEquals(LocalTime.of(14, 20, 33), resultRecord.get(10, LocalTime.class));
+        assertEquals(offsetDateTime.withOffsetSameInstant(ZoneOffset.UTC), resultRecord.get(11, OffsetDateTime.class));
+        assertEquals(LocalDateTime.of(2017, 4, 4, 14, 20, 33, 789000000), resultRecord.get(12, LocalDateTime.class));
+        assertEquals(Integer.valueOf(10), resultRecord.get(14, Integer.class));
+
+        if (format.equals(FileFormat.PARQUET)) {
+            // Parquet uses a conversion to the byte values of numeric characters such as "0" -> byte value 0
+            UUID uuid = UUID.fromString("0000-00-00-00-000000");
+            ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[16]);
+            byteBuffer.putLong(uuid.getMostSignificantBits());
+            byteBuffer.putLong(uuid.getLeastSignificantBits());
+            assertArrayEquals(byteBuffer.array(), resultRecord.get(13, byte[].class));
+        } else {
+            assertEquals(UUID.fromString("0000-00-00-00-000000"), resultRecord.get(13, UUID.class));
+        }
+    }
+
+    @DisabledOnOs(WINDOWS)
+    @ParameterizedTest
+    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
+    public void testPrimitivesFailMissingFields(FileFormat format) throws IOException {
+        RecordSchema nifiSchema = getPrimitivesSchemaMissingFields();
+        MockComponentLogger mockComponentLogger = new MockComponentLogger();
+
+        assertThrows(IllegalArgumentException.class,
+                () -> new IcebergRecordConverter(PRIMITIVES_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.FAIL_UNMATCHED_COLUMN, mockComponentLogger));
+    }
+
+    @DisabledOnOs(WINDOWS)
+    @Test
+    public void testCompatiblePrimitives() throws IOException {
         RecordSchema nifiSchema = getPrimitivesAsCompatiblesSchema();
         Record record = setupCompatiblePrimitivesTestRecord();
+        final FileFormat format = PARQUET;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(COMPATIBLE_PRIMITIVES_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(COMPATIBLE_PRIMITIVES_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.FAIL_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, COMPATIBLE_PRIMITIVES_SCHEMA, genericRecord, tempFile);
@@ -536,21 +719,17 @@ public class TestIcebergRecordConverter {
         assertEquals(expectedLocalDateTimestamp, resultRecord.get(11, LocalDateTime.class));
         assertEquals(Integer.valueOf(10), resultRecord.get(13, Integer.class));

-        if (format.equals(PARQUET)) {
-            assertArrayEquals(new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, resultRecord.get(12, byte[].class));
-        } else {
-            assertEquals(UUID.fromString("0000-00-00-00-000000"), resultRecord.get(12, UUID.class));
-        }
+        assertArrayEquals(new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, resultRecord.get(12, byte[].class));
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testStruct(FileFormat format) throws IOException {
+    @Test
+    public void testStruct() throws IOException {
         RecordSchema nifiSchema = getStructSchema();
         Record record = setupStructTestRecord();
+        final FileFormat format = FileFormat.ORC;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(STRUCT_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(STRUCT_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, STRUCT_SCHEMA, genericRecord, tempFile);
@@ -574,13 +753,13 @@ public class TestIcebergRecordConverter {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testList(FileFormat format) throws IOException {
+    @Test
+    public void testList() throws IOException {
         RecordSchema nifiSchema = getListSchema();
         Record record = setupListTestRecord();
+        final FileFormat format = FileFormat.AVRO;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(LIST_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(LIST_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, LIST_SCHEMA, genericRecord, tempFile);
@@ -593,7 +772,7 @@ public class TestIcebergRecordConverter {

         assertEquals(1, resultRecord.size());
         assertInstanceOf(List.class, resultRecord.get(0));
-        List nestedList = resultRecord.get(0, List.class);
+        List<?> nestedList = resultRecord.get(0, List.class);

         assertEquals(2, nestedList.size());
         assertInstanceOf(List.class, nestedList.get(0));

@@ -604,13 +783,13 @@ public class TestIcebergRecordConverter {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testMap(FileFormat format) throws IOException {
+    @Test
+    public void testMap() throws IOException {
         RecordSchema nifiSchema = getMapSchema();
         Record record = setupMapTestRecord();
+        final FileFormat format = PARQUET;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(MAP_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(MAP_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, MAP_SCHEMA, genericRecord, tempFile);
@@ -636,20 +815,21 @@ public class TestIcebergRecordConverter {
     @ParameterizedTest
     @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
     public void testSchemaMismatch(FileFormat format) {
-        RecordSchema nifiSchema = getPrimitivesSchema();
+        RecordSchema nifiSchema = getPrimitivesSchemaMissingFields();

-        IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> new IcebergRecordConverter(CASE_INSENSITIVE_SCHEMA, nifiSchema, format));
-        assertTrue(e.getMessage().contains("Cannot find field with name 'FIELD1' in the record schema"), e.getMessage());
+        IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
+                () -> new IcebergRecordConverter(PRIMITIVES_SCHEMA_WITH_REQUIRED_FIELDS, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger));
+        assertTrue(e.getMessage().contains("Iceberg requires a non-null value for required fields"), e.getMessage());
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testCaseInsensitiveFieldMapping(FileFormat format) throws IOException {
+    @Test
+    public void testCaseInsensitiveFieldMapping() throws IOException {
         RecordSchema nifiSchema = getCaseInsensitiveSchema();
         Record record = setupCaseInsensitiveTestRecord();
+        final FileFormat format = FileFormat.AVRO;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(CASE_INSENSITIVE_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(CASE_INSENSITIVE_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, CASE_INSENSITIVE_SCHEMA, genericRecord, tempFile);
@@ -667,13 +847,13 @@ public class TestIcebergRecordConverter {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @EnumSource(value = FileFormat.class, names = {"AVRO", "ORC", "PARQUET"})
-    public void testUnorderedFieldMapping(FileFormat format) throws IOException {
+    @Test
+    public void testUnorderedFieldMapping() throws IOException {
         RecordSchema nifiSchema = getUnorderedSchema();
         Record record = setupUnorderedTestRecord();
+        final FileFormat format = PARQUET;

-        IcebergRecordConverter recordConverter = new IcebergRecordConverter(UNORDERED_SCHEMA, nifiSchema, format);
+        IcebergRecordConverter recordConverter = new IcebergRecordConverter(UNORDERED_SCHEMA, nifiSchema, format, UnmatchedColumnBehavior.IGNORE_UNMATCHED_COLUMN, logger);
         GenericRecord genericRecord = recordConverter.convert(record);

         writeTo(format, UNORDERED_SCHEMA, genericRecord, tempFile);
@@ -698,7 +878,7 @@ public class TestIcebergRecordConverter {
         assertEquals("value5", resultRecord.get(2, String.class));

         assertInstanceOf(Map.class, resultRecord.get(3));
-        Map map = resultRecord.get(3, Map.class);
+        Map<?,?> map = resultRecord.get(3, Map.class);
         assertEquals("map value1", map.get("key1"));
         assertEquals("map value2", map.get("key2"));
     }
@@ -19,6 +19,7 @@ package org.apache.nifi.processors.iceberg;

 import org.apache.avro.Schema;
 import org.apache.commons.io.IOUtils;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;

@@ -38,9 +39,8 @@ import org.apache.nifi.util.TestRunner;
 import org.apache.nifi.util.TestRunners;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.DisabledOnOs;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;

 import java.io.IOException;
 import java.nio.charset.StandardCharsets;

@@ -102,14 +102,14 @@ public class TestPutIcebergWithHadoopCatalog {
         runner.setProperty(PutIceberg.RECORD_READER, "mock-reader-factory");
     }

-    private void initCatalog(PartitionSpec spec, String fileFormat) throws InitializationException, IOException {
+    private void initCatalog(PartitionSpec spec, FileFormat fileFormat) throws InitializationException, IOException {
         TestHadoopCatalogService catalogService = new TestHadoopCatalogService();
         IcebergCatalogFactory catalogFactory = new IcebergCatalogFactory(catalogService);
         catalog = catalogFactory.create();

         Map<String, String> tableProperties = new HashMap<>();
         tableProperties.put(TableProperties.FORMAT_VERSION, "2");
-        tableProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat);
+        tableProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat.name());

         catalog.createTable(TABLE_IDENTIFIER, DATE_SCHEMA, spec, tableProperties);
@@ -120,16 +120,15 @@ public class TestPutIcebergWithHadoopCatalog {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @ValueSource(strings = {"avro", "orc", "parquet"})
-    public void onTriggerYearTransform(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerYearTransform() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(DATE_SCHEMA)
                 .year("date")
                 .build();

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.PARQUET);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, "default");
         runner.setProperty(PutIceberg.TABLE_NAME, "date");
         runner.setValidateExpressionUsage(false);

@@ -148,16 +147,15 @@ public class TestPutIcebergWithHadoopCatalog {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @ValueSource(strings = {"avro", "orc", "parquet"})
-    public void onTriggerMonthTransform(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerMonthTransform() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(DATE_SCHEMA)
                 .month("timestampMicros")
                 .build();

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.ORC);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, "default");
         runner.setProperty(PutIceberg.TABLE_NAME, "date");
         runner.setValidateExpressionUsage(false);

@@ -177,16 +175,15 @@ public class TestPutIcebergWithHadoopCatalog {
     }

     @DisabledOnOs(WINDOWS)
-    @ParameterizedTest
-    @ValueSource(strings = {"avro", "orc", "parquet"})
-    public void onTriggerDayTransform(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerDayTransform() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(DATE_SCHEMA)
                 .day("timestampMicros")
                 .build();

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.AVRO);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, "default");
         runner.setProperty(PutIceberg.TABLE_NAME, "date");
         runner.setValidateExpressionUsage(false);
@@ -19,6 +19,7 @@ package org.apache.nifi.processors.iceberg;

 import org.apache.avro.Schema;
 import org.apache.commons.io.IOUtils;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;

@@ -44,10 +45,9 @@ import org.apache.nifi.util.TestRunners;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.DisabledOnOs;
 import org.junit.jupiter.api.extension.RegisterExtension;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;

 import java.net.URI;
 import java.nio.charset.StandardCharsets;

@@ -122,10 +122,10 @@ public class TestPutIcebergWithHiveCatalog {
         runner.setProperty(PutIceberg.RECORD_READER, "mock-reader-factory");
     }

-    private void initCatalog(PartitionSpec spec, String fileFormat) throws InitializationException {
+    private void initCatalog(PartitionSpec spec, FileFormat fileFormat) throws InitializationException {
         Map<String, String> tableProperties = new HashMap<>();
         tableProperties.put(TableProperties.FORMAT_VERSION, "2");
-        tableProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat);
+        tableProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat.name());

         TestHiveCatalogService catalogService = new TestHiveCatalogService.Builder()
                 .withMetastoreUri(metastore.getThriftConnectionUri())
@@ -143,16 +143,15 @@ public class TestPutIcebergWithHiveCatalog {
         runner.setProperty(PutIceberg.CATALOG, "catalog-service");
     }

-    @ParameterizedTest
-    @ValueSource(strings = {"avro"})
-    public void onTriggerPartitioned(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerPartitioned() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(USER_SCHEMA)
                 .bucket("department", 3)
                 .build();

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.AVRO);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, CATALOG_NAME);
         runner.setProperty(PutIceberg.TABLE_NAME, TABLE_NAME);
         runner.setValidateExpressionUsage(false);

@@ -181,16 +180,15 @@ public class TestPutIcebergWithHiveCatalog {
         assertProvenanceEvents();
     }

-    @ParameterizedTest
-    @ValueSource(strings = {"orc"})
-    public void onTriggerIdentityPartitioned(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerIdentityPartitioned() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(USER_SCHEMA)
                 .identity("department")
                 .build();

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.ORC);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, CATALOG_NAME);
         runner.setProperty(PutIceberg.TABLE_NAME, TABLE_NAME);
         runner.setValidateExpressionUsage(false);
@@ -219,9 +217,8 @@ public class TestPutIcebergWithHiveCatalog {
         assertProvenanceEvents();
     }

-    @ParameterizedTest
-    @ValueSource(strings = {"parquet"})
-    public void onTriggerMultiLevelIdentityPartitioned(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerMultiLevelIdentityPartitioned() throws Exception {
         PartitionSpec spec = PartitionSpec.builderFor(USER_SCHEMA)
                 .identity("name")
                 .identity("department")

@@ -229,7 +226,7 @@ public class TestPutIcebergWithHiveCatalog {

         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(spec, fileFormat);
+        initCatalog(spec, FileFormat.PARQUET);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, CATALOG_NAME);
         runner.setProperty(PutIceberg.TABLE_NAME, TABLE_NAME);
         runner.setValidateExpressionUsage(false);

@@ -262,12 +259,11 @@ public class TestPutIcebergWithHiveCatalog {
         assertProvenanceEvents();
     }

-    @ParameterizedTest
-    @ValueSource(strings = {"avro"})
-    public void onTriggerUnPartitioned(String fileFormat) throws Exception {
+    @Test
+    public void onTriggerUnPartitioned() throws Exception {
         runner = TestRunners.newTestRunner(processor);
         initRecordReader();
-        initCatalog(PartitionSpec.unpartitioned(), fileFormat);
+        initCatalog(PartitionSpec.unpartitioned(), FileFormat.AVRO);
         runner.setProperty(PutIceberg.CATALOG_NAMESPACE, "${catalog.name}");
         runner.setProperty(PutIceberg.TABLE_NAME, "${table.name}");
         runner.setProperty(PutIceberg.MAXIMUM_FILE_SIZE, "${max.filesize}");