mirror of https://github.com/apache/nifi.git
NIFI-7952: Allow RecordPath to be used for specifying the Insertion Operation and the data to be inserted into Kudu
parent bf962f6227
commit 74968991d5
@@ -83,6 +83,11 @@
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-record</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-record-path</artifactId>
+            <version>1.13.0-SNAPSHOT</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-security-utils</artifactId>
@@ -179,8 +179,8 @@ public abstract class AbstractKuduProcessor extends AbstractProcessor {
     protected KuduClient buildClient(final ProcessContext context) {
         final String masters = context.getProperty(KUDU_MASTERS).evaluateAttributeExpressions().getValue();
-        final Integer operationTimeout = context.getProperty(KUDU_OPERATION_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
-        final Integer adminOperationTimeout = context.getProperty(KUDU_KEEP_ALIVE_PERIOD_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
+        final int operationTimeout = context.getProperty(KUDU_OPERATION_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();
+        final int adminOperationTimeout = context.getProperty(KUDU_KEEP_ALIVE_PERIOD_TIMEOUT_MS).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS).intValue();

         return new KuduClient.KuduClientBuilder(masters)
                 .defaultOperationTimeoutMs(operationTimeout)
@@ -295,68 +295,72 @@ public abstract class AbstractKuduProcessor extends AbstractProcessor {
             if (lowercaseFields) {
                 colName = colName.toLowerCase();
             }
-            int colIdx = this.getColumnIndex(schema, colName);
-            if (colIdx != -1) {
-                ColumnSchema colSchema = schema.getColumnByIndex(colIdx);
-                Type colType = colSchema.getType();
-                if (record.getValue(recordFieldName) == null) {
-                    if (schema.getColumnByIndex(colIdx).isKey()) {
-                        throw new IllegalArgumentException(String.format("Can't set primary key column %s to null ", colName));
-                    } else if(!schema.getColumnByIndex(colIdx).isNullable()) {
-                        throw new IllegalArgumentException(String.format("Can't set column %s to null ", colName));
-                    }
-
-                    if (!ignoreNull) {
-                        row.setNull(colName);
-                    }
-                } else {
-                    Object value = record.getValue(recordFieldName);
-                    switch (colType) {
-                        case BOOL:
-                            row.addBoolean(colIdx, DataTypeUtils.toBoolean(value, recordFieldName));
-                            break;
-                        case INT8:
-                            row.addByte(colIdx, DataTypeUtils.toByte(value, recordFieldName));
-                            break;
-                        case INT16:
-                            row.addShort(colIdx, DataTypeUtils.toShort(value, recordFieldName));
-                            break;
-                        case INT32:
-                            row.addInt(colIdx, DataTypeUtils.toInteger(value, recordFieldName));
-                            break;
-                        case INT64:
-                            row.addLong(colIdx, DataTypeUtils.toLong(value, recordFieldName));
-                            break;
-                        case UNIXTIME_MICROS:
-                            DataType fieldType = record.getSchema().getDataType(recordFieldName).get();
-                            Timestamp timestamp = DataTypeUtils.toTimestamp(record.getValue(recordFieldName),
-                                () -> DataTypeUtils.getDateFormat(fieldType.getFormat()), recordFieldName);
-                            row.addTimestamp(colIdx, timestamp);
-                            break;
-                        case STRING:
-                            row.addString(colIdx, DataTypeUtils.toString(value, recordFieldName));
-                            break;
-                        case BINARY:
-                            row.addBinary(colIdx, DataTypeUtils.toString(value, recordFieldName).getBytes());
-                            break;
-                        case FLOAT:
-                            row.addFloat(colIdx, DataTypeUtils.toFloat(value, recordFieldName));
-                            break;
-                        case DOUBLE:
-                            row.addDouble(colIdx, DataTypeUtils.toDouble(value, recordFieldName));
-                            break;
-                        case DECIMAL:
-                            row.addDecimal(colIdx, new BigDecimal(DataTypeUtils.toString(value, recordFieldName)));
-                            break;
-                        case VARCHAR:
-                            row.addVarchar(colIdx, DataTypeUtils.toString(value, recordFieldName));
-                            break;
-                        case DATE:
-                            row.addDate(colIdx, DataTypeUtils.toDate(value, () -> DataTypeUtils.getDateFormat(RecordFieldType.DATE.getDefaultFormat()), recordFieldName));
-                            break;
-                        default:
-                            throw new IllegalStateException(String.format("unknown column type %s", colType));
-                    }
+            if (!schema.hasColumn(colName)) {
+                continue;
+            }
+
+            final int columnIndex = schema.getColumnIndex(colName);
+            final ColumnSchema colSchema = schema.getColumnByIndex(columnIndex);
+            final Type colType = colSchema.getType();
+
+            if (record.getValue(recordFieldName) == null) {
+                if (schema.getColumnByIndex(columnIndex).isKey()) {
+                    throw new IllegalArgumentException(String.format("Can't set primary key column %s to null ", colName));
+                } else if(!schema.getColumnByIndex(columnIndex).isNullable()) {
+                    throw new IllegalArgumentException(String.format("Can't set column %s to null ", colName));
+                }
+
+                if (!ignoreNull) {
+                    row.setNull(colName);
+                }
+            } else {
+                Object value = record.getValue(recordFieldName);
+                switch (colType) {
+                    case BOOL:
+                        row.addBoolean(columnIndex, DataTypeUtils.toBoolean(value, recordFieldName));
+                        break;
+                    case INT8:
+                        row.addByte(columnIndex, DataTypeUtils.toByte(value, recordFieldName));
+                        break;
+                    case INT16:
+                        row.addShort(columnIndex, DataTypeUtils.toShort(value, recordFieldName));
+                        break;
+                    case INT32:
+                        row.addInt(columnIndex, DataTypeUtils.toInteger(value, recordFieldName));
+                        break;
+                    case INT64:
+                        row.addLong(columnIndex, DataTypeUtils.toLong(value, recordFieldName));
+                        break;
+                    case UNIXTIME_MICROS:
+                        DataType fieldType = record.getSchema().getDataType(recordFieldName).get();
+                        Timestamp timestamp = DataTypeUtils.toTimestamp(record.getValue(recordFieldName),
+                            () -> DataTypeUtils.getDateFormat(fieldType.getFormat()), recordFieldName);
+                        row.addTimestamp(columnIndex, timestamp);
+                        break;
+                    case STRING:
+                        row.addString(columnIndex, DataTypeUtils.toString(value, recordFieldName));
+                        break;
+                    case BINARY:
+                        row.addBinary(columnIndex, DataTypeUtils.toString(value, recordFieldName).getBytes());
+                        break;
+                    case FLOAT:
+                        row.addFloat(columnIndex, DataTypeUtils.toFloat(value, recordFieldName));
+                        break;
+                    case DOUBLE:
+                        row.addDouble(columnIndex, DataTypeUtils.toDouble(value, recordFieldName));
+                        break;
+                    case DECIMAL:
+                        row.addDecimal(columnIndex, new BigDecimal(DataTypeUtils.toString(value, recordFieldName)));
+                        break;
+                    case VARCHAR:
+                        row.addVarchar(columnIndex, DataTypeUtils.toString(value, recordFieldName));
+                        break;
+                    case DATE:
+                        row.addDate(columnIndex, DataTypeUtils.toDate(value, () -> DataTypeUtils.getDateFormat(RecordFieldType.DATE.getDefaultFormat()), recordFieldName));
+                        break;
+                    default:
+                        throw new IllegalStateException(String.format("unknown column type %s", colType));
+                }
+            }
         }
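The UNIXTIME_MICROS branch above leans on DataTypeUtils to coerce whatever value the Record carries into a java.sql.Timestamp before it is written to the PartialRow. A minimal, self-contained sketch of that coercion in isolation, assuming a hypothetical field named created_at whose schema declares the format yyyy-MM-dd HH:mm:ss (the class name is illustrative, not part of the commit):

import java.sql.Timestamp;

import org.apache.nifi.serialization.record.util.DataTypeUtils;

public class TimestampCoercionExample {
    public static void main(final String[] args) {
        // Hypothetical value and format; in the processor both come from the Record's schema.
        final Object value = "2020-11-12 08:30:00";
        final String format = "yyyy-MM-dd HH:mm:ss";

        // Same call the UNIXTIME_MICROS branch makes: coerce the raw value to a Timestamp.
        final Timestamp timestamp = DataTypeUtils.toTimestamp(value,
                () -> DataTypeUtils.getDateFormat(format), "created_at");

        // The processor would then call row.addTimestamp(columnIndex, timestamp).
        System.out.println(timestamp);
    }
}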
@@ -425,14 +429,6 @@ public abstract class AbstractKuduProcessor extends AbstractProcessor {
         return alterTable;
     }

-    private int getColumnIndex(Schema columns, String colName) {
-        try {
-            return columns.getColumnIndex(colName);
-        } catch (Exception ex) {
-            return -1;
-        }
-    }
-
     protected Upsert upsertRecordToKudu(KuduTable kuduTable, Record record, List<String> fieldNames, Boolean ignoreNull, Boolean lowercaseFields) {
         Upsert upsert = kuduTable.newUpsert();
         buildPartialRow(kuduTable.getSchema(), upsert.getRow(), record, fieldNames, ignoreNull, lowercaseFields);
@@ -40,34 +40,42 @@ import org.apache.nifi.components.PropertyValue;
 import org.apache.nifi.components.ValidationContext;
 import org.apache.nifi.components.ValidationResult;
 import org.apache.nifi.components.Validator;
 import org.apache.nifi.expression.ExpressionLanguageScope;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.processor.ProcessContext;
 import org.apache.nifi.processor.ProcessSession;
 import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processor.exception.ProcessException;
 import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.record.path.FieldValue;
+import org.apache.nifi.record.path.RecordPath;
+import org.apache.nifi.record.path.RecordPathResult;
+import org.apache.nifi.record.path.validation.RecordPathValidator;
 import org.apache.nifi.security.krb.KerberosAction;
 import org.apache.nifi.security.krb.KerberosUser;
 import org.apache.nifi.serialization.RecordReader;
 import org.apache.nifi.serialization.RecordReaderFactory;
 import org.apache.nifi.serialization.record.Record;
 import org.apache.nifi.serialization.record.RecordField;
 import org.apache.nifi.serialization.record.RecordFieldType;
 import org.apache.nifi.serialization.record.RecordSet;

 import javax.security.auth.login.LoginException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

+import static org.apache.nifi.expression.ExpressionLanguageScope.FLOWFILE_ATTRIBUTES;
+import static org.apache.nifi.expression.ExpressionLanguageScope.NONE;
+import static org.apache.nifi.expression.ExpressionLanguageScope.VARIABLE_REGISTRY;
+
 @EventDriven
 @SupportsBatching
@@ -86,7 +94,7 @@ public class PutKudu extends AbstractKuduProcessor {
             .description("The name of the Kudu Table to put data into")
             .required(true)
             .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
-            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
             .build();

     public static final PropertyDescriptor RECORD_READER = new Builder()
@@ -113,7 +121,7 @@ public class PutKudu extends AbstractKuduProcessor {
             .description("Convert column names to lowercase when finding index of Kudu table columns")
             .defaultValue("false")
             .required(true)
-            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
             .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
             .build();

@@ -123,10 +131,31 @@ public class PutKudu extends AbstractKuduProcessor {
                 "are encountered, the Kudu table will be altered to include new columns for those fields.")
             .defaultValue("false")
             .required(true)
-            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
             .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
             .build();

+    static final PropertyDescriptor DATA_RECORD_PATH = new Builder()
+        .name("Data RecordPath")
+        .displayName("Data RecordPath")
+        .description("If specified, this property denotes a RecordPath that will be evaluated against each incoming Record and the Record that results from evaluating the RecordPath will be sent to" +
+            " Kudu instead of sending the entire incoming Record. If not specified, the entire incoming Record will be published to Kudu.")
+        .required(false)
+        .addValidator(new RecordPathValidator())
+        .expressionLanguageSupported(NONE)
+        .build();
+
+    static final PropertyDescriptor OPERATION_RECORD_PATH = new Builder()
+        .name("Operation RecordPath")
+        .displayName("Operation RecordPath")
+        .description("If specified, this property denotes a RecordPath that will be evaluated against each incoming Record in order to determine the Kudu Operation Type. When evaluated, the " +
+            "RecordPath must evaluate to one of the valid Kudu Operation Types, or the incoming FlowFile will be routed to failure. If this property is specified, the <Kudu Operation Type> property" +
+            " will be ignored.")
+        .required(false)
+        .addValidator(new RecordPathValidator())
+        .expressionLanguageSupported(NONE)
+        .build();
+
     protected static final Validator OperationTypeValidator = new Validator() {
         @Override
         public ValidationResult validate(String subject, String value, ValidationContext context) {
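To make the two new properties concrete, here is a minimal sketch of how they are meant to be interpreted, using the record and RecordPath APIs this commit imports. The field names op and data, the paths /op and /data, and the class name are hypothetical, not part of the commit:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.nifi.record.path.RecordPath;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;

public class RecordPathKuduExample {
    public static void main(final String[] args) {
        // Schema of the nested record that actually carries the row data.
        final RecordSchema dataSchema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("id", RecordFieldType.LONG.getDataType()),
                new RecordField("name", RecordFieldType.STRING.getDataType())));

        // Top-level schema: an "op" field naming the operation plus a nested "data" record.
        final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("op", RecordFieldType.STRING.getDataType()),
                new RecordField("data", RecordFieldType.RECORD.getRecordDataType(dataSchema))));

        final Map<String, Object> dataValues = new HashMap<>();
        dataValues.put("id", 1L);
        dataValues.put("name", "example");

        final Map<String, Object> values = new HashMap<>();
        values.put("op", "UPSERT");
        values.put("data", new MapRecord(dataSchema, dataValues));
        final Record record = new MapRecord(schema, values);

        // Operation RecordPath = /op : resolves the Kudu operation for this record.
        final Object operation = RecordPath.compile("/op").evaluate(record)
                .getSelectedFields().findFirst().get().getValue();   // "UPSERT"

        // Data RecordPath = /data : only this nested Record would be written to Kudu.
        final Object data = RecordPath.compile("/data").evaluate(record)
                .getSelectedFields().findFirst().get().getValue();   // the nested MapRecord

        System.out.println(operation + " -> " + data);
    }
}

With Operation RecordPath set to /op and Data RecordPath set to /data, a record shaped like this would be upserted using only the nested data record.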
@@ -156,9 +185,10 @@ public class PutKudu extends AbstractKuduProcessor {
             .displayName("Kudu Operation Type")
             .description("Specify operationType for this processor.\n" +
                     "Valid values are: " +
-                    Arrays.stream(OperationType.values()).map(Enum::toString).collect(Collectors.joining(", ")))
+                    Arrays.stream(OperationType.values()).map(Enum::toString).collect(Collectors.joining(", ")) +
+                    ". This Property will be ignored if the <Operation RecordPath> property is set.")
             .defaultValue(OperationType.INSERT.toString())
-            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
             .addValidator(OperationTypeValidator)
             .build();

@@ -184,7 +214,7 @@ public class PutKudu extends AbstractKuduProcessor {
             .defaultValue("1")
             .required(true)
             .addValidator(StandardValidators.createLongValidator(1, 100000, true))
-            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+            .expressionLanguageSupported(VARIABLE_REGISTRY)
             .build();

     protected static final PropertyDescriptor BATCH_SIZE = new Builder()
@@ -196,7 +226,7 @@ public class PutKudu extends AbstractKuduProcessor {
             .defaultValue("100")
             .required(true)
             .addValidator(StandardValidators.createLongValidator(1, 100000, true))
-            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
+            .expressionLanguageSupported(VARIABLE_REGISTRY)
             .build();

     protected static final PropertyDescriptor IGNORE_NULL = new Builder()
@@ -205,7 +235,7 @@ public class PutKudu extends AbstractKuduProcessor {
             .defaultValue("false")
             .required(true)
             .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
-            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
+            .expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
             .build();

     protected static final Relationship REL_SUCCESS = new Relationship.Builder()
@@ -220,9 +250,11 @@ public class PutKudu extends AbstractKuduProcessor {
     public static final String RECORD_COUNT_ATTR = "record.count";

     // Properties set in onScheduled.
-    protected int batchSize = 100;
-    protected int ffbatch = 1;
-    protected SessionConfiguration.FlushMode flushMode;
+    private volatile int batchSize = 100;
+    private volatile int ffbatch = 1;
+    private volatile SessionConfiguration.FlushMode flushMode;
+    private volatile Function<Record, OperationType> recordPathOperationType;
+    private volatile RecordPath dataRecordPath;

     @Override
     protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
@@ -236,6 +268,8 @@ public class PutKudu extends AbstractKuduProcessor {
         properties.add(LOWERCASE_FIELD_NAMES);
         properties.add(HANDLE_SCHEMA_DRIFT);
         properties.add(RECORD_READER);
+        properties.add(DATA_RECORD_PATH);
+        properties.add(OPERATION_RECORD_PATH);
         properties.add(INSERT_OPERATION);
         properties.add(FLUSH_MODE);
         properties.add(FLOWFILE_BATCH_SIZE);
@@ -255,11 +289,22 @@ public class PutKudu extends AbstractKuduProcessor {
     }

     @OnScheduled
-    public void onScheduled(final ProcessContext context) throws IOException, LoginException {
+    public void onScheduled(final ProcessContext context) throws LoginException {
         batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
         ffbatch = context.getProperty(FLOWFILE_BATCH_SIZE).evaluateAttributeExpressions().asInteger();
         flushMode = SessionConfiguration.FlushMode.valueOf(context.getProperty(FLUSH_MODE).getValue().toUpperCase());
         createKerberosUserAndOrKuduClient(context);
+
+        final String operationRecordPathValue = context.getProperty(OPERATION_RECORD_PATH).getValue();
+        if (operationRecordPathValue == null) {
+            recordPathOperationType = null;
+        } else {
+            final RecordPath recordPath = RecordPath.compile(operationRecordPathValue);
+            recordPathOperationType = new RecordPathOperationType(recordPath);
+        }
+
+        final String dataRecordPathValue = context.getProperty(DATA_RECORD_PATH).getValue();
+        dataRecordPath = dataRecordPathValue == null ? null : RecordPath.compile(dataRecordPathValue);
     }

     @Override
@@ -301,83 +346,119 @@ public class PutKudu extends AbstractKuduProcessor {
              final RecordReader recordReader = recordReaderFactory.createRecordReader(flowFile, in, getLogger())) {

             final String tableName = getEvaluatedProperty(TABLE_NAME, context, flowFile);
-            final OperationType operationType = OperationType.valueOf(getEvaluatedProperty(INSERT_OPERATION, context, flowFile).toUpperCase());
             final Boolean ignoreNull = Boolean.valueOf(getEvaluatedProperty(IGNORE_NULL, context, flowFile));
             final Boolean lowercaseFields = Boolean.valueOf(getEvaluatedProperty(LOWERCASE_FIELD_NAMES, context, flowFile));
-            final Boolean handleSchemaDrift = Boolean.valueOf(getEvaluatedProperty(HANDLE_SCHEMA_DRIFT, context, flowFile));
+            final boolean handleSchemaDrift = Boolean.parseBoolean(getEvaluatedProperty(HANDLE_SCHEMA_DRIFT, context, flowFile));
+
+            final Function<Record, OperationType> operationTypeFunction;
+            if (recordPathOperationType == null) {
+                final OperationType staticOperationType = OperationType.valueOf(getEvaluatedProperty(INSERT_OPERATION, context, flowFile).toUpperCase());
+                operationTypeFunction = record -> staticOperationType;
+            } else {
+                operationTypeFunction = recordPathOperationType;
+            }

             final RecordSet recordSet = recordReader.createRecordSet();
-            final List<String> fieldNames = recordReader.getSchema().getFieldNames();
             KuduTable kuduTable = kuduClient.openTable(tableName);

             // If handleSchemaDrift is true, check for any missing columns and alter the Kudu table to add them.
             if (handleSchemaDrift) {
                 final Schema schema = kuduTable.getSchema();
-                Stream<RecordField> fields = recordReader.getSchema().getFields().stream();
-                List<RecordField> missing = fields.filter(field -> !schema.hasColumn(
-                        lowercaseFields ? field.getFieldName().toLowerCase() : field.getFieldName()))
-                        .collect(Collectors.toList());
+                final List<RecordField> missing = recordReader.getSchema().getFields().stream()
+                    .filter(field -> !schema.hasColumn(lowercaseFields ? field.getFieldName().toLowerCase() : field.getFieldName()))
+                    .collect(Collectors.toList());

                 if (!missing.isEmpty()) {
-                    getLogger().info("adding {} columns to table '{}' to handle schema drift",
-                            new Object[]{missing.size(), tableName});
+                    getLogger().info("adding {} columns to table '{}' to handle schema drift", new Object[]{missing.size(), tableName});

                     // Add each column one at a time to avoid failing if some of the missing columns
                     // were created by a concurrent thread or application attempting to handle schema drift.
-                    for (RecordField field : missing) {
+                    for (final RecordField field : missing) {
                         try {
                             final String columnName = lowercaseFields ? field.getFieldName().toLowerCase() : field.getFieldName();
                             kuduClient.alterTable(tableName, getAddNullableColumnStatement(columnName, field.getDataType()));
-                        } catch (KuduException e) {
+                        } catch (final KuduException e) {
                             // Ignore the exception if the column already exists due to concurrent
                             // threads or applications attempting to handle schema drift.
                             if (e.getStatus().isAlreadyPresent()) {
-                                getLogger().info("column already exists in table '{}' while handling schema drift",
-                                        new Object[]{tableName});
+                                getLogger().info("Column already exists in table '{}' while handling schema drift", new Object[]{tableName});
                             } else {
                                 throw new ProcessException(e);
                             }
                         }
                     }

                     // Re-open the table to get the new schema.
                     kuduTable = kuduClient.openTable(tableName);
                 }
             }

-            // In the case of INSERT_IGNORE the Kudu session is modified to ignore row errors.
-            // Because the session is shared across flow files, for batching efficiency, we
-            // need to flush when changing to and from INSERT_IGNORE operation types.
-            // This should be updated and simplified when KUDU-1563 is completed.
-            if (prevOperationType != operationType && (prevOperationType == OperationType.INSERT_IGNORE || operationType == OperationType.INSERT_IGNORE)) {
-                flushKuduSession(kuduSession, false, pendingRowErrors);
-                kuduSession.setIgnoreAllDuplicateRows(operationType == OperationType.INSERT_IGNORE);
-            }
-            prevOperationType = operationType;
-
             Record record = recordSet.next();
-            while (record != null) {
-                Operation operation = createKuduOperation(operationType, record, fieldNames, ignoreNull, lowercaseFields, kuduTable);
-                // We keep track of mappings between Operations and their origins,
-                // so that we know which FlowFiles should be marked failure after buffered flush.
-                operationFlowFileMap.put(operation, flowFile);
+            recordReaderLoop: while (record != null) {
+                final OperationType operationType = operationTypeFunction.apply(record);

-                // Flush mutation buffer of KuduSession to avoid "MANUAL_FLUSH is enabled
-                // but the buffer is too big" error. This can happen when flush mode is
-                // MANUAL_FLUSH and a FlowFile has more than one records.
-                if (numBuffered == batchSize && flushMode == SessionConfiguration.FlushMode.MANUAL_FLUSH) {
-                    numBuffered = 0;
-                    flushKuduSession(kuduSession, false, pendingRowErrors);
+                final List<Record> dataRecords;
+                if (dataRecordPath == null) {
+                    dataRecords = Collections.singletonList(record);
+                } else {
+                    final RecordPathResult result = dataRecordPath.evaluate(record);
+                    final List<FieldValue> fieldValues = result.getSelectedFields().collect(Collectors.toList());
+                    if (fieldValues.isEmpty()) {
+                        throw new ProcessException("RecordPath " + dataRecordPath.getPath() + " evaluated against Record yielded no results.");
+                    }
+
+                    for (final FieldValue fieldValue : fieldValues) {
+                        final RecordFieldType fieldType = fieldValue.getField().getDataType().getFieldType();
+                        if (fieldType != RecordFieldType.RECORD) {
+                            throw new ProcessException("RecordPath " + dataRecordPath.getPath() + " evaluated against Record expected to return one or more Records but encountered field of type" +
+                                " " + fieldType);
+                        }
+                    }
+
+                    dataRecords = new ArrayList<>(fieldValues.size());
+                    for (final FieldValue fieldValue : fieldValues) {
+                        dataRecords.add((Record) fieldValue.getValue());
+                    }
                 }

-                // OperationResponse is returned only when flush mode is set to AUTO_FLUSH_SYNC
-                OperationResponse response = kuduSession.apply(operation);
-                if (response != null && response.hasRowError()) {
-                    // Stop processing the records on the first error.
-                    // Note that Kudu does not support rolling back of previous operations.
-                    flowFileFailures.put(flowFile, response.getRowError());
-                    break;
+                for (final Record dataRecord : dataRecords) {
+                    // In the case of INSERT_IGNORE the Kudu session is modified to ignore row errors.
+                    // Because the session is shared across flow files, for batching efficiency, we
+                    // need to flush when changing to and from INSERT_IGNORE operation types.
+                    // This should be updated and simplified when KUDU-1563 is completed.
+                    if (prevOperationType != operationType && (prevOperationType == OperationType.INSERT_IGNORE || operationType == OperationType.INSERT_IGNORE)) {
+                        flushKuduSession(kuduSession, false, pendingRowErrors);
+                        kuduSession.setIgnoreAllDuplicateRows(operationType == OperationType.INSERT_IGNORE);
+                    }
+                    prevOperationType = operationType;
+
+                    final List<String> fieldNames = dataRecord.getSchema().getFieldNames();
+                    Operation operation = createKuduOperation(operationType, dataRecord, fieldNames, ignoreNull, lowercaseFields, kuduTable);
+                    // We keep track of mappings between Operations and their origins,
+                    // so that we know which FlowFiles should be marked failure after buffered flush.
+                    operationFlowFileMap.put(operation, flowFile);
+
+                    // Flush mutation buffer of KuduSession to avoid "MANUAL_FLUSH is enabled
+                    // but the buffer is too big" error. This can happen when flush mode is
+                    // MANUAL_FLUSH and a FlowFile has more than one record.
+                    if (numBuffered == batchSize && flushMode == SessionConfiguration.FlushMode.MANUAL_FLUSH) {
+                        numBuffered = 0;
+                        flushKuduSession(kuduSession, false, pendingRowErrors);
+                    }
+
+                    // OperationResponse is returned only when flush mode is set to AUTO_FLUSH_SYNC
+                    OperationResponse response = kuduSession.apply(operation);
+                    if (response != null && response.hasRowError()) {
+                        // Stop processing the records on the first error.
+                        // Note that Kudu does not support rolling back of previous operations.
+                        flowFileFailures.put(flowFile, response.getRowError());
+                        break recordReaderLoop;
+                    }
+
+                    numBuffered++;
+                    numRecords.merge(flowFile, 1, Integer::sum);
                 }

-                numBuffered++;
-                numRecords.merge(flowFile, 1, Integer::sum);
                 record = recordSet.next();
             }
         } catch (Exception ex) {
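The rewritten loop above fans each incoming Record out into one or more data Records before any Kudu Operation is built. Condensed into a hypothetical standalone helper (not code from the commit, but the same calls and checks), the fan-out looks roughly like this:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.record.path.FieldValue;
import org.apache.nifi.record.path.RecordPath;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordFieldType;

public class DataRecordPathExample {
    // Returns the Record(s) that should actually be written to Kudu for one incoming Record.
    static List<Record> resolveDataRecords(final Record record, final RecordPath dataRecordPath) {
        if (dataRecordPath == null) {
            return Collections.singletonList(record);   // no Data RecordPath: write the incoming Record as-is
        }

        final List<FieldValue> fieldValues = dataRecordPath.evaluate(record)
                .getSelectedFields().collect(Collectors.toList());
        if (fieldValues.isEmpty()) {
            throw new ProcessException("RecordPath " + dataRecordPath.getPath() + " yielded no results.");
        }

        final List<Record> dataRecords = new ArrayList<>(fieldValues.size());
        for (final FieldValue fieldValue : fieldValues) {
            // Every selected field must itself be a Record; anything else is a configuration error.
            if (fieldValue.getField().getDataType().getFieldType() != RecordFieldType.RECORD) {
                throw new ProcessException("RecordPath " + dataRecordPath.getPath()
                        + " selected a field of type " + fieldValue.getField().getDataType().getFieldType());
            }
            dataRecords.add((Record) fieldValue.getValue());
        }
        return dataRecords;
    }
}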
@@ -460,4 +541,32 @@ public class PutKudu extends AbstractKuduProcessor {
                 throw new IllegalArgumentException(String.format("OperationType: %s not supported by Kudu", operationType));
         }
     }
+
+    private static class RecordPathOperationType implements Function<Record, OperationType> {
+        private final RecordPath recordPath;
+
+        public RecordPathOperationType(final RecordPath recordPath) {
+            this.recordPath = recordPath;
+        }
+
+        @Override
+        public OperationType apply(final Record record) {
+            final RecordPathResult recordPathResult = recordPath.evaluate(record);
+            final List<FieldValue> resultList = recordPathResult.getSelectedFields().distinct().collect(Collectors.toList());
+            if (resultList.isEmpty()) {
+                throw new ProcessException("Evaluated RecordPath " + recordPath.getPath() + " against Record but got no results");
+            }
+
+            if (resultList.size() > 1) {
+                throw new ProcessException("Evaluated RecordPath " + recordPath.getPath() + " against Record and received multiple distinct results (" + resultList + ")");
+            }
+
+            final String resultValue = String.valueOf(resultList.get(0).getValue());
+            try {
+                return OperationType.valueOf(resultValue.toUpperCase());
+            } catch (final IllegalArgumentException iae) {
+                throw new ProcessException("Evaluated RecordPath " + recordPath.getPath() + " against Record to determine Kudu Operation Type but found invalid value: " + resultValue);
+            }
+        }
+    }
 }