NIFI-4786 Allow Expression Evaluation to Kinesis/Firehose Stream Name

Signed-off-by: James Wing <jvwing@gmail.com>

This closes #2409.
Author: dorian.bugeja, 2018-01-17 11:22:39 +01:00
Committed by: James Wing
parent c4e2ac7cda
commit 8bdc2910e1
6 changed files with 127 additions and 82 deletions
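
In short, PutKinesisStream and PutKinesisFirehose no longer resolve the stream name once per onTrigger call; the name is evaluated against each FlowFile's attributes, and outgoing records are grouped per resolved stream, with one batch request per stream. A minimal, self-contained sketch of that grouping pattern (plain Java, no NiFi API; the kinesis.stream attribute name is invented for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch of the per-stream batching pattern this commit introduces: resolve a
// stream name per message (in NiFi, via evaluateAttributeExpressions against
// each FlowFile), group messages by the resolved name, then send one batch per
// stream. Plain maps stand in for FlowFiles and Kinesis records.
public class PerStreamBatchingSketch {

    public static void main(String[] args) {
        List<Map<String, String>> messages = Arrays.asList(
                attrs("stream-a"), attrs("stream-b"), attrs("stream-a"));

        Map<String, List<Map<String, String>>> perStream = new HashMap<>();
        for (Map<String, String> message : messages) {
            // Stands in for: context.getProperty(KINESIS_STREAM_NAME)
            //                       .evaluateAttributeExpressions(flowFile).getValue()
            String streamName = message.get("kinesis.stream");
            if (!perStream.containsKey(streamName)) {
                perStream.put(streamName, new ArrayList<Map<String, String>>());
            }
            perStream.get(streamName).add(message);
        }

        for (Map.Entry<String, List<Map<String, String>>> entry : perStream.entrySet()) {
            // One PutRecords / PutRecordBatch request per resolved stream name
            System.out.println(entry.getKey() + " -> " + entry.getValue().size() + " record(s)");
        }
    }

    private static Map<String, String> attrs(String streamName) {
        Map<String, String> attributes = new HashMap<>();
        attributes.put("kinesis.stream", streamName);
        return attributes;
    }
}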

File: AbstractBaseKinesisProcessor.java

@@ -64,16 +64,16 @@ public abstract class AbstractBaseKinesisProcessor<ClientType extends AmazonWebS
     public static final int MAX_MESSAGE_SIZE = 1000 * 1024;
 
     protected FlowFile handleFlowFileTooBig(final ProcessSession session, FlowFile flowFileCandidate,
-            final String streamName, String message) {
+            String message) {
         flowFileCandidate = session.putAttribute(flowFileCandidate, message,
             "record too big " + flowFileCandidate.getSize() + " max allowed " + MAX_MESSAGE_SIZE );
         session.transfer(flowFileCandidate, REL_FAILURE);
-        getLogger().error("Failed to publish to kinesis {} records {} because the size was greater than {} bytes",
-            new Object[]{streamName, flowFileCandidate, MAX_MESSAGE_SIZE});
+        getLogger().error("Failed to publish to kinesis records {} because the size was greater than {} bytes",
+            new Object[]{flowFileCandidate, MAX_MESSAGE_SIZE});
         return flowFileCandidate;
     }
 
-    protected List<FlowFile> filterMessagesByMaxSize(final ProcessSession session, final int batchSize, final long maxBufferSizeBytes, final String streamName, String message) {
+    protected List<FlowFile> filterMessagesByMaxSize(final ProcessSession session, final int batchSize, final long maxBufferSizeBytes, String message) {
         List<FlowFile> flowFiles = new ArrayList<FlowFile>(batchSize);
 
         long currentBufferSizeBytes = 0;
@@ -85,7 +85,7 @@ public abstract class AbstractBaseKinesisProcessor<ClientType extends AmazonWebS
                 break;
 
             if (flowFileCandidate.getSize() > MAX_MESSAGE_SIZE) {
-                flowFileCandidate = handleFlowFileTooBig(session, flowFileCandidate, streamName, message);
+                flowFileCandidate = handleFlowFileTooBig(session, flowFileCandidate, message);
                 continue;
             }

File: AbstractKinesisFirehoseProcessor.java

@@ -34,7 +34,7 @@ public abstract class AbstractKinesisFirehoseProcessor extends AbstractBaseKines
     public static final PropertyDescriptor KINESIS_FIREHOSE_DELIVERY_STREAM_NAME = new PropertyDescriptor.Builder()
             .name("Amazon Kinesis Firehose Delivery Stream Name")
             .description("The name of kinesis firehose delivery stream")
-            .expressionLanguageSupported(false)
+            .expressionLanguageSupported(true)
             .required(true)
             .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
             .build();

File: AbstractKinesisStreamProcessor.java

@@ -35,7 +35,7 @@ public abstract class AbstractKinesisStreamProcessor extends AbstractBaseKinesis
             .name("kinesis-stream-name")
             .displayName("Amazon Kinesis Stream Name")
             .description("The name of Kinesis Stream")
-            .expressionLanguageSupported(false)
+            .expressionLanguageSupported(true)
             .required(true)
             .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
             .build();
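
Both descriptors now set expressionLanguageSupported(true), so the stream name may reference FlowFile attributes. A hypothetical configuration sketch against NiFi's mock framework (the kinesis.stream attribute is invented for illustration, and a real run would also need valid AWS credentials):

import java.util.Collections;

import org.apache.nifi.processors.aws.kinesis.stream.PutKinesisStream;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class StreamNameExpressionSketch {
    public static void main(String[] args) {
        TestRunner runner = TestRunners.newTestRunner(PutKinesisStream.class);
        // The property now accepts Expression Language, so each FlowFile can
        // name its own target stream via an attribute.
        runner.setProperty(PutKinesisStream.KINESIS_STREAM_NAME, "${kinesis.stream}");
        runner.enqueue("payload".getBytes(),
                Collections.singletonMap("kinesis.stream", "my-stream-a"));
        // runner.run() would issue a real PutRecords call, so it is omitted here.
    }
}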

File: pom.xml

@@ -64,6 +64,16 @@
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-ssl-context-service-api</artifactId>
         </dependency>
+        <!-- Test Dependencies for testing interaction with AWS -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.6.6</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
     <build>
         <plugins>

File: PutKinesisFirehose.java

@@ -89,16 +89,14 @@ public class PutKinesisFirehose extends AbstractKinesisFirehoseProcessor {
         final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
         final long maxBufferSizeBytes = context.getProperty(MAX_MESSAGE_BUFFER_SIZE_MB).asDataSize(DataUnit.B).longValue();
-        final String firehoseStreamName = context.getProperty(KINESIS_FIREHOSE_DELIVERY_STREAM_NAME).getValue();
-        List<FlowFile> flowFiles = filterMessagesByMaxSize(session, batchSize, maxBufferSizeBytes, firehoseStreamName,
-            AWS_KINESIS_FIREHOSE_ERROR_MESSAGE);
+        List<FlowFile> flowFiles = filterMessagesByMaxSize(session, batchSize, maxBufferSizeBytes, AWS_KINESIS_FIREHOSE_ERROR_MESSAGE);
+        HashMap<String, List<FlowFile>> hashFlowFiles = new HashMap<>();
+        HashMap<String, List<Record>> recordHash = new HashMap<String, List<Record>>();
 
         final AmazonKinesisFirehoseClient client = getClient();
 
         try {
-            List<Record> records = new ArrayList<>();
             List<FlowFile> failedFlowFiles = new ArrayList<>();
             List<FlowFile> successfulFlowFiles = new ArrayList<>();
@@ -106,46 +104,66 @@ public class PutKinesisFirehose extends AbstractKinesisFirehoseProcessor {
             for (int i = 0; i < flowFiles.size(); i++) {
                 FlowFile flowFile = flowFiles.get(i);
 
+                final String firehoseStreamName = context.getProperty(KINESIS_FIREHOSE_DELIVERY_STREAM_NAME).evaluateAttributeExpressions(flowFile).getValue();
                 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                 session.exportTo(flowFile, baos);
-                records.add(new Record().withData(ByteBuffer.wrap(baos.toByteArray())));
+
+                if (recordHash.containsKey(firehoseStreamName) == false) {
+                    recordHash.put(firehoseStreamName, new ArrayList<>());
+                }
+                if (hashFlowFiles.containsKey(firehoseStreamName) == false) {
+                    hashFlowFiles.put(firehoseStreamName, new ArrayList<>());
+                }
+
+                hashFlowFiles.get(firehoseStreamName).add(flowFile);
+                recordHash.get(firehoseStreamName).add(new Record().withData(ByteBuffer.wrap(baos.toByteArray())));
             }
 
-            if ( records.size() > 0 ) {
-                // Send the batch
-                PutRecordBatchRequest putRecordBatchRequest = new PutRecordBatchRequest();
-                putRecordBatchRequest.setDeliveryStreamName(firehoseStreamName);
-                putRecordBatchRequest.setRecords(records);
-                PutRecordBatchResult results = client.putRecordBatch(putRecordBatchRequest);
+            for (Map.Entry<String, List<Record>> entryRecord : recordHash.entrySet()) {
+                String streamName = entryRecord.getKey();
+                List<Record> records = entryRecord.getValue();
 
-                // Separate out the successful and failed flow files
-                List<PutRecordBatchResponseEntry> responseEntries = results.getRequestResponses();
-                for (int i = 0; i < responseEntries.size(); i++ ) {
-                    PutRecordBatchResponseEntry entry = responseEntries.get(i);
-                    FlowFile flowFile = flowFiles.get(i);
+                if (records.size() > 0) {
+                    // Send the batch
+                    PutRecordBatchRequest putRecordBatchRequest = new PutRecordBatchRequest();
+                    putRecordBatchRequest.setDeliveryStreamName(streamName);
+                    putRecordBatchRequest.setRecords(records);
+                    PutRecordBatchResult results = client.putRecordBatch(putRecordBatchRequest);
 
-                    Map<String,String> attributes = new HashMap<>();
-                    attributes.put(AWS_KINESIS_FIREHOSE_RECORD_ID, entry.getRecordId());
-                    flowFile = session.putAttribute(flowFile, AWS_KINESIS_FIREHOSE_RECORD_ID, entry.getRecordId());
-                    if ( ! StringUtils.isBlank(entry.getErrorCode()) ) {
-                        attributes.put(AWS_KINESIS_FIREHOSE_ERROR_CODE, entry.getErrorCode());
-                        attributes.put(AWS_KINESIS_FIREHOSE_ERROR_MESSAGE, entry.getErrorMessage());
-                        flowFile = session.putAllAttributes(flowFile, attributes);
-                        failedFlowFiles.add(flowFile);
-                    } else {
-                        flowFile = session.putAllAttributes(flowFile, attributes);
-                        successfulFlowFiles.add(flowFile);
+                    // Separate out the successful and failed flow files
+                    List<PutRecordBatchResponseEntry> responseEntries = results.getRequestResponses();
+                    for (int i = 0; i < responseEntries.size(); i++ ) {
+                        PutRecordBatchResponseEntry entry = responseEntries.get(i);
+                        FlowFile flowFile = hashFlowFiles.get(streamName).get(i);
+
+                        Map<String,String> attributes = new HashMap<>();
+                        attributes.put(AWS_KINESIS_FIREHOSE_RECORD_ID, entry.getRecordId());
+                        flowFile = session.putAttribute(flowFile, AWS_KINESIS_FIREHOSE_RECORD_ID, entry.getRecordId());
+                        if (StringUtils.isBlank(entry.getErrorCode()) == false) {
+                            attributes.put(AWS_KINESIS_FIREHOSE_ERROR_CODE, entry.getErrorCode());
+                            attributes.put(AWS_KINESIS_FIREHOSE_ERROR_MESSAGE, entry.getErrorMessage());
+                            flowFile = session.putAllAttributes(flowFile, attributes);
+                            failedFlowFiles.add(flowFile);
+                        } else {
+                            flowFile = session.putAllAttributes(flowFile, attributes);
+                            successfulFlowFiles.add(flowFile);
+                        }
                     }
                 }
+                recordHash.get(streamName).clear();
+                records.clear();
+            }
 
-                if ( failedFlowFiles.size() > 0 ) {
-                    session.transfer(failedFlowFiles, REL_FAILURE);
-                    getLogger().error("Failed to publish to kinesis firehose {} records {}", new Object[]{firehoseStreamName, failedFlowFiles});
-                }
-                if ( successfulFlowFiles.size() > 0 ) {
-                    session.transfer(successfulFlowFiles, REL_SUCCESS);
-                    getLogger().info("Successfully published to kinesis firehose {} records {}", new Object[]{firehoseStreamName, successfulFlowFiles});
-                }
-
-                records.clear();
-            }
+            if (failedFlowFiles.size() > 0) {
+                session.transfer(failedFlowFiles, REL_FAILURE);
+                getLogger().error("Failed to publish to kinesis firehose {}", new Object[]{failedFlowFiles});
+            }
+            if (successfulFlowFiles.size() > 0) {
+                session.transfer(successfulFlowFiles, REL_SUCCESS);
+                getLogger().info("Successfully published to kinesis firehose {}", new Object[]{successfulFlowFiles});
+            }
         } catch (final Exception exception) {
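
Note that the per-stream response entries are matched back to FlowFiles by position (hashFlowFiles.get(streamName).get(i)), which holds because both maps are filled in the same build loop. A hypothetical integration-style sketch of the new multi-stream behavior (stream names and the firehose.stream attribute are placeholders; the commit's actual test changes are not shown in this excerpt, and running this needs real AWS credentials):

import java.util.Collections;

import org.apache.nifi.processors.aws.kinesis.firehose.PutKinesisFirehose;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class MultiStreamFirehoseSketch {
    public static void main(String[] args) {
        TestRunner runner = TestRunners.newTestRunner(PutKinesisFirehose.class);
        runner.setProperty(PutKinesisFirehose.KINESIS_FIREHOSE_DELIVERY_STREAM_NAME, "${firehose.stream}");

        // Two FlowFiles bound for two different delivery streams in one batch
        runner.enqueue("first".getBytes(),
                Collections.singletonMap("firehose.stream", "delivery-stream-a"));
        runner.enqueue("second".getBytes(),
                Collections.singletonMap("firehose.stream", "delivery-stream-b"));

        // With credentials configured, running should send one PutRecordBatch
        // per resolved stream and route both FlowFiles to success:
        // runner.run();
        // runner.assertAllFlowFilesTransferred(PutKinesisFirehose.REL_SUCCESS, 2);
    }
}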

File: PutKinesisStream.java

@@ -94,15 +94,16 @@ public class PutKinesisStream extends AbstractKinesisStreamProcessor {
         final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
         final long maxBufferSizeBytes = context.getProperty(MAX_MESSAGE_BUFFER_SIZE_MB).asDataSize(DataUnit.B).longValue();
-        final String streamName = context.getProperty(KINESIS_STREAM_NAME).getValue();
-        List<FlowFile> flowFiles = filterMessagesByMaxSize(session, batchSize, maxBufferSizeBytes, streamName,
-            AWS_KINESIS_ERROR_MESSAGE);
+        List<FlowFile> flowFiles = filterMessagesByMaxSize(session, batchSize, maxBufferSizeBytes, AWS_KINESIS_ERROR_MESSAGE);
+        HashMap<String, List<FlowFile>> hashFlowFiles = new HashMap<>();
+        HashMap<String, List<PutRecordsRequestEntry>> recordHash = new HashMap<String, List<PutRecordsRequestEntry>>();
 
         final AmazonKinesisClient client = getClient();
 
         try {
-            List<PutRecordsRequestEntry> records = new ArrayList<>();
             List<FlowFile> failedFlowFiles = new ArrayList<>();
             List<FlowFile> successfulFlowFiles = new ArrayList<>();
@@ -111,64 +112,80 @@ public class PutKinesisStream extends AbstractKinesisStreamProcessor {
             for (int i = 0; i < flowFiles.size(); i++) {
                 FlowFile flowFile = flowFiles.get(i);
 
+                String streamName = context.getProperty(KINESIS_STREAM_NAME).evaluateAttributeExpressions(flowFile).getValue();
                 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                 session.exportTo(flowFile, baos);
 
                 PutRecordsRequestEntry record = new PutRecordsRequestEntry().withData(ByteBuffer.wrap(baos.toByteArray()));
 
                 String partitionKey = context.getProperty(PutKinesisStream.KINESIS_PARTITION_KEY)
                         .evaluateAttributeExpressions(flowFiles.get(i)).getValue();
 
-                if ( ! StringUtils.isBlank(partitionKey) ) {
+                if (StringUtils.isBlank(partitionKey) == false) {
                     record.setPartitionKey(partitionKey);
                 } else {
                     record.setPartitionKey(Integer.toString(randomParitionKeyGenerator.nextInt()));
                 }
 
-                records.add(record);
+                if (recordHash.containsKey(streamName) == false) {
+                    recordHash.put(streamName, new ArrayList<>());
+                }
+                if (hashFlowFiles.containsKey(streamName) == false) {
+                    hashFlowFiles.put(streamName, new ArrayList<>());
+                }
+
+                hashFlowFiles.get(streamName).add(flowFile);
+                recordHash.get(streamName).add(record);
             }
 
-            if ( records.size() > 0 ) {
+            for (Map.Entry<String, List<PutRecordsRequestEntry>> entryRecord : recordHash.entrySet()) {
+                String streamName = entryRecord.getKey();
+                List<PutRecordsRequestEntry> records = entryRecord.getValue();
 
-                PutRecordsRequest putRecordRequest = new PutRecordsRequest();
-                putRecordRequest.setStreamName(streamName);
-                putRecordRequest.setRecords(records);
-                PutRecordsResult results = client.putRecords(putRecordRequest);
+                if (records.size() > 0) {
 
-                List<PutRecordsResultEntry> responseEntries = results.getRecords();
-                for (int i = 0; i < responseEntries.size(); i++ ) {
-                    PutRecordsResultEntry entry = responseEntries.get(i);
-                    FlowFile flowFile = flowFiles.get(i);
+                    PutRecordsRequest putRecordRequest = new PutRecordsRequest();
+                    putRecordRequest.setStreamName(streamName);
+                    putRecordRequest.setRecords(records);
+                    PutRecordsResult results = client.putRecords(putRecordRequest);
 
-                    Map<String,String> attributes = new HashMap<>();
-                    attributes.put(AWS_KINESIS_SHARD_ID, entry.getShardId());
-                    attributes.put(AWS_KINESIS_SEQUENCE_NUMBER, entry.getSequenceNumber());
+                    List<PutRecordsResultEntry> responseEntries = results.getRecords();
+                    for (int i = 0; i < responseEntries.size(); i++ ) {
+                        PutRecordsResultEntry entry = responseEntries.get(i);
+                        FlowFile flowFile = hashFlowFiles.get(streamName).get(i);
 
-                    if ( ! StringUtils.isBlank(entry.getErrorCode()) ) {
-                        attributes.put(AWS_KINESIS_ERROR_CODE, entry.getErrorCode());
-                        attributes.put(AWS_KINESIS_ERROR_MESSAGE, entry.getErrorMessage());
-                        flowFile = session.putAllAttributes(flowFile, attributes);
-                        failedFlowFiles.add(flowFile);
-                    } else {
-                        flowFile = session.putAllAttributes(flowFile, attributes);
-                        successfulFlowFiles.add(flowFile);
+                        Map<String,String> attributes = new HashMap<>();
+                        attributes.put(AWS_KINESIS_SHARD_ID, entry.getShardId());
+                        attributes.put(AWS_KINESIS_SEQUENCE_NUMBER, entry.getSequenceNumber());
+
+                        if (StringUtils.isBlank(entry.getErrorCode()) == false) {
+                            attributes.put(AWS_KINESIS_ERROR_CODE, entry.getErrorCode());
+                            attributes.put(AWS_KINESIS_ERROR_MESSAGE, entry.getErrorMessage());
+                            flowFile = session.putAllAttributes(flowFile, attributes);
+                            failedFlowFiles.add(flowFile);
+                        } else {
+                            flowFile = session.putAllAttributes(flowFile, attributes);
+                            successfulFlowFiles.add(flowFile);
+                        }
                     }
                 }
+                recordHash.get(streamName).clear();
+                records.clear();
+            }
 
-                if ( failedFlowFiles.size() > 0 ) {
-                    session.transfer(failedFlowFiles, REL_FAILURE);
-                    getLogger().error("Failed to publish to kinesis {} records {}", new Object[]{streamName, failedFlowFiles});
-                }
-                if ( successfulFlowFiles.size() > 0 ) {
-                    session.transfer(successfulFlowFiles, REL_SUCCESS);
-                    getLogger().debug("Successfully published to kinesis {} records {}", new Object[]{streamName, successfulFlowFiles});
-                }
-                records.clear();
+            if ( failedFlowFiles.size() > 0 ) {
+                session.transfer(failedFlowFiles, REL_FAILURE);
+                getLogger().error("Failed to publish to kinesis records {}", new Object[]{failedFlowFiles});
+            }
+            if ( successfulFlowFiles.size() > 0 ) {
+                session.transfer(successfulFlowFiles, REL_SUCCESS);
+                getLogger().debug("Successfully published to kinesis records {}", new Object[]{successfulFlowFiles});
             }
         } catch (final Exception exception) {
-            getLogger().error("Failed to publish due to exception {} to kinesis {} flowfiles {} ", new Object[]{exception, streamName, flowFiles});
+            getLogger().error("Failed to publish due to exception {} flowfiles {} ", new Object[]{exception, flowFiles});
             session.transfer(flowFiles, REL_FAILURE);
             context.yield();
         }
     }
 }