diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml
index b1ab989e67..ddcd6e8a6c 100755
--- a/nifi-assembly/pom.xml
+++ b/nifi-assembly/pom.xml
@@ -196,6 +196,11 @@ language governing permissions and limitations under the License. -->
             <artifactId>nifi-kafka-0-11-nar</artifactId>
             <type>nar</type>
         </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-kafka-1-0-nar</artifactId>
+            <type>nar</type>
+        </dependency>
         <dependency>
             <groupId>org.apache.nifi</groupId>
             <artifactId>nifi-confluent-platform-nar</artifactId>
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_0_10/additionalDetails.html b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_0_11/additionalDetails.html
similarity index 100%
rename from nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_0_10/additionalDetails.html
rename to nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_0_11/additionalDetails.html
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_10/additionalDetails.html b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_11/additionalDetails.html
similarity index 100%
rename from nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_10/additionalDetails.html
rename to nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_11/additionalDetails.html
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafkaRecord_0_10/additionalDetails.html b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafkaRecord_0_11/additionalDetails.html
similarity index 100%
rename from nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafkaRecord_0_10/additionalDetails.html
rename to nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafkaRecord_0_11/additionalDetails.html
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafka_0_10/additionalDetails.html b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafka_0_11/additionalDetails.html
similarity index 100%
rename from nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafka_0_10/additionalDetails.html
rename to nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/resources/docs/org.apache.nifi.processors.kafka.pubsub.PublishKafka_0_11/additionalDetails.html
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/pom.xml b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/pom.xml
new file mode 100644
index 0000000000..f1806a165c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/pom.xml
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+      http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.nifi</groupId>
+        <artifactId>nifi-kafka-bundle</artifactId>
+        <version>1.5.0-SNAPSHOT</version>
+    </parent>
+    <artifactId>nifi-kafka-1-0-nar</artifactId>
+    <packaging>nar</packaging>
+    <description>NiFi NAR for interacting with Apache Kafka 1.0</description>
+    <properties>
+        <maven.javadoc.skip>true</maven.javadoc.skip>
+        <source.skip>true</source.skip>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-kafka-1-0-processors</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-standard-services-api-nar</artifactId>
+            <type>nar</type>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/LICENSE
new file mode 100644
index 0000000000..43a2a3b5de
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/LICENSE
@@ -0,0 +1,233 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+APACHE NIFI SUBCOMPONENTS:
+
+The Apache NiFi project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for the these
+subcomponents is subject to the terms and conditions of the following
+licenses.
+
+ The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
+ under an MIT style license.
+
+ Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..03af5fd517
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-nar/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,83 @@
+nifi-kafka-nar
+Copyright 2014-2017 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+******************
+Apache Software License v2
+******************
+
+The following binary components are provided under the Apache Software License v2
+
+ (ASLv2) Apache Commons Lang
+ The following NOTICE information applies:
+ Apache Commons Lang
+ Copyright 2001-2017 The Apache Software Foundation
+
+ This product includes software from the Spring Framework,
+ under the Apache License 2.0 (see: StringUtils.containsWhitespace())
+
+ (ASLv2) Apache Commons IO
+ The following NOTICE information applies:
+ Apache Commons IO
+ Copyright 2002-2016 The Apache Software Foundation
+
+ (ASLv2) Apache Commons Codec
+ The following NOTICE information applies:
+ Apache Commons Codec
+ Copyright 2002-2014 The Apache Software Foundation
+
+ src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
+ contains test data from http://aspell.net/test/orig/batch0.tab.
+ Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
+
+ ===============================================================================
+
+ The content of package org.apache.commons.codec.language.bm has been translated
+ from the original php source code available at http://stevemorse.org/phoneticinfo.htm
+ with permission from the original authors.
+ Original source copyright:
+ Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
+
+ (ASLv2) Apache Kafka
+ The following NOTICE information applies:
+ Apache Kafka
+ Copyright 2012 The Apache Software Foundation.
+
+ (ASLv2) Snappy Java
+ The following NOTICE information applies:
+ This product includes software developed by Google
+ Snappy: http://code.google.com/p/snappy/ (New BSD License)
+
+ This product includes software developed by Apache
+ PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/
+ (Apache 2.0 license)
+
+ This library containd statically linked libstdc++. This inclusion is allowed by
+ "GCC RUntime Library Exception"
+ http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html
+
+ (ASLv2) Jackson JSON processor
+ The following NOTICE information applies:
+ # Jackson JSON processor
+
+ Jackson is a high-performance, Free/Open Source JSON processing library.
+ It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
+ been in development since 2007.
+ It is currently developed by a community of developers, as well as supported
+ commercially by FasterXML.com.
+
+ ## Licensing
+
+ Jackson core and extension components may licensed under different licenses.
+ To find the details that apply to this artifact see the accompanying LICENSE file.
+ For more information, including possible other licensing options, contact
+ FasterXML.com (http://fasterxml.com).
+
+ ## Credits
+
+ A list of contributors may be found from CREDITS file, which is included
+ in some artifacts (usually source distributions); but is always available
+ from the source code management (SCM) system project uses.
+
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/pom.xml b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/pom.xml
new file mode 100644
index 0000000000..0f51298b3c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/pom.xml
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+      http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>org.apache.nifi</groupId>
+        <artifactId>nifi-kafka-bundle</artifactId>
+        <version>1.5.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>nifi-kafka-1-0-processors</artifactId>
+    <packaging>jar</packaging>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-record-serialization-service-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-record</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-processor-utils</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-utils</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-ssl-context-service-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka-clients</artifactId>
+            <version>${kafka1.0.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kafka</groupId>
+            <artifactId>kafka_2.11</artifactId>
+            <version>${kafka1.0.version}</version>
+            <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>javax.jms</groupId>
+                    <artifactId>jms</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jdmk</groupId>
+                    <artifactId>jmxtools</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.sun.jmx</groupId>
+                    <artifactId>jmxri</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.nifi</groupId>
+            <artifactId>nifi-mock</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
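
The module above compiles against kafka-clients pinned to ${kafka1.0.version}, excludes the legacy JMX transitive dependencies of the kafka_2.11 test artifact, and pulls in nifi-mock for testing. As a point of reference, the sketch below is editorial and not part of the patch: it shows a bare kafka-clients 1.0 consumer configured the way the new processors configure theirs (byte-array deserializers, auto-commit disabled so offsets are committed explicitly) plus an isolation level corresponding to the "Honor Transactions" property introduced later in this patch. The broker address, group id, and topic name are placeholders.

/*
 * Editorial sketch, not part of the patch. Illustrates the kafka-clients 1.0 API this
 * module builds against; broker, group, and topic values are hypothetical placeholders.
 */
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

public class PlainKafka10ConsumerSketch {
    public static void main(String[] args) {
        final Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // placeholder broker
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");              // placeholder group id
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");            // offsets committed explicitly below
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");      // analogous to "Honor Transactions" = true

        try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("example-topic"));      // placeholder topic
            final ConsumerRecords<byte[], byte[]> records = consumer.poll(1000L); // Kafka 1.0 uses poll(long)
            for (final ConsumerRecord<byte[], byte[]> record : records) {
                System.out.printf("partition=%d offset=%d%n", record.partition(), record.offset());
            }
            consumer.commitSync();
        }
    }
}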
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaRecord_1_0.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaRecord_1_0.java
new file mode 100644
index 0000000000..f0c1bd0c45
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaRecord_1_0.java
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.errors.WakeupException;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.nifi.annotation.behavior.DynamicProperty;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnStopped;
+import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+
+@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 1.0 Consumer API. "
+ + "The complementary NiFi processor for sending messages is PublishKafkaRecord_1_0. Please note that, at this time, the Processor assumes that "
+ + "all records that are retrieved from a given partition have the same schema. If any of the Kafka messages are pulled but cannot be parsed or written with the "
+ + "configured Record Reader or Record Writer, the contents of the message will be written to a separate FlowFile, and that FlowFile will be transferred to the "
+ + "'parse.failure' relationship. Otherwise, each FlowFile is sent to the 'success' relationship and may contain many individual messages within the single FlowFile. "
+ + "A 'record.count' attribute is added to indicate how many messages are contained in the FlowFile. No two Kafka messages will be placed into the same FlowFile if they "
+ + "have different schemas, or if they have different values for a message header that is included by the property.")
+@Tags({"Kafka", "Get", "Record", "csv", "avro", "json", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "1.0"})
+@WritesAttributes({
+ @WritesAttribute(attribute = "record.count", description = "The number of records received"),
+ @WritesAttribute(attribute = "mime.type", description = "The MIME Type that is provided by the configured Record Writer"),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_PARTITION, description = "The partition of the topic the records are from"),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_TOPIC, description = "The topic records are from")
+})
+@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
+@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
+ description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ + " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ")
+@SeeAlso({ConsumeKafka_1_0.class, PublishKafka_1_0.class, PublishKafkaRecord_1_0.class})
+public class ConsumeKafkaRecord_1_0 extends AbstractProcessor {
+
+ static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset");
+ static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset");
+ static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group");
+ static final AllowableValue TOPIC_NAME = new AllowableValue("names", "names", "Topic is a full topic name or comma separated list of names");
+ static final AllowableValue TOPIC_PATTERN = new AllowableValue("pattern", "pattern", "Topic is a regex using the Java Pattern syntax");
+
+ static final PropertyDescriptor TOPICS = new PropertyDescriptor.Builder()
+ .name("topic")
+ .displayName("Topic Name(s)")
+ .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.")
+ .required(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .build();
+
+ static final PropertyDescriptor TOPIC_TYPE = new PropertyDescriptor.Builder()
+ .name("topic_type")
+ .displayName("Topic Name Format")
+ .description("Specifies whether the Topic(s) provided are a comma separated list of names or a single regular expression")
+ .required(true)
+ .allowableValues(TOPIC_NAME, TOPIC_PATTERN)
+ .defaultValue(TOPIC_NAME.getValue())
+ .build();
+
+ static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
+ .name("record-reader")
+ .displayName("Record Reader")
+ .description("The Record Reader to use for incoming FlowFiles")
+ .identifiesControllerService(RecordReaderFactory.class)
+ .expressionLanguageSupported(false)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor RECORD_WRITER = new PropertyDescriptor.Builder()
+ .name("record-writer")
+ .displayName("Record Writer")
+ .description("The Record Writer to use in order to serialize the data before sending to Kafka")
+ .identifiesControllerService(RecordSetWriterFactory.class)
+ .expressionLanguageSupported(false)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor GROUP_ID = new PropertyDescriptor.Builder()
+ .name("group.id")
+ .displayName("Group ID")
+ .description("A Group ID is used to identify consumers that are within the same consumer group. Corresponds to Kafka's 'group.id' property.")
+ .required(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .build();
+
+ static final PropertyDescriptor AUTO_OFFSET_RESET = new PropertyDescriptor.Builder()
+ .name("auto.offset.reset")
+ .displayName("Offset Reset")
+ .description("Allows you to manage the condition when there is no initial offset in Kafka or if the current offset does not exist any "
+ + "more on the server (e.g. because that data has been deleted). Corresponds to Kafka's 'auto.offset.reset' property.")
+ .required(true)
+ .allowableValues(OFFSET_EARLIEST, OFFSET_LATEST, OFFSET_NONE)
+ .defaultValue(OFFSET_LATEST.getValue())
+ .build();
+
+ static final PropertyDescriptor MAX_POLL_RECORDS = new PropertyDescriptor.Builder()
+ .name("max.poll.records")
+ .displayName("Max Poll Records")
+ .description("Specifies the maximum number of records Kafka should return in a single poll.")
+ .required(false)
+ .defaultValue("10000")
+ .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new PropertyDescriptor.Builder()
+ .name("max-uncommit-offset-wait")
+ .displayName("Max Uncommitted Time")
+ .description("Specifies the maximum amount of time allowed to pass before offsets must be committed. "
+ + "This value impacts how often offsets will be committed. Committing offsets less often increases "
+ + "throughput but also increases the window of potential data duplication in the event of a rebalance "
+ + "or JVM restart between commits. This value is also related to maximum poll records and the use "
+ + "of a message demarcator. When using a message demarcator we can have far more uncommitted messages "
+ + "than when we're not as there is much less for us to keep track of in memory.")
+ .required(false)
+ .defaultValue("1 secs")
+ .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
+ .build();
+ static final PropertyDescriptor HONOR_TRANSACTIONS = new PropertyDescriptor.Builder()
+ .name("honor-transactions")
+ .displayName("Honor Transactions")
+ .description("Specifies whether or not NiFi should honor transactional guarantees when communicating with Kafka. If false, the Processor will use an \"isolation level\" of "
+ + "read_uncomitted. This means that messages will be received as soon as they are written to Kafka but will be pulled, even if the producer cancels the transactions. If "
+ + "this value is true, NiFi will not receive any messages for which the producer's transaction was canceled, but this can result in some latency since the consumer must wait "
+ + "for the producer to finish its entire transaction instead of pulling as the messages become available.")
+ .expressionLanguageSupported(false)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .required(true)
+ .build();
+ static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new PropertyDescriptor.Builder()
+ .name("message-header-encoding")
+ .displayName("Message Header Encoding")
+ .description("Any message header that is found on a Kafka message will be added to the outbound FlowFile as an attribute. "
+ + "This property indicates the Character Encoding to use for deserializing the headers.")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .defaultValue("UTF-8")
+ .required(false)
+ .build();
+ static final PropertyDescriptor HEADER_NAME_REGEX = new PropertyDescriptor.Builder()
+ .name("header-name-regex")
+ .displayName("Headers to Add as Attributes (Regex)")
+ .description("A Regular Expression that is matched against all message headers. "
+ + "Any message header whose name matches the regex will be added to the FlowFile as an Attribute. "
+ + "If not specified, no Header values will be added as FlowFile attributes. If two messages have a different value for the same header and that header is selected by "
+ + "the provided regex, then those two messages must be added to different FlowFiles. As a result, users should be cautious about using a regex like "
+ + "\".*\" if messages are expected to have header values that are unique per message, such as an identifier or timestamp, because it will prevent NiFi from bundling "
+ + "the messages together efficiently.")
+ .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .required(false)
+ .build();
+
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("FlowFiles received from Kafka. Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.")
+ .build();
+ static final Relationship REL_PARSE_FAILURE = new Relationship.Builder()
+ .name("parse.failure")
+ .description("If a message from Kafka cannot be parsed using the configured Record Reader, the contents of the "
+ + "message will be routed to this Relationship as its own individual FlowFile.")
+ .build();
+
+ static final List<PropertyDescriptor> DESCRIPTORS;
+ static final Set<Relationship> RELATIONSHIPS;
+
+ private volatile ConsumerPool consumerPool = null;
+ private final Set<ConsumerLease> activeLeases = Collections.synchronizedSet(new HashSet<>());
+
+ static {
+ List<PropertyDescriptor> descriptors = new ArrayList<>();
+ descriptors.add(KafkaProcessorUtils.BOOTSTRAP_SERVERS);
+ descriptors.add(TOPICS);
+ descriptors.add(TOPIC_TYPE);
+ descriptors.add(RECORD_READER);
+ descriptors.add(RECORD_WRITER);
+ descriptors.add(HONOR_TRANSACTIONS);
+ descriptors.add(KafkaProcessorUtils.SECURITY_PROTOCOL);
+ descriptors.add(KafkaProcessorUtils.KERBEROS_PRINCIPLE);
+ descriptors.add(KafkaProcessorUtils.USER_PRINCIPAL);
+ descriptors.add(KafkaProcessorUtils.USER_KEYTAB);
+ descriptors.add(KafkaProcessorUtils.SSL_CONTEXT_SERVICE);
+ descriptors.add(GROUP_ID);
+ descriptors.add(AUTO_OFFSET_RESET);
+ descriptors.add(MESSAGE_HEADER_ENCODING);
+ descriptors.add(HEADER_NAME_REGEX);
+ descriptors.add(MAX_POLL_RECORDS);
+ descriptors.add(MAX_UNCOMMITTED_TIME);
+ DESCRIPTORS = Collections.unmodifiableList(descriptors);
+
+ final Set<Relationship> rels = new HashSet<>();
+ rels.add(REL_SUCCESS);
+ rels.add(REL_PARSE_FAILURE);
+ RELATIONSHIPS = Collections.unmodifiableSet(rels);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return RELATIONSHIPS;
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return DESCRIPTORS;
+ }
+
+ @OnStopped
+ public void close() {
+ final ConsumerPool pool = consumerPool;
+ consumerPool = null;
+
+ if (pool != null) {
+ pool.close();
+ }
+ }
+
+ @Override
+ protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+ return new PropertyDescriptor.Builder()
+ .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
+ .name(propertyDescriptorName).addValidator(new KafkaProcessorUtils.KafkaConfigValidator(ConsumerConfig.class)).dynamic(true)
+ .build();
+ }
+
+ @Override
+ protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
+ return KafkaProcessorUtils.validateCommonProperties(validationContext);
+ }
+
+ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) {
+ ConsumerPool pool = consumerPool;
+ if (pool != null) {
+ return pool;
+ }
+
+ return consumerPool = createConsumerPool(context, getLogger());
+ }
+
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ final int maxLeases = context.getMaxConcurrentTasks();
+ final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
+
+ final Map<String, Object> props = new HashMap<>();
+ KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
+ props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+ props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ final String topicListing = context.getProperty(ConsumeKafkaRecord_1_0.TOPICS).evaluateAttributeExpressions().getValue();
+ final String topicType = context.getProperty(ConsumeKafkaRecord_1_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
+ final List<String> topics = new ArrayList<>();
+ final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
+ final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
+
+ final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
+ final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
+ final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
+
+ final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
+ final Charset charset = Charset.forName(charsetName);
+
+ final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
+ final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
+
+ if (topicType.equals(TOPIC_NAME.getValue())) {
+ for (final String topic : topicListing.split(",", 100)) {
+ final String trimmedName = topic.trim();
+ if (!trimmedName.isEmpty()) {
+ topics.add(trimmedName);
+ }
+ }
+
+ return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol,
+ bootstrapServers, log, honorTransactions, charset, headerNamePattern);
+ } else if (topicType.equals(TOPIC_PATTERN.getValue())) {
+ final Pattern topicPattern = Pattern.compile(topicListing.trim());
+ return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol,
+ bootstrapServers, log, honorTransactions, charset, headerNamePattern);
+ } else {
+ getLogger().error("Subscription type has an unknown value {}", new Object[] {topicType});
+ return null;
+ }
+ }
+
+ @OnUnscheduled
+ public void interruptActiveThreads() {
+ // There are known issues with the Kafka client library that result in the client code hanging
+ // indefinitely when unable to communicate with the broker. In order to address this, we will wait
+ // up to 30 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the
+ // thread to wakeup when it is blocked, waiting on a response.
+ final long nanosToWait = TimeUnit.SECONDS.toNanos(5L);
+ final long start = System.nanoTime();
+ while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) {
+ try {
+ Thread.sleep(100L);
+ } catch (final InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ return;
+ }
+ }
+
+ if (!activeLeases.isEmpty()) {
+ int count = 0;
+ for (final ConsumerLease lease : activeLeases) {
+ getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", new Object[] {lease});
+ lease.wakeup();
+ count++;
+ }
+
+ getLogger().info("Woke up {} consumers", new Object[] {count});
+ }
+
+ activeLeases.clear();
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+ final ConsumerPool pool = getConsumerPool(context);
+ if (pool == null) {
+ context.yield();
+ return;
+ }
+
+ try (final ConsumerLease lease = pool.obtainConsumer(session, context)) {
+ if (lease == null) {
+ context.yield();
+ return;
+ }
+
+ activeLeases.add(lease);
+ try {
+ while (this.isScheduled() && lease.continuePolling()) {
+ lease.poll();
+ }
+ if (this.isScheduled() && !lease.commit()) {
+ context.yield();
+ }
+ } catch (final WakeupException we) {
+ getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. "
+ + "Will roll back session and discard any partially received data.", new Object[] {lease});
+ } catch (final KafkaException kex) {
+ getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}",
+ new Object[]{lease, kex}, kex);
+ } catch (final Throwable t) {
+ getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}",
+ new Object[]{lease, t}, t);
+ } finally {
+ activeLeases.remove(lease);
+ }
+ }
+ }
+}
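
ConsumeKafkaRecord_1_0.createConsumerPool() above interprets the Topic Name(s) property in one of two ways, selected by the Topic Name Format property: a comma-separated list of names, or a single Java regular expression. The standalone sketch below is editorial and not part of the patch (the sample topic names are made up); it mirrors that parsing logic so the behavior of the 'names' and 'pattern' modes is easy to see in isolation.

/*
 * Editorial sketch, not part of the patch. Mirrors the topic-listing logic in
 * createConsumerPool(); the topic names used here are hypothetical.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

public class TopicSubscriptionSketch {
    public static void main(String[] args) {
        // "names" mode: the comma-separated list is split and trimmed; empty entries are dropped.
        final String topicListing = "orders, payments,  ,audit";
        final List<String> topics = new ArrayList<>();
        for (final String topic : topicListing.split(",", 100)) {
            final String trimmedName = topic.trim();
            if (!trimmedName.isEmpty()) {
                topics.add(trimmedName);
            }
        }
        System.out.println(topics); // [orders, payments, audit]

        // "pattern" mode: the whole property value is compiled as a single Java regex.
        final Pattern topicPattern = Pattern.compile("orders\\..*");
        System.out.println(topicPattern.matcher("orders.eu").matches()); // true
    }
}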
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_1_0.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_1_0.java
new file mode 100644
index 0000000000..fdc2cb33a6
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafka_1_0.java
@@ -0,0 +1,386 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.concurrent.TimeUnit;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.errors.WakeupException;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.nifi.annotation.behavior.DynamicProperty;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnStopped;
+import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING;
+import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING;
+
+@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 1.0 Consumer API. "
+ + "The complementary NiFi processor for sending messages is PublishKafka_1_0.")
+@Tags({"Kafka", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "1.0"})
+@WritesAttributes({
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_COUNT, description = "The number of messages written if more than one"),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_KEY, description = "The key of message if present and if single message. "
+ + "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_OFFSET, description = "The offset of the message in the partition of the topic."),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_PARTITION, description = "The partition of the topic the message or message bundle is from"),
+ @WritesAttribute(attribute = KafkaProcessorUtils.KAFKA_TOPIC, description = "The topic the message or message bundle is from")
+})
+@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
+@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
+ description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ + " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ")
+public class ConsumeKafka_1_0 extends AbstractProcessor {
+
+ static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset");
+
+ static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset");
+
+ static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group");
+
+ static final AllowableValue TOPIC_NAME = new AllowableValue("names", "names", "Topic is a full topic name or comma separated list of names");
+
+ static final AllowableValue TOPIC_PATTERN = new AllowableValue("pattern", "pattern", "Topic is a regex using the Java Pattern syntax");
+
+ static final PropertyDescriptor TOPICS = new PropertyDescriptor.Builder()
+ .name("topic")
+ .displayName("Topic Name(s)")
+ .description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.")
+ .required(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .build();
+
+ static final PropertyDescriptor TOPIC_TYPE = new PropertyDescriptor.Builder()
+ .name("topic_type")
+ .displayName("Topic Name Format")
+ .description("Specifies whether the Topic(s) provided are a comma separated list of names or a single regular expression")
+ .required(true)
+ .allowableValues(TOPIC_NAME, TOPIC_PATTERN)
+ .defaultValue(TOPIC_NAME.getValue())
+ .build();
+
+ static final PropertyDescriptor GROUP_ID = new PropertyDescriptor.Builder()
+ .name(ConsumerConfig.GROUP_ID_CONFIG)
+ .displayName("Group ID")
+ .description("A Group ID is used to identify consumers that are within the same consumer group. Corresponds to Kafka's 'group.id' property.")
+ .required(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .build();
+
+ static final PropertyDescriptor AUTO_OFFSET_RESET = new PropertyDescriptor.Builder()
+ .name(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG)
+ .displayName("Offset Reset")
+ .description("Allows you to manage the condition when there is no initial offset in Kafka or if the current offset does not exist any "
+ + "more on the server (e.g. because that data has been deleted). Corresponds to Kafka's 'auto.offset.reset' property.")
+ .required(true)
+ .allowableValues(OFFSET_EARLIEST, OFFSET_LATEST, OFFSET_NONE)
+ .defaultValue(OFFSET_LATEST.getValue())
+ .build();
+
+ static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new PropertyDescriptor.Builder()
+ .name("key-attribute-encoding")
+ .displayName("Key Attribute Encoding")
+ .description("FlowFiles that are emitted have an attribute named '" + KafkaProcessorUtils.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.")
+ .required(true)
+ .defaultValue(UTF8_ENCODING.getValue())
+ .allowableValues(UTF8_ENCODING, HEX_ENCODING)
+ .build();
+
+ static final PropertyDescriptor MESSAGE_DEMARCATOR = new PropertyDescriptor.Builder()
+ .name("message-demarcator")
+ .displayName("Message Demarcator")
+ .required(false)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .description("Since KafkaConsumer receives messages in batches, you have an option to output FlowFiles which contains "
+ + "all Kafka messages in a single batch for a given topic and partition and this property allows you to provide a string (interpreted as UTF-8) to use "
+ + "for demarcating apart multiple Kafka messages. This is an optional property and if not provided each Kafka message received "
+ + "will result in a single FlowFile which "
+ + "time it is triggered. To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on the OS")
+ .build();
+ static final PropertyDescriptor HEADER_NAME_REGEX = new PropertyDescriptor.Builder()
+ .name("header-name-regex")
+ .displayName("Headers to Add as Attributes (Regex)")
+ .description("A Regular Expression that is matched against all message headers. "
+ + "Any message header whose name matches the regex will be added to the FlowFile as an Attribute. "
+ + "If not specified, no Header values will be added as FlowFile attributes. If two messages have a different value for the same header and that header is selected by "
+ + "the provided regex, then those two messages must be added to different FlowFiles. As a result, users should be cautious about using a regex like "
+ + "\".*\" if messages are expected to have header values that are unique per message, such as an identifier or timestamp, because it will prevent NiFi from bundling "
+ + "the messages together efficiently.")
+ .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .required(false)
+ .build();
+
+ static final PropertyDescriptor MAX_POLL_RECORDS = new PropertyDescriptor.Builder()
+ .name("max.poll.records")
+ .displayName("Max Poll Records")
+ .description("Specifies the maximum number of records Kafka should return in a single poll.")
+ .required(false)
+ .defaultValue("10000")
+ .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new PropertyDescriptor.Builder()
+ .name("max-uncommit-offset-wait")
+ .displayName("Max Uncommitted Time")
+ .description("Specifies the maximum amount of time allowed to pass before offsets must be committed. "
+ + "This value impacts how often offsets will be committed. Committing offsets less often increases "
+ + "throughput but also increases the window of potential data duplication in the event of a rebalance "
+ + "or JVM restart between commits. This value is also related to maximum poll records and the use "
+ + "of a message demarcator. When using a message demarcator we can have far more uncommitted messages "
+ + "than when we're not as there is much less for us to keep track of in memory.")
+ .required(false)
+ .defaultValue("1 secs")
+ .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor HONOR_TRANSACTIONS = new PropertyDescriptor.Builder()
+ .name("honor-transactions")
+ .displayName("Honor Transactions")
+ .description("Specifies whether or not NiFi should honor transactional guarantees when communicating with Kafka. If false, the Processor will use an \"isolation level\" of "
+ + "read_uncomitted. This means that messages will be received as soon as they are written to Kafka but will be pulled, even if the producer cancels the transactions. If "
+ + "this value is true, NiFi will not receive any messages for which the producer's transaction was canceled, but this can result in some latency since the consumer must wait "
+ + "for the producer to finish its entire transaction instead of pulling as the messages become available.")
+ .expressionLanguageSupported(false)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .required(true)
+ .build();
+ static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new PropertyDescriptor.Builder()
+ .name("message-header-encoding")
+ .displayName("Message Header Encoding")
+ .description("Any message header that is found on a Kafka message will be added to the outbound FlowFile as an attribute. "
+ + "This property indicates the Character Encoding to use for deserializing the headers.")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .defaultValue("UTF-8")
+ .required(false)
+ .build();
+
+
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("FlowFiles received from Kafka. Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.")
+ .build();
+
+ static final List<PropertyDescriptor> DESCRIPTORS;
+ static final Set<Relationship> RELATIONSHIPS;
+
+ private volatile ConsumerPool consumerPool = null;
+ private final Set<ConsumerLease> activeLeases = Collections.synchronizedSet(new HashSet<>());
+
+ static {
+ List<PropertyDescriptor> descriptors = new ArrayList<>();
+ descriptors.addAll(KafkaProcessorUtils.getCommonPropertyDescriptors());
+ descriptors.add(TOPICS);
+ descriptors.add(TOPIC_TYPE);
+ descriptors.add(HONOR_TRANSACTIONS);
+ descriptors.add(GROUP_ID);
+ descriptors.add(AUTO_OFFSET_RESET);
+ descriptors.add(KEY_ATTRIBUTE_ENCODING);
+ descriptors.add(MESSAGE_DEMARCATOR);
+ descriptors.add(MESSAGE_HEADER_ENCODING);
+ descriptors.add(HEADER_NAME_REGEX);
+ descriptors.add(MAX_POLL_RECORDS);
+ descriptors.add(MAX_UNCOMMITTED_TIME);
+ DESCRIPTORS = Collections.unmodifiableList(descriptors);
+ RELATIONSHIPS = Collections.singleton(REL_SUCCESS);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return RELATIONSHIPS;
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return DESCRIPTORS;
+ }
+
+ @OnStopped
+ public void close() {
+ final ConsumerPool pool = consumerPool;
+ consumerPool = null;
+ if (pool != null) {
+ pool.close();
+ }
+ }
+
+ @Override
+ protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+ return new PropertyDescriptor.Builder()
+ .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
+ .name(propertyDescriptorName).addValidator(new KafkaProcessorUtils.KafkaConfigValidator(ConsumerConfig.class)).dynamic(true)
+ .build();
+ }
+
+ @Override
+ protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
+ return KafkaProcessorUtils.validateCommonProperties(validationContext);
+ }
+
+ private synchronized ConsumerPool getConsumerPool(final ProcessContext context) {
+ ConsumerPool pool = consumerPool;
+ if (pool != null) {
+ return pool;
+ }
+
+ return consumerPool = createConsumerPool(context, getLogger());
+ }
+
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ final int maxLeases = context.getMaxConcurrentTasks();
+ final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
+ final byte[] demarcator = context.getProperty(ConsumeKafka_1_0.MESSAGE_DEMARCATOR).isSet()
+ ? context.getProperty(ConsumeKafka_1_0.MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8)
+ : null;
+ final Map<String, Object> props = new HashMap<>();
+ KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
+ props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+ props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+
+ final String topicListing = context.getProperty(ConsumeKafka_1_0.TOPICS).evaluateAttributeExpressions().getValue();
+ final String topicType = context.getProperty(ConsumeKafka_1_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
+ final List<String> topics = new ArrayList<>();
+ final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue();
+ final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
+ final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
+ final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
+
+ final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
+ final Charset charset = Charset.forName(charsetName);
+
+ final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
+ final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
+
+ if (topicType.equals(TOPIC_NAME.getValue())) {
+ for (final String topic : topicListing.split(",", 100)) {
+ final String trimmedName = topic.trim();
+ if (!trimmedName.isEmpty()) {
+ topics.add(trimmedName);
+ }
+ }
+
+ return new ConsumerPool(maxLeases, demarcator, props, topics, maxUncommittedTime, keyEncoding, securityProtocol,
+ bootstrapServers, log, honorTransactions, charset, headerNamePattern);
+ } else if (topicType.equals(TOPIC_PATTERN.getValue())) {
+ final Pattern topicPattern = Pattern.compile(topicListing.trim());
+ return new ConsumerPool(maxLeases, demarcator, props, topicPattern, maxUncommittedTime, keyEncoding, securityProtocol,
+ bootstrapServers, log, honorTransactions, charset, headerNamePattern);
+ } else {
+ getLogger().error("Subscription type has an unknown value {}", new Object[] {topicType});
+ return null;
+ }
+ }
+
+ @OnUnscheduled
+ public void interruptActiveThreads() {
+ // There are known issues with the Kafka client library that result in the client code hanging
+ // indefinitely when unable to communicate with the broker. In order to address this, we will wait
+ // up to 5 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the
+ // thread to wakeup when it is blocked, waiting on a response.
+ final long nanosToWait = TimeUnit.SECONDS.toNanos(5L);
+ final long start = System.nanoTime();
+ while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) {
+ try {
+ Thread.sleep(100L);
+ } catch (final InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ return;
+ }
+ }
+
+ if (!activeLeases.isEmpty()) {
+ int count = 0;
+ for (final ConsumerLease lease : activeLeases) {
+ getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", new Object[] {lease});
+ lease.wakeup();
+ count++;
+ }
+
+ getLogger().info("Woke up {} consumers", new Object[] {count});
+ }
+
+ activeLeases.clear();
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+ final ConsumerPool pool = getConsumerPool(context);
+ if (pool == null) {
+ context.yield();
+ return;
+ }
+
+ try (final ConsumerLease lease = pool.obtainConsumer(session, context)) {
+ if (lease == null) {
+ context.yield();
+ return;
+ }
+
+ activeLeases.add(lease);
+ try {
+ while (this.isScheduled() && lease.continuePolling()) {
+ lease.poll();
+ }
+ if (this.isScheduled() && !lease.commit()) {
+ context.yield();
+ }
+ } catch (final WakeupException we) {
+ getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. "
+ + "Will roll back session and discard any partially received data.", new Object[] {lease});
+ } catch (final KafkaException kex) {
+ getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}",
+ new Object[]{lease, kex}, kex);
+ } catch (final Throwable t) {
+ getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}",
+ new Object[]{lease, t}, t);
+ } finally {
+ activeLeases.remove(lease);
+ }
+ }
+ }
+}
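The consumer configuration assembled by createConsumerPool() above corresponds roughly to the following stand-alone Kafka 1.0 client setup. This is a minimal illustrative sketch, not part of the patch: the broker address, group id and topic name are placeholders, and "Honor Transactions" is assumed to be true.

    import java.util.Collections;
    import java.util.Properties;
    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.common.serialization.ByteArrayDeserializer;

    public class ConsumeKafkaConfigSketch {
        public static void main(String[] args) {
            final Properties props = new Properties();
            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // "Kafka Brokers" property
            props.put(ConsumerConfig.GROUP_ID_CONFIG, "nifi-consumer-group");      // "Group ID" property
            props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");          // offsets are committed by the processor, not the client
            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
            props.put("isolation.level", "read_committed");                        // what "Honor Transactions" = true translates to

            try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
                consumer.subscribe(Collections.singletonList("my-topic"));          // "Topic Name(s)" property
                final ConsumerRecords<byte[], byte[]> records = consumer.poll(10);  // same short poll used by ConsumerLease.poll()
                System.out.println("Fetched " + records.count() + " records");
            }
        }
    }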
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java
new file mode 100644
index 0000000000..a2a449c09c
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java
@@ -0,0 +1,701 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_1_0.REL_PARSE_FAILURE;
+import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_1_0.REL_SUCCESS;
+import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING;
+import static org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING;
+
+import java.io.ByteArrayInputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.header.Header;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriter;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.WriteResult;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordSchema;
+
+/**
+ * This class represents a lease to access a Kafka Consumer object. The lease is
+ * intended to be obtained from a ConsumerPool. The lease is closeable to allow
+ * for the clean model of a try w/resources whereby non-exceptional cases mean
+ * the lease will be returned to the pool for future use by others. A given
+ * lease may only belong to a single thread at a time.
+ */
+public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListener {
+
+ private final long maxWaitMillis;
+ private final Consumer<byte[], byte[]> kafkaConsumer;
+ private final ComponentLog logger;
+ private final byte[] demarcatorBytes;
+ private final String keyEncoding;
+ private final String securityProtocol;
+ private final String bootstrapServers;
+ private final RecordSetWriterFactory writerFactory;
+ private final RecordReaderFactory readerFactory;
+ private final Charset headerCharacterSet;
+ private final Pattern headerNamePattern;
+ private boolean poisoned = false;
+ //used for tracking demarcated flowfiles to their TopicPartition so we can append
+ //to them on subsequent poll calls
+ private final Map<BundleInformation, BundleTracker> bundleMap = new HashMap<>();
+ private final Map<TopicPartition, OffsetAndMetadata> uncommittedOffsetsMap = new HashMap<>();
+ private long leaseStartNanos = -1;
+ private boolean lastPollEmpty = false;
+ private int totalMessages = 0;
+
+ ConsumerLease(
+ final long maxWaitMillis,
+ final Consumer<byte[], byte[]> kafkaConsumer,
+ final byte[] demarcatorBytes,
+ final String keyEncoding,
+ final String securityProtocol,
+ final String bootstrapServers,
+ final RecordReaderFactory readerFactory,
+ final RecordSetWriterFactory writerFactory,
+ final ComponentLog logger,
+ final Charset headerCharacterSet,
+ final Pattern headerNamePattern) {
+ this.maxWaitMillis = maxWaitMillis;
+ this.kafkaConsumer = kafkaConsumer;
+ this.demarcatorBytes = demarcatorBytes;
+ this.keyEncoding = keyEncoding;
+ this.securityProtocol = securityProtocol;
+ this.bootstrapServers = bootstrapServers;
+ this.readerFactory = readerFactory;
+ this.writerFactory = writerFactory;
+ this.logger = logger;
+ this.headerCharacterSet = headerCharacterSet;
+ this.headerNamePattern = headerNamePattern;
+ }
+
+ /**
+ * clears out internal state elements excluding session and consumer as
+ * those are managed by the pool itself
+ */
+ private void resetInternalState() {
+ bundleMap.clear();
+ uncommittedOffsetsMap.clear();
+ leaseStartNanos = -1;
+ lastPollEmpty = false;
+ totalMessages = 0;
+ }
+
+ /**
+ * Kafka will call this method whenever it is about to rebalance the
+ * consumers for the given partitions. We'll simply take this to mean that
+ * we need to quickly commit what we've got and will return the consumer to
+ * the pool. This method will be called during the poll() method call of
+ * this class and will be called by the same thread calling poll according
+ * to the Kafka API docs. After this method executes the session and kafka
+ * offsets are committed and this lease is closed.
+ *
+ * @param partitions partitions being reassigned
+ */
+ @Override
+ public void onPartitionsRevoked(final Collection<TopicPartition> partitions) {
+ logger.debug("Rebalance Alert: Partitions '{}' revoked for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer});
+ //force a commit here. Can reuse the session and consumer after this but must commit now to avoid duplicates if kafka reassigns partition
+ commit();
+ }
+
+ /**
+ * This will be called by Kafka when the rebalance has completed. We don't
+ * need to do anything with this information other than optionally log it as
+ * by this point we've committed what we've got and moved on.
+ *
+ * @param partitions topic partition set being reassigned
+ */
+ @Override
+ public void onPartitionsAssigned(final Collection<TopicPartition> partitions) {
+ logger.debug("Rebalance Alert: Partitions '{}' assigned for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer});
+ }
+
+ /**
+ * Executes a poll on the underlying Kafka Consumer and creates any new
+ * flowfiles necessary or appends to existing ones if in demarcation mode.
+ */
+ void poll() {
+ /**
+ * Implementation note:
+ * Even if ConsumeKafka is not scheduled to poll for longer than session.timeout.ms (defaults to 10 sec),
+ * for example because downstream connection back-pressure is engaged, the Kafka consumer still sends
+ * heartbeats from a background thread. If the lack of polling lasts longer than max.poll.interval.ms
+ * (defaults to 5 min), the Kafka consumer sends a Leave Group request to the Group Coordinator. When the
+ * ConsumeKafka processor is scheduled again, the Kafka client checks whether this client instance is still
+ * part of the consumer group and, if not, rejoins before polling messages.
+ * This behavior comes from Kafka KIP-62 and is available from Kafka client 0.10.1.0 onward.
+ */
+ try {
+ final ConsumerRecords<byte[], byte[]> records = kafkaConsumer.poll(10);
+ lastPollEmpty = records.count() == 0;
+ processRecords(records);
+ } catch (final ProcessException pe) {
+ throw pe;
+ } catch (final Throwable t) {
+ this.poison();
+ throw t;
+ }
+ }
+
+ /**
+ * Notifies Kafka to commit the offsets for the specified topic/partition
+ * pairs to the specified offsets w/the given metadata. This can offer
+ * higher performance than the other commitOffsets call as it allows the
+ * kafka client to collect more data from Kafka before committing the
+ * offsets.
+ *
+ * @return false if we did not do anything and the caller should probably yield;
+ * true if we committed new data
+ *
+ */
+ boolean commit() {
+ if (uncommittedOffsetsMap.isEmpty()) {
+ resetInternalState();
+ return false;
+ }
+ try {
+ /**
+ * Committing the nifi session then the offsets means we have an at
+ * least once guarantee here. If we reversed the order we'd have at
+ * most once.
+ */
+ final Collection<FlowFile> bundledFlowFiles = getBundles();
+ if (!bundledFlowFiles.isEmpty()) {
+ getProcessSession().transfer(bundledFlowFiles, REL_SUCCESS);
+ }
+ getProcessSession().commit();
+
+ final Map<TopicPartition, OffsetAndMetadata> offsetsMap = uncommittedOffsetsMap;
+ kafkaConsumer.commitSync(offsetsMap);
+ resetInternalState();
+ return true;
+ } catch (final IOException ioe) {
+ poison();
+ logger.error("Failed to finish writing out FlowFile bundle", ioe);
+ throw new ProcessException(ioe);
+ } catch (final KafkaException kex) {
+ poison();
+ logger.warn("Duplicates are likely as we were able to commit the process"
+ + " session but received an exception from Kafka while committing"
+ + " offsets.");
+ throw kex;
+ } catch (final Throwable t) {
+ poison();
+ throw t;
+ }
+ }
+
+ /**
+ * Indicates whether we should continue polling for data. If we are not
+ * writing data with a demarcator then we're writing individual flow files
+ * per kafka message therefore we must be very mindful of memory usage for
+ * the flow file objects (not their content) being held in memory. The
+ * content of kafka messages will be written to the content repository
+ * immediately upon each poll call but we must still be mindful of how much
+ * memory can be used in each poll call. We will indicate that we should
+ * stop polling if our last poll call produced no new results, if we have been
+ * polling and processing data longer than the specified maximum polling
+ * time, if we have reached our specified max flow file limit, or if a
+ * rebalance has been initiated for one of the partitions we're watching;
+ * otherwise we indicate that polling should continue.
+ *
+ * @return true if should keep polling; false otherwise
+ */
+ boolean continuePolling() {
+ //stop if the last poll produced no new data
+ if (lastPollEmpty) {
+ return false;
+ }
+
+ //stop if we've gone past our desired max uncommitted wait time
+ if (leaseStartNanos < 0) {
+ leaseStartNanos = System.nanoTime();
+ }
+ final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos);
+ if (durationMillis > maxWaitMillis) {
+ return false;
+ }
+
+ //stop if we've generated enough flowfiles that we need to be concerned about memory usage for the objects
+ if (bundleMap.size() > 200) { //a magic number - the number of simultaneous bundles to track
+ return false;
+ } else {
+ return totalMessages < 1000; //admittedly a magic number - good candidate for processor property
+ }
+ }
+
+ /**
+ * Indicates that the underlying session and consumer should be immediately
+ * considered invalid. Once closed the session will be rolled back and the
+ * pool should destroy the underlying consumer. This is useful if due to
+ * external reasons, such as the processor no longer being scheduled, this
+ * lease should be terminated immediately.
+ */
+ private void poison() {
+ poisoned = true;
+ }
+
+ /**
+ * @return true if this lease has been poisoned; false otherwise
+ */
+ boolean isPoisoned() {
+ return poisoned;
+ }
+
+ /**
+ * Trigger the consumer's {@link KafkaConsumer#wakeup() wakeup()} method.
+ */
+ public void wakeup() {
+ kafkaConsumer.wakeup();
+ }
+
+ /**
+ * This method is intended to be overridden by the pool that created
+ * this ConsumerLease object. It should ensure that the session used to
+ * create this lease is rolled back and that the underlying kafka consumer
+ * is either returned to the pool for continued use or destroyed if this
+ * lease has been poisoned. It can only be called once. Calling it more than
+ * once can result in undefined and non threadsafe behavior.
+ */
+ @Override
+ public void close() {
+ resetInternalState();
+ }
+
+ public abstract ProcessSession getProcessSession();
+
+ public abstract void yield();
+
+ private void processRecords(final ConsumerRecords<byte[], byte[]> records) {
+ records.partitions().stream().forEach(partition -> {
+ List<ConsumerRecord<byte[], byte[]>> messages = records.records(partition);
+ if (!messages.isEmpty()) {
+ //update maximum offset map for this topic partition
+ long maxOffset = messages.stream()
+ .mapToLong(record -> record.offset())
+ .max()
+ .getAsLong();
+
+ //write records to content repository and session
+ if (demarcatorBytes != null) {
+ writeDemarcatedData(getProcessSession(), messages, partition);
+ } else if (readerFactory != null && writerFactory != null) {
+ writeRecordData(getProcessSession(), messages, partition);
+ } else {
+ messages.stream().forEach(message -> {
+ writeData(getProcessSession(), message, partition);
+ });
+ }
+
+ totalMessages += messages.size();
+ uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L));
+ }
+ });
+ }
+
+ private static String encodeKafkaKey(final byte[] key, final String encoding) {
+ if (key == null) {
+ return null;
+ }
+
+ if (HEX_ENCODING.getValue().equals(encoding)) {
+ return DatatypeConverter.printHexBinary(key);
+ } else if (UTF8_ENCODING.getValue().equals(encoding)) {
+ return new String(key, StandardCharsets.UTF_8);
+ } else {
+ return null; // won't happen because it is guaranteed by the Allowable Values
+ }
+ }
+
+ private Collection<FlowFile> getBundles() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ for (final BundleTracker tracker : bundleMap.values()) {
+ final boolean includeBundle = processBundle(tracker);
+ if (includeBundle) {
+ flowFiles.add(tracker.flowFile);
+ }
+ }
+ return flowFiles;
+ }
+
+ private boolean processBundle(final BundleTracker bundle) throws IOException {
+ final RecordSetWriter writer = bundle.recordWriter;
+ if (writer != null) {
+ final WriteResult writeResult;
+
+ try {
+ writeResult = writer.finishRecordSet();
+ } finally {
+ writer.close();
+ }
+
+ if (writeResult.getRecordCount() == 0) {
+ getProcessSession().remove(bundle.flowFile);
+ return false;
+ }
+
+ final Map<String, String> attributes = new HashMap<>();
+ attributes.putAll(writeResult.getAttributes());
+ attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
+
+ bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, attributes);
+ }
+
+ populateAttributes(bundle);
+ return true;
+ }
+
+ private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
+ FlowFile flowFile = session.create();
+ final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
+ tracker.incrementRecordCount(1);
+ final byte[] value = record.value();
+ if (value != null) {
+ flowFile = session.write(flowFile, out -> {
+ out.write(value);
+ });
+ }
+ flowFile = session.putAllAttributes(flowFile, getAttributes(record));
+ tracker.updateFlowFile(flowFile);
+ populateAttributes(tracker);
+ session.transfer(tracker.flowFile, REL_SUCCESS);
+ }
+
+ private void writeDemarcatedData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
+ // Group the Records by their BundleInformation
+ final Map<BundleInformation, List<ConsumerRecord<byte[], byte[]>>> map = records.stream()
+ .collect(Collectors.groupingBy(rec -> new BundleInformation(topicPartition, null, getAttributes(rec))));
+
+ for (final Map.Entry<BundleInformation, List<ConsumerRecord<byte[], byte[]>>> entry : map.entrySet()) {
+ final BundleInformation bundleInfo = entry.getKey();
+ final List<ConsumerRecord<byte[], byte[]>> recordList = entry.getValue();
+
+ final boolean demarcateFirstRecord;
+
+ BundleTracker tracker = bundleMap.get(bundleInfo);
+
+ FlowFile flowFile;
+ if (tracker == null) {
+ tracker = new BundleTracker(recordList.get(0), topicPartition, keyEncoding);
+ flowFile = session.create();
+ flowFile = session.putAllAttributes(flowFile, bundleInfo.attributes);
+ tracker.updateFlowFile(flowFile);
+ demarcateFirstRecord = false; //have not yet written records for this topic/partition in this lease
+ } else {
+ demarcateFirstRecord = true; //have already been writing records for this topic/partition in this lease
+ }
+ flowFile = tracker.flowFile;
+
+ tracker.incrementRecordCount(recordList.size());
+ flowFile = session.append(flowFile, out -> {
+ boolean useDemarcator = demarcateFirstRecord;
+ for (final ConsumerRecord<byte[], byte[]> record : recordList) {
+ if (useDemarcator) {
+ out.write(demarcatorBytes);
+ }
+ final byte[] value = record.value();
+ if (value != null) {
+ out.write(record.value());
+ }
+ useDemarcator = true;
+ }
+ });
+
+ tracker.updateFlowFile(flowFile);
+ bundleMap.put(bundleInfo, tracker);
+ }
+ }
+
+ private void handleParseFailure(final ConsumerRecord<byte[], byte[]> consumerRecord, final ProcessSession session, final Exception cause) {
+ handleParseFailure(consumerRecord, session, cause, "Failed to parse message from Kafka using the configured Record Reader. "
+ + "Will route message as its own FlowFile to the 'parse.failure' relationship");
+ }
+
+ private void handleParseFailure(final ConsumerRecord<byte[], byte[]> consumerRecord, final ProcessSession session, final Exception cause, final String message) {
+ // If we are unable to parse the data, we need to transfer it to 'parse failure' relationship
+ final Map<String, String> attributes = getAttributes(consumerRecord);
+ attributes.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(consumerRecord.offset()));
+ attributes.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(consumerRecord.partition()));
+ attributes.put(KafkaProcessorUtils.KAFKA_TOPIC, consumerRecord.topic());
+
+ FlowFile failureFlowFile = session.create();
+
+ final byte[] value = consumerRecord.value();
+ if (value != null) {
+ failureFlowFile = session.write(failureFlowFile, out -> out.write(value));
+ }
+ failureFlowFile = session.putAllAttributes(failureFlowFile, attributes);
+
+ final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, consumerRecord.topic());
+ session.getProvenanceReporter().receive(failureFlowFile, transitUri);
+
+ session.transfer(failureFlowFile, REL_PARSE_FAILURE);
+
+ if (cause == null) {
+ logger.error(message);
+ } else {
+ logger.error(message, cause);
+ }
+
+ session.adjustCounter("Parse Failures", 1, false);
+ }
+
+ private Map<String, String> getAttributes(final ConsumerRecord<?, ?> consumerRecord) {
+ final Map<String, String> attributes = new HashMap<>();
+ if (headerNamePattern == null) {
+ return attributes;
+ }
+
+ for (final Header header : consumerRecord.headers()) {
+ final String attributeName = header.key();
+ if (headerNamePattern.matcher(attributeName).matches()) {
+ attributes.put(attributeName, new String(header.value(), headerCharacterSet));
+ }
+ }
+
+ return attributes;
+ }
+
+ private void writeRecordData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
+ // We don't want to create a new FlowFile for each record that we receive. Instead, records are grouped
+ // into bundles by topic/partition, schema and attributes (see BundleInformation below) and each bundle
+ // is written to a single FlowFile through a RecordSetWriter. A RecordReader for each Kafka message is
+ // created from the message's attributes and an InputStream over its raw value bytes.
+ RecordSetWriter writer = null;
+ try {
+ for (final ConsumerRecord<byte[], byte[]> consumerRecord : records) {
+ final Map<String, String> attributes = getAttributes(consumerRecord);
+
+ final byte[] recordBytes = consumerRecord.value() == null ? new byte[0] : consumerRecord.value();
+ try (final InputStream in = new ByteArrayInputStream(recordBytes)) {
+ final RecordReader reader;
+
+ try {
+ reader = readerFactory.createRecordReader(attributes, in, logger);
+ } catch (final Exception e) {
+ handleParseFailure(consumerRecord, session, e);
+ continue;
+ }
+
+ Record record;
+ while ((record = reader.nextRecord()) != null) {
+ // Determine the bundle for this record.
+ final RecordSchema recordSchema = record.getSchema();
+ final BundleInformation bundleInfo = new BundleInformation(topicPartition, recordSchema, attributes);
+
+ BundleTracker tracker = bundleMap.get(bundleInfo);
+ if (tracker == null) {
+ FlowFile flowFile = session.create();
+ flowFile = session.putAllAttributes(flowFile, attributes);
+
+ final OutputStream rawOut = session.write(flowFile);
+
+ final RecordSchema writeSchema;
+ try {
+ writeSchema = writerFactory.getSchema(flowFile.getAttributes(), recordSchema);
+ } catch (final Exception e) {
+ logger.error("Failed to obtain Schema for FlowFile. Will roll back the Kafka message offsets.", e);
+
+ try {
+ rollback(topicPartition);
+ } catch (final Exception rollbackException) {
+ logger.warn("Attempted to rollback Kafka message offset but was unable to do so", rollbackException);
+ }
+
+ yield();
+ throw new ProcessException(e);
+ }
+
+ writer = writerFactory.createWriter(logger, writeSchema, rawOut);
+ writer.beginRecordSet();
+
+ tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer);
+ tracker.updateFlowFile(flowFile);
+ bundleMap.put(bundleInfo, tracker);
+ } else {
+ writer = tracker.recordWriter;
+ }
+
+ try {
+ writer.write(record);
+ } catch (final RuntimeException re) {
+ handleParseFailure(consumerRecord, session, re, "Failed to write message from Kafka using the configured Record Writer. "
+ + "Will route message as its own FlowFile to the 'parse.failure' relationship");
+ continue;
+ }
+
+ tracker.incrementRecordCount(1L);
+ session.adjustCounter("Records Received", records.size(), false);
+ }
+ }
+ }
+ } catch (final Exception e) {
+ logger.error("Failed to properly receive messages from Kafka. Will roll back session and any un-committed offsets from Kafka.", e);
+
+ try {
+ if (writer != null) {
+ writer.close();
+ }
+ } catch (final Exception ioe) {
+ logger.warn("Failed to close Record Writer", ioe);
+ }
+
+ try {
+ rollback(topicPartition);
+ } catch (final Exception rollbackException) {
+ logger.warn("Attempted to rollback Kafka message offset but was unable to do so", rollbackException);
+ }
+
+ throw new ProcessException(e);
+ }
+ }
+
+
+ private void rollback(final TopicPartition topicPartition) {
+ OffsetAndMetadata offsetAndMetadata = uncommittedOffsetsMap.get(topicPartition);
+ if (offsetAndMetadata == null) {
+ offsetAndMetadata = kafkaConsumer.committed(topicPartition);
+ }
+
+ final long offset = offsetAndMetadata.offset();
+ kafkaConsumer.seek(topicPartition, offset);
+ }
+
+
+
+ private void populateAttributes(final BundleTracker tracker) {
+ final Map<String, String> kafkaAttrs = new HashMap<>();
+ kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(tracker.initialOffset));
+ if (tracker.key != null && tracker.totalRecords == 1) {
+ kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, tracker.key);
+ }
+ kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(tracker.partition));
+ kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, tracker.topic);
+ if (tracker.totalRecords > 1) {
+ // Add a record.count attribute to remain consistent with other record-oriented processors. If not
+ // reading/writing records, then use "kafka.count" attribute.
+ if (tracker.recordWriter == null) {
+ kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(tracker.totalRecords));
+ } else {
+ kafkaAttrs.put("record.count", String.valueOf(tracker.totalRecords));
+ }
+ }
+ final FlowFile newFlowFile = getProcessSession().putAllAttributes(tracker.flowFile, kafkaAttrs);
+ final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos);
+ final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, tracker.topic);
+ getProcessSession().getProvenanceReporter().receive(newFlowFile, transitUri, executionDurationMillis);
+ tracker.updateFlowFile(newFlowFile);
+ }
+
+ private static class BundleTracker {
+
+ final long initialOffset;
+ final int partition;
+ final String topic;
+ final String key;
+ final RecordSetWriter recordWriter;
+ FlowFile flowFile;
+ long totalRecords = 0;
+
+ private BundleTracker(final ConsumerRecord<byte[], byte[]> initialRecord, final TopicPartition topicPartition, final String keyEncoding) {
+ this(initialRecord, topicPartition, keyEncoding, null);
+ }
+
+ private BundleTracker(final ConsumerRecord<byte[], byte[]> initialRecord, final TopicPartition topicPartition, final String keyEncoding, final RecordSetWriter recordWriter) {
+ this.initialOffset = initialRecord.offset();
+ this.partition = topicPartition.partition();
+ this.topic = topicPartition.topic();
+ this.recordWriter = recordWriter;
+ this.key = encodeKafkaKey(initialRecord.key(), keyEncoding);
+ }
+
+ private void incrementRecordCount(final long count) {
+ totalRecords += count;
+ }
+
+ private void updateFlowFile(final FlowFile flowFile) {
+ this.flowFile = flowFile;
+ }
+
+ }
+
+ private static class BundleInformation {
+ private final TopicPartition topicPartition;
+ private final RecordSchema schema;
+ private final Map<String, String> attributes;
+
+ public BundleInformation(final TopicPartition topicPartition, final RecordSchema schema, final Map<String, String> attributes) {
+ this.topicPartition = topicPartition;
+ this.schema = schema;
+ this.attributes = attributes;
+ }
+
+ @Override
+ public int hashCode() {
+ return 41 + 13 * topicPartition.hashCode() + ((schema == null) ? 0 : 13 * schema.hashCode()) + ((attributes == null) ? 0 : 13 * attributes.hashCode());
+ }
+
+ @Override
+ public boolean equals(final Object obj) {
+ if (obj == this) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (!(obj instanceof BundleInformation)) {
+ return false;
+ }
+
+ final BundleInformation other = (BundleInformation) obj;
+ return Objects.equals(topicPartition, other.topicPartition) && Objects.equals(schema, other.schema) && Objects.equals(attributes, other.attributes);
+ }
+ }
+}
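The getAttributes() method above copies only those Kafka record headers whose names match the configured header-name regex (HEADER_NAME_REGEX) onto the outbound FlowFile, decoding the values with the configured header character set. A minimal stand-alone sketch of that filtering, with header names, values and pattern invented for the example:

    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.regex.Pattern;
    import org.apache.kafka.common.header.Header;
    import org.apache.kafka.common.header.internals.RecordHeaders;

    public class HeaderMappingSketch {
        public static void main(String[] args) {
            final Pattern headerNamePattern = Pattern.compile("trace.*");
            final RecordHeaders headers = new RecordHeaders();
            headers.add("trace.id", "abc-123".getBytes(StandardCharsets.UTF_8));
            headers.add("internal.checksum", "ffee".getBytes(StandardCharsets.UTF_8));

            final Map<String, String> attributes = new HashMap<>();
            for (final Header header : headers) {
                if (headerNamePattern.matcher(header.key()).matches()) {
                    attributes.put(header.key(), new String(header.value(), StandardCharsets.UTF_8));
                }
            }
            // Prints {trace.id=abc-123}; the non-matching header is not turned into an attribute.
            System.out.println(attributes);
        }
    }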
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java
new file mode 100644
index 0000000000..a7bd96d9df
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPool.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.io.Closeable;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
+
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.common.KafkaException;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+
+/**
+ * A pool of Kafka Consumers for a given topic. Consumers can be obtained by
+ * calling 'obtainConsumer'. Once closed the pool is ready to be immediately
+ * used again.
+ */
+public class ConsumerPool implements Closeable {
+
+ private final BlockingQueue<SimpleConsumerLease> pooledLeases;
+ private final List<String> topics;
+ private final Pattern topicPattern;
+ private final Map<String, Object> kafkaProperties;
+ private final long maxWaitMillis;
+ private final ComponentLog logger;
+ private final byte[] demarcatorBytes;
+ private final String keyEncoding;
+ private final String securityProtocol;
+ private final String bootstrapServers;
+ private final boolean honorTransactions;
+ private final RecordReaderFactory readerFactory;
+ private final RecordSetWriterFactory writerFactory;
+ private final Charset headerCharacterSet;
+ private final Pattern headerNamePattern;
+ private final AtomicLong consumerCreatedCountRef = new AtomicLong();
+ private final AtomicLong consumerClosedCountRef = new AtomicLong();
+ private final AtomicLong leasesObtainedCountRef = new AtomicLong();
+
+ /**
+ * Creates a pool of KafkaConsumer objects that will grow up to the maximum
+ * indicated threads from the given context. Consumers are lazily
+ * initialized. We may elect to not create up to the maximum number of
+ * configured consumers if the broker reported lag time for all topics is
+ * below a certain threshold.
+ *
+ * @param maxConcurrentLeases max allowable consumers at once
+ * @param demarcator bytes to use as demarcator between messages; null or
+ * empty means no demarcator
+ * @param kafkaProperties properties to use to initialize kafka consumers
+ * @param topics the topics to subscribe to
+ * @param maxWaitMillis maximum time to wait for a given lease to acquire
+ * data before committing
+ * @param keyEncoding the encoding to use for the key of a kafka message if
+ * found
+ * @param securityProtocol the security protocol used
+ * @param bootstrapServers the bootstrap servers
+ * @param logger the logger to report any errors/warnings
+ */
+ public ConsumerPool(
+ final int maxConcurrentLeases,
+ final byte[] demarcator,
+ final Map<String, Object> kafkaProperties,
+ final List<String> topics,
+ final long maxWaitMillis,
+ final String keyEncoding,
+ final String securityProtocol,
+ final String bootstrapServers,
+ final ComponentLog logger,
+ final boolean honorTransactions,
+ final Charset headerCharacterSet,
+ final Pattern headerNamePattern) {
+ this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases);
+ this.maxWaitMillis = maxWaitMillis;
+ this.logger = logger;
+ this.demarcatorBytes = demarcator;
+ this.keyEncoding = keyEncoding;
+ this.securityProtocol = securityProtocol;
+ this.bootstrapServers = bootstrapServers;
+ this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
+ this.topics = Collections.unmodifiableList(topics);
+ this.topicPattern = null;
+ this.readerFactory = null;
+ this.writerFactory = null;
+ this.honorTransactions = honorTransactions;
+ this.headerCharacterSet = headerCharacterSet;
+ this.headerNamePattern = headerNamePattern;
+ }
+
+ public ConsumerPool(
+ final int maxConcurrentLeases,
+ final byte[] demarcator,
+ final Map<String, Object> kafkaProperties,
+ final Pattern topics,
+ final long maxWaitMillis,
+ final String keyEncoding,
+ final String securityProtocol,
+ final String bootstrapServers,
+ final ComponentLog logger,
+ final boolean honorTransactions,
+ final Charset headerCharacterSet,
+ final Pattern headerNamePattern) {
+ this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases);
+ this.maxWaitMillis = maxWaitMillis;
+ this.logger = logger;
+ this.demarcatorBytes = demarcator;
+ this.keyEncoding = keyEncoding;
+ this.securityProtocol = securityProtocol;
+ this.bootstrapServers = bootstrapServers;
+ this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
+ this.topics = null;
+ this.topicPattern = topics;
+ this.readerFactory = null;
+ this.writerFactory = null;
+ this.honorTransactions = honorTransactions;
+ this.headerCharacterSet = headerCharacterSet;
+ this.headerNamePattern = headerNamePattern;
+ }
+
+ public ConsumerPool(
+ final int maxConcurrentLeases,
+ final RecordReaderFactory readerFactory,
+ final RecordSetWriterFactory writerFactory,
+ final Map<String, Object> kafkaProperties,
+ final Pattern topics,
+ final long maxWaitMillis,
+ final String securityProtocol,
+ final String bootstrapServers,
+ final ComponentLog logger,
+ final boolean honorTransactions,
+ final Charset headerCharacterSet,
+ final Pattern headerNamePattern) {
+ this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases);
+ this.maxWaitMillis = maxWaitMillis;
+ this.logger = logger;
+ this.demarcatorBytes = null;
+ this.keyEncoding = null;
+ this.readerFactory = readerFactory;
+ this.writerFactory = writerFactory;
+ this.securityProtocol = securityProtocol;
+ this.bootstrapServers = bootstrapServers;
+ this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
+ this.topics = null;
+ this.topicPattern = topics;
+ this.honorTransactions = honorTransactions;
+ this.headerCharacterSet = headerCharacterSet;
+ this.headerNamePattern = headerNamePattern;
+ }
+
+ public ConsumerPool(
+ final int maxConcurrentLeases,
+ final RecordReaderFactory readerFactory,
+ final RecordSetWriterFactory writerFactory,
+ final Map<String, Object> kafkaProperties,
+ final List<String> topics,
+ final long maxWaitMillis,
+ final String securityProtocol,
+ final String bootstrapServers,
+ final ComponentLog logger,
+ final boolean honorTransactions,
+ final Charset headerCharacterSet,
+ final Pattern headerNamePattern) {
+ this.pooledLeases = new ArrayBlockingQueue<>(maxConcurrentLeases);
+ this.maxWaitMillis = maxWaitMillis;
+ this.logger = logger;
+ this.demarcatorBytes = null;
+ this.keyEncoding = null;
+ this.readerFactory = readerFactory;
+ this.writerFactory = writerFactory;
+ this.securityProtocol = securityProtocol;
+ this.bootstrapServers = bootstrapServers;
+ this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
+ this.topics = topics;
+ this.topicPattern = null;
+ this.honorTransactions = honorTransactions;
+ this.headerCharacterSet = headerCharacterSet;
+ this.headerNamePattern = headerNamePattern;
+ }
+
+ /**
+ * Obtains a consumer from the pool if one is available or lazily
+ * initializes a new one if deemed necessary.
+ *
+ * @param session the session for which the consumer lease will be
+ * associated
+ * @param processContext the ProcessContext for which the consumer
+ * lease will be associated
+ * @return consumer to use or null if not available or necessary
+ */
+ public ConsumerLease obtainConsumer(final ProcessSession session, final ProcessContext processContext) {
+ SimpleConsumerLease lease = pooledLeases.poll();
+ if (lease == null) {
+ final Consumer<byte[], byte[]> consumer = createKafkaConsumer();
+ consumerCreatedCountRef.incrementAndGet();
+ /**
+ * For now return a new consumer lease. But we could later elect to
+ * have this return null if we determine the broker indicates that
+ * the lag time on all topics being monitored is sufficiently low.
+ * For now we should encourage conservative use of threads because
+ * having too many means we'll have at best useless threads sitting
+ * around doing frequent network calls and at worst having consumers
+ * sitting idle which could prompt excessive rebalances.
+ */
+ lease = new SimpleConsumerLease(consumer);
+ /**
+ * This subscription tightly couples the lease to the given
+ * consumer. They cannot be separated from then on.
+ */
+ if (topics != null) {
+ consumer.subscribe(topics, lease);
+ } else {
+ consumer.subscribe(topicPattern, lease);
+ }
+ }
+ lease.setProcessSession(session, processContext);
+
+ leasesObtainedCountRef.incrementAndGet();
+ return lease;
+ }
+
+ /**
+ * Exposed as protected method for easier unit testing
+ *
+ * @return consumer
+ * @throws KafkaException if unable to subscribe to the given topics
+ */
+ protected Consumer<byte[], byte[]> createKafkaConsumer() {
+ final Map<String, Object> properties = new HashMap<>(kafkaProperties);
+ if (honorTransactions) {
+ properties.put("isolation.level", "read_committed");
+ } else {
+ properties.put("isolation.level", "read_uncommitted");
+ }
+ final Consumer<byte[], byte[]> consumer = new KafkaConsumer<>(properties);
+ return consumer;
+ }
+
+ /**
+ * Closes all consumers in the pool. Can be safely called repeatedly.
+ */
+ @Override
+ public void close() {
+ final List<SimpleConsumerLease> leases = new ArrayList<>();
+ pooledLeases.drainTo(leases);
+ leases.stream().forEach((lease) -> {
+ lease.close(true);
+ });
+ }
+
+ private void closeConsumer(final Consumer, ?> consumer) {
+ consumerClosedCountRef.incrementAndGet();
+ try {
+ consumer.unsubscribe();
+ } catch (Exception e) {
+ logger.warn("Failed while unsubscribing " + consumer, e);
+ }
+
+ try {
+ consumer.close();
+ } catch (Exception e) {
+ logger.warn("Failed while closing " + consumer, e);
+ }
+ }
+
+ PoolStats getPoolStats() {
+ return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get());
+ }
+
+ private class SimpleConsumerLease extends ConsumerLease {
+
+ private final Consumer<byte[], byte[]> consumer;
+ private volatile ProcessSession session;
+ private volatile ProcessContext processContext;
+ private volatile boolean closedConsumer;
+
+ private SimpleConsumerLease(final Consumer<byte[], byte[]> consumer) {
+ super(maxWaitMillis, consumer, demarcatorBytes, keyEncoding, securityProtocol, bootstrapServers,
+ readerFactory, writerFactory, logger, headerCharacterSet, headerNamePattern);
+ this.consumer = consumer;
+ }
+
+ void setProcessSession(final ProcessSession session, final ProcessContext context) {
+ this.session = session;
+ this.processContext = context;
+ }
+
+ @Override
+ public void yield() {
+ if (processContext != null) {
+ processContext.yield();
+ }
+ }
+
+ @Override
+ public ProcessSession getProcessSession() {
+ return session;
+ }
+
+ @Override
+ public void close() {
+ super.close();
+ close(false);
+ }
+
+ public void close(final boolean forceClose) {
+ if (closedConsumer) {
+ return;
+ }
+ super.close();
+ if (session != null) {
+ session.rollback();
+ setProcessSession(null, null);
+ }
+ if (forceClose || isPoisoned() || !pooledLeases.offer(this)) {
+ closedConsumer = true;
+ closeConsumer(consumer);
+ }
+ }
+ }
+
+ static final class PoolStats {
+
+ final long consumerCreatedCount;
+ final long consumerClosedCount;
+ final long leasesObtainedCount;
+
+ PoolStats(
+ final long consumerCreatedCount,
+ final long consumerClosedCount,
+ final long leasesObtainedCount
+ ) {
+ this.consumerCreatedCount = consumerCreatedCount;
+ this.consumerClosedCount = consumerClosedCount;
+ this.leasesObtainedCount = leasesObtainedCount;
+ }
+
+ @Override
+ public String toString() {
+ return "Created Consumers [" + consumerCreatedCount + "]\n"
+ + "Closed Consumers [" + consumerClosedCount + "]\n"
+ + "Leases Obtained [" + leasesObtainedCount + "]\n";
+ }
+
+ }
+
+}
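To make the overloaded constructors above concrete, the following minimal sketch shows how a demarcator-based pool for a fixed topic list could be built, roughly as ConsumeKafka_1_0.createConsumerPool() does. The broker address, group id, topic, demarcator and wait time are placeholder values, and the sketch assumes it lives in the org.apache.nifi.processors.kafka.pubsub package with a ComponentLog supplied by the caller (for example a processor's getLogger()).

    package org.apache.nifi.processors.kafka.pubsub;

    import java.nio.charset.StandardCharsets;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.common.serialization.ByteArrayDeserializer;
    import org.apache.nifi.logging.ComponentLog;

    public class ConsumerPoolSketch {
        public static ConsumerPool buildDemarcatedPool(final ComponentLog logger) {
            final Map<String, Object> props = new HashMap<>();
            props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            props.put(ConsumerConfig.GROUP_ID_CONFIG, "nifi");
            props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
            props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
            props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());

            return new ConsumerPool(
                    1,                                      // maxConcurrentLeases
                    "\n".getBytes(StandardCharsets.UTF_8),  // demarcator between bundled messages
                    props,
                    Collections.singletonList("my-topic"),  // fixed topic list
                    10_000L,                                // max uncommitted wait, in milliseconds
                    "utf-8",                                // key attribute encoding
                    "PLAINTEXT",                            // security protocol
                    "localhost:9092",                       // bootstrap servers, used for provenance transit URIs
                    logger,
                    true,                                   // honor transactions -> isolation.level=read_committed
                    StandardCharsets.UTF_8,                 // header character set
                    null);                                  // no header-name pattern, so no header attributes
        }
    }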
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java
new file mode 100644
index 0000000000..317b274e8a
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/InFlightMessageTracker.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+
+public class InFlightMessageTracker {
+ private final ConcurrentMap<FlowFile, Counts> messageCountsByFlowFile = new ConcurrentHashMap<>();
+ private final ConcurrentMap<FlowFile, Exception> failures = new ConcurrentHashMap<>();
+ private final Object progressMutex = new Object();
+ private final ComponentLog logger;
+
+ public InFlightMessageTracker(final ComponentLog logger) {
+ this.logger = logger;
+ }
+
+ public void incrementAcknowledgedCount(final FlowFile flowFile) {
+ final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts());
+ counter.incrementAcknowledgedCount();
+
+ synchronized (progressMutex) {
+ progressMutex.notify();
+ }
+ }
+
+ public void trackEmpty(final FlowFile flowFile) {
+ messageCountsByFlowFile.putIfAbsent(flowFile, new Counts());
+ }
+
+ public int getAcknowledgedCount(final FlowFile flowFile) {
+ final Counts counter = messageCountsByFlowFile.get(flowFile);
+ return (counter == null) ? 0 : counter.getAcknowledgedCount();
+ }
+
+ public void incrementSentCount(final FlowFile flowFile) {
+ final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts());
+ counter.incrementSentCount();
+ }
+
+ public int getSentCount(final FlowFile flowFile) {
+ final Counts counter = messageCountsByFlowFile.get(flowFile);
+ return (counter == null) ? 0 : counter.getSentCount();
+ }
+
+ public void fail(final FlowFile flowFile, final Exception exception) {
+ failures.putIfAbsent(flowFile, exception);
+ logger.error("Failed to send " + flowFile + " to Kafka", exception);
+
+ synchronized (progressMutex) {
+ progressMutex.notify();
+ }
+ }
+
+ public Exception getFailure(final FlowFile flowFile) {
+ return failures.get(flowFile);
+ }
+
+ public boolean isFailed(final FlowFile flowFile) {
+ return getFailure(flowFile) != null;
+ }
+
+ public void reset() {
+ messageCountsByFlowFile.clear();
+ failures.clear();
+ }
+
+ public PublishResult failOutstanding(final Exception exception) {
+ messageCountsByFlowFile.keySet().stream()
+ .filter(ff -> !isComplete(ff))
+ .filter(ff -> !failures.containsKey(ff))
+ .forEach(ff -> failures.put(ff, exception));
+
+ return createPublishResult();
+ }
+
+ private boolean isComplete(final FlowFile flowFile) {
+ final Counts counts = messageCountsByFlowFile.get(flowFile);
+ if (counts.getAcknowledgedCount() == counts.getSentCount()) {
+ // all messages received successfully.
+ return true;
+ }
+
+ if (failures.containsKey(flowFile)) {
+ // FlowFile failed so is complete
+ return true;
+ }
+
+ return false;
+ }
+
+ private boolean isComplete() {
+ return messageCountsByFlowFile.keySet().stream()
+ .allMatch(flowFile -> isComplete(flowFile));
+ }
+
+ void awaitCompletion(final long millis) throws InterruptedException, TimeoutException {
+ final long startTime = System.nanoTime();
+ final long maxTime = startTime + TimeUnit.MILLISECONDS.toNanos(millis);
+
+ while (System.nanoTime() < maxTime) {
+ synchronized (progressMutex) {
+ if (isComplete()) {
+ return;
+ }
+
+ progressMutex.wait(millis);
+ }
+ }
+
+ throw new TimeoutException();
+ }
+
+
+ PublishResult createPublishResult() {
+ return new PublishResult() {
+ @Override
+ public boolean isFailure() {
+ return !failures.isEmpty();
+ }
+
+ @Override
+ public int getSuccessfulMessageCount(final FlowFile flowFile) {
+ return getAcknowledgedCount(flowFile);
+ }
+
+ @Override
+ public Exception getReasonForFailure(final FlowFile flowFile) {
+ return getFailure(flowFile);
+ }
+ };
+ }
+
+ public static class Counts {
+ private final AtomicInteger sentCount = new AtomicInteger(0);
+ private final AtomicInteger acknowledgedCount = new AtomicInteger(0);
+
+ public void incrementSentCount() {
+ sentCount.incrementAndGet();
+ }
+
+ public void incrementAcknowledgedCount() {
+ acknowledgedCount.incrementAndGet();
+ }
+
+ public int getAcknowledgedCount() {
+ return acknowledgedCount.get();
+ }
+
+ public int getSentCount() {
+ return sentCount.get();
+ }
+ }
+}
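InFlightMessageTracker is the bookkeeping half of the publish path: the caller increments the sent count as each message is handed to the Kafka producer, the producer callback acknowledges or fails it, and awaitCompletion() blocks until every tracked FlowFile is resolved or the timeout elapses. Below is a minimal sketch of that interplay; the producer, record, FlowFile and timeout are assumptions for the example (the real wiring lives in the publisher-side classes of this bundle), and the sketch assumes the same package so it can reach the package-private methods.

    package org.apache.nifi.processors.kafka.pubsub;

    import java.util.concurrent.TimeoutException;
    import org.apache.kafka.clients.producer.Producer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.nifi.flowfile.FlowFile;
    import org.apache.nifi.logging.ComponentLog;

    public class InFlightTrackerSketch {
        public static PublishResult publishOne(final Producer<byte[], byte[]> producer, final ProducerRecord<byte[], byte[]> record,
                                               final FlowFile flowFile, final ComponentLog logger) throws InterruptedException {
            final InFlightMessageTracker tracker = new InFlightMessageTracker(logger);

            tracker.incrementSentCount(flowFile);
            producer.send(record, (metadata, exception) -> {
                if (exception == null) {
                    tracker.incrementAcknowledgedCount(flowFile);  // wakes any thread waiting in awaitCompletion()
                } else {
                    tracker.fail(flowFile, exception);             // also wakes awaitCompletion()
                }
            });

            try {
                tracker.awaitCompletion(5_000L);                   // wait up to 5 seconds for the acknowledgement
                return tracker.createPublishResult();
            } catch (final TimeoutException e) {
                return tracker.failOutstanding(e);                 // mark anything unresolved as failed
            }
        }
    }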
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java
new file mode 100644
index 0000000000..de289959ea
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/KafkaProcessorUtils.java
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.config.SaslConfigs;
+import org.apache.kafka.common.config.SslConfigs;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.components.Validator;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.ssl.SSLContextService;
+import org.apache.nifi.util.FormatUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class KafkaProcessorUtils {
+
+ final Logger logger = LoggerFactory.getLogger(this.getClass());
+
+ static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string.");
+ static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded",
+ "The key is interpreted as arbitrary binary data and is encoded using hexadecimal characters with uppercase letters");
+
+ static final Pattern HEX_KEY_PATTERN = Pattern.compile("(?:[0123456789abcdefABCDEF]{2})+");
+
+ static final String KAFKA_KEY = "kafka.key";
+ static final String KAFKA_TOPIC = "kafka.topic";
+ static final String KAFKA_PARTITION = "kafka.partition";
+ static final String KAFKA_OFFSET = "kafka.offset";
+ static final String KAFKA_COUNT = "kafka.count";
+ static final AllowableValue SEC_PLAINTEXT = new AllowableValue("PLAINTEXT", "PLAINTEXT", "PLAINTEXT");
+ static final AllowableValue SEC_SSL = new AllowableValue("SSL", "SSL", "SSL");
+ static final AllowableValue SEC_SASL_PLAINTEXT = new AllowableValue("SASL_PLAINTEXT", "SASL_PLAINTEXT", "SASL_PLAINTEXT");
+ static final AllowableValue SEC_SASL_SSL = new AllowableValue("SASL_SSL", "SASL_SSL", "SASL_SSL");
+
+ static final PropertyDescriptor BOOTSTRAP_SERVERS = new PropertyDescriptor.Builder()
+ .name(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG)
+ .displayName("Kafka Brokers")
+ .description("A comma-separated list of known Kafka Brokers in the format :")
+ .required(true)
+ .addValidator(StandardValidators.HOSTNAME_PORT_LIST_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .defaultValue("localhost:9092")
+ .build();
+ static final PropertyDescriptor SECURITY_PROTOCOL = new PropertyDescriptor.Builder()
+ .name("security.protocol")
+ .displayName("Security Protocol")
+ .description("Protocol used to communicate with brokers. Corresponds to Kafka's 'security.protocol' property.")
+ .required(true)
+ .expressionLanguageSupported(false)
+ .allowableValues(SEC_PLAINTEXT, SEC_SSL, SEC_SASL_PLAINTEXT, SEC_SASL_SSL)
+ .defaultValue(SEC_PLAINTEXT.getValue())
+ .build();
+ static final PropertyDescriptor KERBEROS_PRINCIPLE = new PropertyDescriptor.Builder()
+ .name("sasl.kerberos.service.name")
+ .displayName("Kerberos Service Name")
+ .description("The Kerberos principal name that Kafka runs as. This can be defined either in Kafka's JAAS config or in Kafka's config. "
+ + "Corresponds to Kafka's 'security.protocol' property."
+ + "It is ignored unless one of the SASL options of the are selected.")
+ .required(false)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .build();
+ static final PropertyDescriptor USER_PRINCIPAL = new PropertyDescriptor.Builder()
+ .name("sasl.kerberos.principal")
+ .displayName("Kerberos Principal")
+ .description("The Kerberos principal that will be used to connect to brokers. If not set, it is expected to set a JAAS configuration file "
+ + "in the JVM properties defined in the bootstrap.conf file. This principal will be set into 'sasl.jaas.config' Kafka's property.")
+ .required(false)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .build();
+ static final PropertyDescriptor USER_KEYTAB = new PropertyDescriptor.Builder()
+ .name("sasl.kerberos.keytab")
+ .displayName("Kerberos Keytab")
+ .description("The Kerberos keytab that will be used to connect to brokers. If not set, it is expected to set a JAAS configuration file "
+ + "in the JVM properties defined in the bootstrap.conf file. This principal will be set into 'sasl.jaas.config' Kafka's property.")
+ .required(false)
+ .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .build();
+ static final PropertyDescriptor SSL_CONTEXT_SERVICE = new PropertyDescriptor.Builder()
+ .name("ssl.context.service")
+ .displayName("SSL Context Service")
+ .description("Specifies the SSL Context Service to use for communicating with Kafka.")
+ .required(false)
+ .identifiesControllerService(SSLContextService.class)
+ .build();
+
+ static List<PropertyDescriptor> getCommonPropertyDescriptors() {
+ return Arrays.asList(
+ BOOTSTRAP_SERVERS,
+ SECURITY_PROTOCOL,
+ KERBEROS_PRINCIPLE,
+ USER_PRINCIPAL,
+ USER_KEYTAB,
+ SSL_CONTEXT_SERVICE
+ );
+ }
+
+ static Collection<ValidationResult> validateCommonProperties(final ValidationContext validationContext) {
+ List<ValidationResult> results = new ArrayList<>();
+
+ String securityProtocol = validationContext.getProperty(SECURITY_PROTOCOL).getValue();
+
+ /*
+ * validates that if one of the SASL (Kerberos) options is selected for
+ * the security protocol, then the Kerberos principal is provided as well
+ */
+ if (SEC_SASL_PLAINTEXT.getValue().equals(securityProtocol) || SEC_SASL_SSL.getValue().equals(securityProtocol)) {
+ String kerberosPrincipal = validationContext.getProperty(KERBEROS_PRINCIPLE).getValue();
+ if (kerberosPrincipal == null || kerberosPrincipal.trim().length() == 0) {
+ results.add(new ValidationResult.Builder().subject(KERBEROS_PRINCIPLE.getDisplayName()).valid(false)
+ .explanation("The <" + KERBEROS_PRINCIPLE.getDisplayName() + "> property must be set when <"
+ + SECURITY_PROTOCOL.getDisplayName() + "> is configured as '"
+ + SEC_SASL_PLAINTEXT.getValue() + "' or '" + SEC_SASL_SSL.getValue() + "'.")
+ .build());
+ }
+
+ String userKeytab = validationContext.getProperty(USER_KEYTAB).getValue();
+ String userPrincipal = validationContext.getProperty(USER_PRINCIPAL).getValue();
+ if((StringUtils.isBlank(userKeytab) && !StringUtils.isBlank(userPrincipal))
+ || (!StringUtils.isBlank(userKeytab) && StringUtils.isBlank(userPrincipal))) {
+ results.add(new ValidationResult.Builder().subject(KERBEROS_PRINCIPLE.getDisplayName()).valid(false)
+ .explanation("Both <" + USER_KEYTAB.getDisplayName() + "> and <" + USER_PRINCIPAL.getDisplayName() + "> "
+ + "must be set.")
+ .build());
+ }
+ }
+
+ //If SSL or SASL_SSL then CS must be set.
+ final boolean sslProtocol = SEC_SSL.getValue().equals(securityProtocol) || SEC_SASL_SSL.getValue().equals(securityProtocol);
+ final boolean csSet = validationContext.getProperty(SSL_CONTEXT_SERVICE).isSet();
+ if (csSet && !sslProtocol) {
+ results.add(new ValidationResult.Builder().subject(SECURITY_PROTOCOL.getDisplayName()).valid(false)
+ .explanation("If you set the SSL Controller Service you should also choose an SSL based security protocol.").build());
+ }
+ if (!csSet && sslProtocol) {
+ results.add(new ValidationResult.Builder().subject(SSL_CONTEXT_SERVICE.getDisplayName()).valid(false)
+ .explanation("If you set to an SSL based protocol you need to set the SSL Controller Service").build());
+ }
+
+ final String enableAutoCommit = validationContext.getProperty(new PropertyDescriptor.Builder().name(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG).build()).getValue();
+ if (enableAutoCommit != null && !enableAutoCommit.toLowerCase().equals("false")) {
+ results.add(new ValidationResult.Builder().subject(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG)
+ .explanation("Enable auto commit must be false. It is managed by the processor.").build());
+ }
+
+ final String keySerializer = validationContext.getProperty(new PropertyDescriptor.Builder().name(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).build()).getValue();
+ if (keySerializer != null && !ByteArraySerializer.class.getName().equals(keySerializer)) {
+ results.add(new ValidationResult.Builder().subject(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG)
+ .explanation("Key Serializer must be " + ByteArraySerializer.class.getName() + "' was '" + keySerializer + "'").build());
+ }
+
+ final String valueSerializer = validationContext.getProperty(new PropertyDescriptor.Builder().name(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).build()).getValue();
+ if (valueSerializer != null && !ByteArraySerializer.class.getName().equals(valueSerializer)) {
+ results.add(new ValidationResult.Builder().subject(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG)
+ .explanation("Value Serializer must be " + ByteArraySerializer.class.getName() + "' was '" + valueSerializer + "'").build());
+ }
+
+ final String keyDeSerializer = validationContext.getProperty(new PropertyDescriptor.Builder().name(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG).build()).getValue();
+ if (keyDeSerializer != null && !ByteArrayDeserializer.class.getName().equals(keyDeSerializer)) {
+ results.add(new ValidationResult.Builder().subject(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG)
+ .explanation("Key De-Serializer must be '" + ByteArrayDeserializer.class.getName() + "' was '" + keyDeSerializer + "'").build());
+ }
+
+ final String valueDeSerializer = validationContext.getProperty(new PropertyDescriptor.Builder().name(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG).build()).getValue();
+ if (valueDeSerializer != null && !ByteArrayDeserializer.class.getName().equals(valueDeSerializer)) {
+ results.add(new ValidationResult.Builder().subject(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG)
+ .explanation("Value De-Serializer must be " + ByteArrayDeserializer.class.getName() + "' was '" + valueDeSerializer + "'").build());
+ }
+
+ return results;
+ }
+
+ static final class KafkaConfigValidator implements Validator {
+
+ final Class<?> classType;
+
+ public KafkaConfigValidator(final Class<?> classType) {
+ this.classType = classType;
+ }
+
+ @Override
+ public ValidationResult validate(final String subject, final String value, final ValidationContext context) {
+ final boolean knownValue = KafkaProcessorUtils.isStaticStringFieldNamePresent(subject, classType, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class);
+ return new ValidationResult.Builder().subject(subject).explanation("Must be a known configuration parameter for this kafka client").valid(knownValue).build();
+ }
+ };
+
+ /**
+ * Builds transit URI for provenance event. The transit URI will be in the
+ * form of <security.protocol>://<bootstrap.servers>/topic
+ */
+ static String buildTransitURI(String securityProtocol, String brokers, String topic) {
+ StringBuilder builder = new StringBuilder();
+ builder.append(securityProtocol);
+ builder.append("://");
+ builder.append(brokers);
+ builder.append("/");
+ builder.append(topic);
+ return builder.toString();
+ }
+
+
+ static void buildCommonKafkaProperties(final ProcessContext context, final Class<?> kafkaConfigClass, final Map<String, Object> mapToPopulate) {
+ for (PropertyDescriptor propertyDescriptor : context.getProperties().keySet()) {
+ if (propertyDescriptor.equals(SSL_CONTEXT_SERVICE)) {
+ // Translate SSLContext Service configuration into Kafka properties
+ final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
+ if (sslContextService != null && sslContextService.isKeyStoreConfigured()) {
+ mapToPopulate.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, sslContextService.getKeyStoreFile());
+ mapToPopulate.put(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, sslContextService.getKeyStorePassword());
+ final String keyPass = sslContextService.getKeyPassword() == null ? sslContextService.getKeyStorePassword() : sslContextService.getKeyPassword();
+ mapToPopulate.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, keyPass);
+ mapToPopulate.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, sslContextService.getKeyStoreType());
+ }
+
+ if (sslContextService != null && sslContextService.isTrustStoreConfigured()) {
+ mapToPopulate.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, sslContextService.getTrustStoreFile());
+ mapToPopulate.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, sslContextService.getTrustStorePassword());
+ mapToPopulate.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, sslContextService.getTrustStoreType());
+ }
+ }
+
+ String propertyName = propertyDescriptor.getName();
+ String propertyValue = propertyDescriptor.isExpressionLanguageSupported()
+ ? context.getProperty(propertyDescriptor).evaluateAttributeExpressions().getValue()
+ : context.getProperty(propertyDescriptor).getValue();
+
+ if (propertyValue != null && !propertyName.equals(USER_PRINCIPAL.getName()) && !propertyName.equals(USER_KEYTAB.getName())) {
+ // If the property name ends in ".ms" then it is a time period. We want to accept either an integer as number of milliseconds
+ // or the standard NiFi time period such as "5 secs"
+ if (propertyName.endsWith(".ms") && !StringUtils.isNumeric(propertyValue.trim())) { // kafka standard time notation
+ propertyValue = String.valueOf(FormatUtils.getTimeDuration(propertyValue.trim(), TimeUnit.MILLISECONDS));
+ }
+
+ if (isStaticStringFieldNamePresent(propertyName, kafkaConfigClass, CommonClientConfigs.class, SslConfigs.class, SaslConfigs.class)) {
+ mapToPopulate.put(propertyName, propertyValue);
+ }
+ }
+ }
+
+ String securityProtocol = context.getProperty(SECURITY_PROTOCOL).getValue();
+ if (SEC_SASL_PLAINTEXT.getValue().equals(securityProtocol) || SEC_SASL_SSL.getValue().equals(securityProtocol)) {
+ setJaasConfig(mapToPopulate, context);
+ }
+ }
+
+ /**
+ * Method used to configure the 'sasl.jaas.config' property based on KAFKA-4259
+ * https://cwiki.apache.org/confluence/display/KAFKA/KIP-85%3A+Dynamic+JAAS+configuration+for+Kafka+clients
+ *
+ * It expects something with the following format:
+ *
+ * <LoginModuleClass> <ControlFlag> *(<OptionName>=<OptionValue>);
+ * ControlFlag = required / requisite / sufficient / optional
+ *
+ * @param mapToPopulate Map of configuration properties
+ * @param context Context
+ */
+ private static void setJaasConfig(Map<String, Object> mapToPopulate, ProcessContext context) {
+ String keytab = context.getProperty(USER_KEYTAB).getValue();
+ String principal = context.getProperty(USER_PRINCIPAL).getValue();
+ String serviceName = context.getProperty(KERBEROS_PRINCIPLE).getValue();
+ if(StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal) && StringUtils.isNotBlank(serviceName)) {
+ mapToPopulate.put(SaslConfigs.SASL_JAAS_CONFIG, "com.sun.security.auth.module.Krb5LoginModule required "
+ + "useTicketCache=false "
+ + "renewTicket=true "
+ + "serviceName=\"" + serviceName + "\" "
+ + "useKeyTab=true "
+ + "keyTab=\"" + keytab + "\" "
+ + "principal=\"" + principal + "\";");
+ }
+ }
+
+ private static boolean isStaticStringFieldNamePresent(final String name, final Class<?>... classes) {
+ return KafkaProcessorUtils.getPublicStaticStringFieldValues(classes).contains(name);
+ }
+
+ private static Set<String> getPublicStaticStringFieldValues(final Class<?>... classes) {
+ final Set strings = new HashSet<>();
+ for (final Class<?> classType : classes) {
+ for (final Field field : classType.getDeclaredFields()) {
+ if (Modifier.isPublic(field.getModifiers()) && Modifier.isStatic(field.getModifiers()) && field.getType().equals(String.class)) {
+ try {
+ strings.add(String.valueOf(field.get(null)));
+ } catch (IllegalArgumentException | IllegalAccessException ex) {
+ //ignore
+ }
+ }
+ }
+ }
+ return strings;
+ }
+
+}
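
A minimal sketch of how a processor in this bundle is expected to consume the utilities above, assuming a hypothetical helper class (KafkaConfigExample is not part of the patch): the shared descriptors plus any dynamic properties are translated into Kafka client configuration via buildCommonKafkaProperties, and the serializers are pinned to byte arrays, which is exactly what validateCommonProperties enforces.

package org.apache.nifi.processors.kafka.pubsub;

import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.processor.ProcessContext;

// Hypothetical illustration only; the real processors do this when building their producer/consumer.
final class KafkaConfigExample {

    static Map<String, Object> producerProperties(final ProcessContext context) {
        final Map<String, Object> kafkaProps = new HashMap<>();
        // Translate the shared descriptors (bootstrap servers, security protocol, SSL context,
        // Kerberos settings) and any dynamic properties into Kafka client configuration.
        KafkaProcessorUtils.buildCommonKafkaProperties(context, ProducerConfig.class, kafkaProps);
        // The serializers are fixed to byte arrays, matching what validateCommonProperties() enforces.
        kafkaProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        kafkaProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        return kafkaProps;
    }
}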
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/Partitioners.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/Partitioners.java
new file mode 100644
index 0000000000..64ab4ce9c8
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/Partitioners.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.util.Map;
+
+import org.apache.kafka.clients.producer.Partitioner;
+import org.apache.kafka.common.Cluster;
+
+/**
+ * Collection of implementations of common Kafka {@link Partitioner}s.
+ */
+final public class Partitioners {
+
+ private Partitioners() {
+ }
+
+ /**
+ * {@link Partitioner} that implements 'round-robin' mechanism which evenly
+ * distributes load between all available partitions.
+ */
+ public static class RoundRobinPartitioner implements Partitioner {
+
+ private volatile int index;
+
+ @Override
+ public void configure(Map<String, ?> configs) {
+ // noop
+ }
+
+ @Override
+ public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
+ return this.next(cluster.availablePartitionsForTopic(topic).size());
+ }
+
+ @Override
+ public void close() {
+ // noop
+ }
+
+ private synchronized int next(int numberOfPartitions) {
+ if (this.index >= numberOfPartitions) {
+ this.index = 0;
+ }
+ return index++;
+ }
+ }
+}
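
A minimal usage sketch, assuming a hypothetical helper class (PartitionerConfigExample) and a placeholder broker address: selecting the round-robin partitioner on a plain Kafka producer configuration is simply a matter of putting its class name under partitioner.class, which is also what the publishing processors' Partitioner class property resolves to.

package org.apache.nifi.processors.kafka.pubsub;

import java.util.Properties;

import org.apache.kafka.clients.producer.ProducerConfig;

// Hypothetical illustration: selecting the round-robin partitioner on a plain producer config.
final class PartitionerConfigExample {

    static Properties withRoundRobin() {
        final Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder broker
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, Partitioners.RoundRobinPartitioner.class.getName());
        return props;
    }
}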
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaRecord_1_0.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaRecord_1_0.java
new file mode 100644
index 0000000000..c125d622ba
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublishKafkaRecord_1_0.java
@@ -0,0 +1,443 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.nifi.annotation.behavior.DynamicProperty;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnStopped;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.DataUnit;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.FlowFileFilters;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.RecordSet;
+
+@Tags({"Apache", "Kafka", "Record", "csv", "json", "avro", "logs", "Put", "Send", "Message", "PubSub", "1.0"})
+@CapabilityDescription("Sends the contents of a FlowFile as individual records to Apache Kafka using the Kafka 1.0 Producer API. "
+ + "The contents of the FlowFile are expected to be record-oriented data that can be read by the configured Record Reader. "
+ + "The complementary NiFi processor for fetching messages is ConsumeKafkaRecord_1_0.")
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
+ description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ + " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ + " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ")
+@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Kafka for this FlowFile. This attribute is added only to "
+ + "FlowFiles that are routed to success.")
+@SeeAlso({PublishKafka_1_0.class, ConsumeKafka_1_0.class, ConsumeKafkaRecord_1_0.class})
+public class PublishKafkaRecord_1_0 extends AbstractProcessor {
+ protected static final String MSG_COUNT = "msg.count";
+
+ static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery",
+ "FlowFile will be routed to failure unless the message is replicated to the appropriate "
+ + "number of Kafka Nodes according to the Topic configuration");
+ static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery",
+ "FlowFile will be routed to success if the message is received by a single Kafka node, "
+ + "whether or not it is replicated. This is faster than "
+ + "but can result in data loss if a Kafka node crashes");
+ static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort",
+ "FlowFile will be routed to success after successfully writing the content to a Kafka node, "
+ + "without waiting for a response. This provides the best performance but may result in data loss.");
+
+ static final AllowableValue ROUND_ROBIN_PARTITIONING = new AllowableValue(Partitioners.RoundRobinPartitioner.class.getName(),
+ Partitioners.RoundRobinPartitioner.class.getSimpleName(),
+ "Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, "
+ + "the next Partition to Partition 2, and so on, wrapping as necessary.");
+ static final AllowableValue RANDOM_PARTITIONING = new AllowableValue("org.apache.kafka.clients.producer.internals.DefaultPartitioner",
+ "DefaultPartitioner", "Messages will be assigned to random partitions.");
+
+ static final AllowableValue UTF8_ENCODING = new AllowableValue("utf-8", "UTF-8 Encoded", "The key is interpreted as a UTF-8 Encoded string.");
+ static final AllowableValue HEX_ENCODING = new AllowableValue("hex", "Hex Encoded",
+ "The key is interpreted as arbitrary binary data that is encoded using hexadecimal characters with uppercase letters.");
+
+ static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder()
+ .name("topic")
+ .displayName("Topic Name")
+ .description("The name of the Kafka Topic to publish to.")
+ .required(true)
+ .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .build();
+
+ static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
+ .name("record-reader")
+ .displayName("Record Reader")
+ .description("The Record Reader to use for incoming FlowFiles")
+ .identifiesControllerService(RecordReaderFactory.class)
+ .expressionLanguageSupported(false)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor RECORD_WRITER = new PropertyDescriptor.Builder()
+ .name("record-writer")
+ .displayName("Record Writer")
+ .description("The Record Writer to use in order to serialize the data before sending to Kafka")
+ .identifiesControllerService(RecordSetWriterFactory.class)
+ .expressionLanguageSupported(false)
+ .required(true)
+ .build();
+
+ static final PropertyDescriptor MESSAGE_KEY_FIELD = new PropertyDescriptor.Builder()
+ .name("message-key-field")
+ .displayName("Message Key Field")
+ .description("The name of a field in the Input Records that should be used as the Key for the Kafka message.")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .required(false)
+ .build();
+
+ static final PropertyDescriptor DELIVERY_GUARANTEE = new PropertyDescriptor.Builder()
+ .name("acks")
+ .displayName("Delivery Guarantee")
+ .description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.")
+ .required(true)
+ .expressionLanguageSupported(false)
+ .allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED)
+ .defaultValue(DELIVERY_BEST_EFFORT.getValue())
+ .build();
+
+ static final PropertyDescriptor METADATA_WAIT_TIME = new PropertyDescriptor.Builder()
+ .name("max.block.ms")
+ .displayName("Max Metadata Wait Time")
+ .description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the "
+ + "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property")
+ .required(true)
+ .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .defaultValue("5 sec")
+ .build();
+
+ static final PropertyDescriptor ACK_WAIT_TIME = new PropertyDescriptor.Builder()
+ .name("ack.wait.time")
+ .displayName("Acknowledgment Wait Time")
+ .description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. "
+ + "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.")
+ .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .required(true)
+ .defaultValue("5 secs")
+ .build();
+
+ static final PropertyDescriptor MAX_REQUEST_SIZE = new PropertyDescriptor.Builder()
+ .name("max.request.size")
+ .displayName("Max Request Size")
+ .description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).")
+ .required(true)
+ .addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
+ .defaultValue("1 MB")
+ .build();
+
+ static final PropertyDescriptor PARTITION_CLASS = new PropertyDescriptor.Builder()
+ .name("partitioner.class")
+ .displayName("Partitioner class")
+ .description("Specifies which class to use to compute a partition id for a message. Corresponds to Kafka's 'partitioner.class' property.")
+ .allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING)
+ .defaultValue(RANDOM_PARTITIONING.getValue())
+ .required(false)
+ .build();
+
+ static final PropertyDescriptor COMPRESSION_CODEC = new PropertyDescriptor.Builder()
+ .name("compression.type")
+ .displayName("Compression Type")
+ .description("This parameter allows you to specify the compression codec for all data generated by this producer.")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .allowableValues("none", "gzip", "snappy", "lz4")
+ .defaultValue("none")
+ .build();
+
+ static final PropertyDescriptor ATTRIBUTE_NAME_REGEX = new PropertyDescriptor.Builder()
+ .name("attribute-name-regex")
+ .displayName("Attributes to Send as Headers (Regex)")
+ .description("A Regular Expression that is matched against all FlowFile attribute names. "
+ + "Any attribute whose name matches the regex will be added to the Kafka messages as a Header. "
+ + "If not specified, no FlowFile attributes will be added as headers.")
+ .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
+ .expressionLanguageSupported(false)
+ .required(false)
+ .build();
+ static final PropertyDescriptor USE_TRANSACTIONS = new PropertyDescriptor.Builder()
+ .name("use-transactions")
+ .displayName("Use Transactions")
+ .description("Specifies whether or not NiFi should provide Transactional guarantees when communicating with Kafka. If there is a problem sending data to Kafka, "
+ + "and this property is set to false, then the messages that have already been sent to Kafka will continue on and be delivered to consumers. "
+ + "If this is set to true, then the Kafka transaction will be rolled back so that those messages are not available to consumers. Setting this to true "
+ + "requires that the property be set to \"Guarantee Replicated Delivery.\"")
+ .expressionLanguageSupported(false)
+ .allowableValues("true", "false")
+ .defaultValue("true")
+ .required(true)
+ .build();
+ static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new PropertyDescriptor.Builder()
+ .name("message-header-encoding")
+ .displayName("Message Header Encoding")
+ .description("For any attribute that is added as a message header, as configured via the property, "
+ + "this property indicates the Character Encoding to use for serializing the headers.")
+ .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
+ .defaultValue("UTF-8")
+ .required(false)
+ .build();
+
+ static final Relationship REL_SUCCESS = new Relationship.Builder()
+ .name("success")
+ .description("FlowFiles for which all content was sent to Kafka.")
+ .build();
+
+ static final Relationship REL_FAILURE = new Relationship.Builder()
+ .name("failure")
+ .description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship")
+ .build();
+
+ private static final List<PropertyDescriptor> PROPERTIES;
+ private static final Set<Relationship> RELATIONSHIPS;
+
+ private volatile PublisherPool publisherPool = null;
+
+ static {
+ final List<PropertyDescriptor> properties = new ArrayList<>();
+ properties.add(KafkaProcessorUtils.BOOTSTRAP_SERVERS);
+ properties.add(TOPIC);
+ properties.add(RECORD_READER);
+ properties.add(RECORD_WRITER);
+ properties.add(USE_TRANSACTIONS);
+ properties.add(DELIVERY_GUARANTEE);
+ properties.add(ATTRIBUTE_NAME_REGEX);
+ properties.add(MESSAGE_HEADER_ENCODING);
+ properties.add(KafkaProcessorUtils.SECURITY_PROTOCOL);
+ properties.add(KafkaProcessorUtils.KERBEROS_PRINCIPLE);
+ properties.add(KafkaProcessorUtils.USER_PRINCIPAL);
+ properties.add(KafkaProcessorUtils.USER_KEYTAB);
+ properties.add(KafkaProcessorUtils.SSL_CONTEXT_SERVICE);
+ properties.add(MESSAGE_KEY_FIELD);
+ properties.add(MAX_REQUEST_SIZE);
+ properties.add(ACK_WAIT_TIME);
+ properties.add(METADATA_WAIT_TIME);
+ properties.add(PARTITION_CLASS);
+ properties.add(COMPRESSION_CODEC);
+
+ PROPERTIES = Collections.unmodifiableList(properties);
+
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(REL_SUCCESS);
+ relationships.add(REL_FAILURE);
+ RELATIONSHIPS = Collections.unmodifiableSet(relationships);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return RELATIONSHIPS;
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return PROPERTIES;
+ }
+
+ @Override
+ protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
+ return new PropertyDescriptor.Builder()
+ .description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
+ .name(propertyDescriptorName)
+ .addValidator(new KafkaProcessorUtils.KafkaConfigValidator(ProducerConfig.class))
+ .dynamic(true)
+ .build();
+ }
+
+ @Override
+ protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
+ final List<ValidationResult> results = new ArrayList<>();
+ results.addAll(KafkaProcessorUtils.validateCommonProperties(validationContext));
+
+ final boolean useTransactions = validationContext.getProperty(USE_TRANSACTIONS).asBoolean();
+ if (useTransactions) {
+ final String deliveryGuarantee = validationContext.getProperty(DELIVERY_GUARANTEE).getValue();
+ if (!DELIVERY_REPLICATED.getValue().equals(deliveryGuarantee)) {
+ results.add(new ValidationResult.Builder()
+ .subject("Delivery Guarantee")
+ .valid(false)
+ .explanation("In order to use Transactions, the Delivery Guarantee must be \"Guarantee Replicated Delivery.\" "
+ + "Either change the
+
SSL
+
+ This option provides an encrypted connection to the broker, with optional client authentication. In order
+ to use this option the broker must be configured with a listener of the form:
+
+ SSL://host.name:port
+
+ In addition, the processor must have an SSL Context Service selected.
+
+
+ If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
+ not be required to present a certificate. In this case, the SSL Context Service selected may specify only
+ a truststore containing the public key of the certificate authority used to sign the broker's key.
+
+
+ If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
+ In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
+ a truststore as described above.
+
+
SASL_PLAINTEXT
+
+ This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_PLAINTEXT://host.name:port
+
+ In addition, the Kerberos Service Name must be specified in the processor.
+
+
SASL_PLAINTEXT - GSSAPI
+
+ If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
+ JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
+ NiFi's bootstrap.conf, such as:
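+
+ The example below is illustrative only; the java.arg index, file paths, and realm are placeholders.
+ A typical bootstrap.conf entry would be:
+
+     java.arg.15=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
+
+ and the referenced kafka_client_jaas.conf would contain an entry along these lines:
+
+     KafkaClient {
+         com.sun.security.auth.module.Krb5LoginModule required
+         useTicketCache=false
+         renewTicket=true
+         serviceName="kafka"
+         useKeyTab=true
+         keyTab="/path/to/nifi.keytab"
+         principal="nifi@EXAMPLE.COM";
+     };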
+
+ NOTE: The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
+
+
+ Alternatively, the JAAS
+ configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
+ directly in the processor properties. This will dynamically create a JAAS configuration like above, and
+ will take precedence over the java.security.auth.login.config system property.
+
+
SASL_PLAINTEXT - PLAIN
+
+ If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
+ the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
+ be the following:
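+
+ The snippet below is illustrative only; the username and password values are placeholders.
+
+     KafkaClient {
+         org.apache.kafka.common.security.plain.PlainLoginModule required
+         username="nifi"
+         password="nifi-password";
+     };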
+
+ NOTE: It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
+ the username and password unencrypted.
+
+
+ NOTE: Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
+ it visible to components in other NARs that may access the providers. There is currently a known issue
+ where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
+
+
SASL_SSL
+
+ This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_SSL://host.name:port
+
+
+
+ See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
+ depending on the SASL mechanism (GSSAPI or PLAIN).
+
+
+ See the SSL section for a description of how to configure the SSL Context Service based on the
+ ssl.client.auth property.
+
+ This Processor polls Apache Kafka
+ for data using the KafkaConsumer API available with Kafka 1.0. When a message is received
+ from Kafka, this Processor emits a FlowFile where the content of the FlowFile is the value
+ of the Kafka message.
+
+
+
+
Security Configuration
+
+ The Security Protocol property allows the user to specify the protocol for communicating
+ with the Kafka broker. The following sections describe each of the protocols in further detail.
+
+
PLAINTEXT
+
+ This option provides an unsecured connection to the broker, with no client authentication and no encryption.
+ In order to use this option the broker must be configured with a listener of the form:
+
+ PLAINTEXT://host.name:port
+
+
+
SSL
+
+ This option provides an encrypted connection to the broker, with optional client authentication. In order
+ to use this option the broker must be configured with a listener of the form:
+
+ SSL://host.name:port
+
+ In addition, the processor must have an SSL Context Service selected.
+
+
+ If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
+ not be required to present a certificate. In this case, the SSL Context Service selected may specify only
+ a truststore containing the public key of the certificate authority used to sign the broker's key.
+
+
+ If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
+ In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
+ a truststore as described above.
+
+
SASL_PLAINTEXT
+
+ This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_PLAINTEXT://host.name:port
+
+ In addition, the Kerberos Service Name must be specified in the processor.
+
+
SASL_PLAINTEXT - GSSAPI
+
+ If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
+ JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
+ NiFi's bootstrap.conf, such as:
+
+ NOTE: The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
+
+
+ Alternatively, the JAAS
+ configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
+ directly in the processor properties. This will dynamically create a JAAS configuration like above, and
+ will take precedence over the java.security.auth.login.config system property.
+
+
SASL_PLAINTEXT - PLAIN
+
+ If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
+ the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
+ be the following:
+
+ NOTE: It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
+ the username and password unencrypted.
+
+
+ NOTE: Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
+ it visible to components in other NARs that may access the providers. There is currently a known issue
+ where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
+
+
SASL_SSL
+
+ This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_SSL://host.name:port
+
+
+
+ See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
+ depending on the SASL mechanism (GSSAPI or PLAIN).
+
+
+ See the SSL section for a description of how to configure the SSL Context Service based on the
+ ssl.client.auth property.
+
+ This Processor puts the contents of a FlowFile to a Topic in
+ Apache Kafka using the KafkaProducer API available
+ with Kafka 1.0. The contents of the incoming FlowFile will be read using the
+ configured Record Reader. Each record will then be serialized using the configured
+ Record Writer, and this serialized form will be the content of a Kafka message.
+ This message is optionally assigned a key by using the <Kafka Key> Property.
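+
+ For example (illustrative only): a FlowFile containing three CSV rows, read with a CSV-based
+ Record Reader and written with an Avro-based Record Writer, results in three Kafka messages,
+ each holding one Avro-serialized record.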
+
+
+
+
Security Configuration
+
+ The Security Protocol property allows the user to specify the protocol for communicating
+ with the Kafka broker. The following sections describe each of the protocols in further detail.
+
+
PLAINTEXT
+
+ This option provides an unsecured connection to the broker, with no client authentication and no encryption.
+ In order to use this option the broker must be configured with a listener of the form:
+
+ PLAINTEXT://host.name:port
+
+
+
SSL
+
+ This option provides an encrypted connection to the broker, with optional client authentication. In order
+ to use this option the broker must be configured with a listener of the form:
+
+ SSL://host.name:port
+
+ In addition, the processor must have an SSL Context Service selected.
+
+
+ If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
+ not be required to present a certificate. In this case, the SSL Context Service selected may specify only
+ a truststore containing the public key of the certificate authority used to sign the broker's key.
+
+
+ If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
+ In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
+ a truststore as described above.
+
+
SASL_PLAINTEXT
+
+ This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_PLAINTEXT://host.name:port
+
+ In addition, the Kerberos Service Name must be specified in the processor.
+
+
SASL_PLAINTEXT - GSSAPI
+
+ If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
+ JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
+ NiFi's bootstrap.conf, such as:
+
+ NOTE: The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
+
+
+ Alternatively, the JAAS
+ configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
+ directly in the processor properties. This will dynamically create a JAAS configuration like above, and
+ will take precedence over the java.security.auth.login.config system property.
+
+
SASL_PLAINTEXT - PLAIN
+
+ If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
+ the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
+ be the following:
+
+ NOTE: It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
+ the username and password unencrypted.
+
+
+ NOTE: Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
+ it visible to components in other NARs that may access the providers. There is currently a known issue
+ where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
+
+
SASL_SSL
+
+ This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_SSL://host.name:port
+
+
+
+ See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
+ depending on the SASL mechanism (GSSAPI or PLAIN).
+
+
+ See the SSL section for a description of how to configure the SSL Context Service based on the
+ ssl.client.auth property.
+
+ This Processor puts the contents of a FlowFile to a Topic in
+ Apache Kafka using the KafkaProducer API available
+ with Kafka 1.0. The content of a FlowFile becomes the contents of a Kafka message.
+ This message is optionally assigned a key by using the <Kafka Key> Property.
+
+
+
+ The Processor allows the user to configure an optional Message Demarcator that
+ can be used to send many messages per FlowFile. For example, a \n could be used
+ to indicate that the contents of the FlowFile should be used to send one message
+ per line of text. It also supports multi-char demarcators (e.g., 'my custom demarcator').
+ If the property is not set, the entire contents of the FlowFile
+ will be sent as a single message. When using the demarcator, if some messages are
+ successfully sent but other messages fail to send, the resulting FlowFile will be
+ considered a failed FlowFile and will have additional attributes to that effect.
+ One such attribute is 'failed.last.idx', which indicates the index of the last message
+ that was successfully ACKed by Kafka (if no demarcator is used, the value of this index will be -1).
+ This will allow PublishKafka to only re-send un-ACKed messages on the next re-try.
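+
+ As an illustration (not prescribed by the processor): with the demarcator set to a newline and a
+ FlowFile containing three lines of text, three Kafka messages are published, one per line. If only
+ some of them are ACKed, the FlowFile is routed to failure and 'failed.last.idx' marks the last
+ ACKed message, so only the remaining messages are re-sent on the next attempt.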
+
+
+
+
Security Configuration
+
+ The Security Protocol property allows the user to specify the protocol for communicating
+ with the Kafka broker. The following sections describe each of the protocols in further detail.
+
+
PLAINTEXT
+
+ This option provides an unsecured connection to the broker, with no client authentication and no encryption.
+ In order to use this option the broker must be configured with a listener of the form:
+
+ PLAINTEXT://host.name:port
+
+
+
SSL
+
+ This option provides an encrypted connection to the broker, with optional client authentication. In order
+ to use this option the broker must be configured with a listener of the form:
+
+ SSL://host.name:port
+
+ In addition, the processor must have an SSL Context Service selected.
+
+
+ If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
+ not be required to present a certificate. In this case, the SSL Context Service selected may specify only
+ a truststore containing the public key of the certificate authority used to sign the broker's key.
+
+
+ If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
+ In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
+ a truststore as described above.
+
+
SASL_PLAINTEXT
+
+ This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_PLAINTEXT://host.name:port
+
+ In addition, the Kerberos Service Name must be specified in the processor.
+
+
SASL_PLAINTEXT - GSSAPI
+
+ If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
+ JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
+ NiFi's bootstrap.conf, such as:
+
+ NOTE: The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
+
+
+ Alternatively, the JAAS
+ configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
+ directly in the processor properties. This will dynamically create a JAAS configuration like above, and
+ will take precedence over the java.security.auth.login.config system property.
+
+
SASL_PLAINTEXT - PLAIN
+
+ If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
+ the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
+ be the following:
+
+ NOTE: It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
+ the username and password unencrypted.
+
+
+ NOTE: Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
+ it visible to components in other NARs that may access the providers. There is currently a known issue
+ where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
+
+
SASL_SSL
+
+ This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
+ option the broker must be configured with a listener of the form:
+
+ SASL_SSL://host.name:port
+
+
+
+ See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
+ depending on the SASL mechanism (GSSAPI or PLAIN).
+
+
+ See the SSL section for a description of how to configure the SSL Context Service based on the
+ ssl.client.auth property.
+
+
+
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java
new file mode 100644
index 0000000000..7b5a8fc5d9
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumeKafkaTest.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+
+public class ConsumeKafkaTest {
+
+ ConsumerLease mockLease = null;
+ ConsumerPool mockConsumerPool = null;
+
+ @Before
+ public void setup() {
+ mockLease = mock(ConsumerLease.class);
+ mockConsumerPool = mock(ConsumerPool.class);
+ }
+
+ @Test
+ public void validateCustomValidatorSettings() throws Exception {
+ ConsumeKafka_1_0 consumeKafka = new ConsumeKafka_1_0();
+ TestRunner runner = TestRunners.newTestRunner(consumeKafka);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "Foo");
+ runner.assertNotValid();
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
+ runner.assertNotValid();
+ }
+
+ @Test
+ public void validatePropertiesValidation() throws Exception {
+ ConsumeKafka_1_0 consumeKafka = new ConsumeKafka_1_0();
+ TestRunner runner = TestRunners.newTestRunner(consumeKafka);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+
+ runner.removeProperty(ConsumeKafka_1_0.GROUP_ID);
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("invalid because Group ID is required"));
+ }
+
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, "");
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
+ }
+
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, " ");
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
+ }
+ }
+
+ @Test
+ public void validateGetAllMessages() throws Exception {
+ String groupName = "validateGetAllMessages";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
+ when(mockLease.commit()).thenReturn(Boolean.TRUE);
+
+ ConsumeKafka_1_0 proc = new ConsumeKafka_1_0() {
+ @Override
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ return mockConsumerPool;
+ }
+ };
+ final TestRunner runner = TestRunners.newTestRunner(proc);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "foo,bar");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(3)).continuePolling();
+ verify(mockLease, times(2)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void validateGetAllMessagesPattern() throws Exception {
+ String groupName = "validateGetAllMessagesPattern";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
+ when(mockLease.commit()).thenReturn(Boolean.TRUE);
+
+ ConsumeKafka_1_0 proc = new ConsumeKafka_1_0() {
+ @Override
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ return mockConsumerPool;
+ }
+ };
+ final TestRunner runner = TestRunners.newTestRunner(proc);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "(fo.*)|(ba)");
+ runner.setProperty(ConsumeKafka_1_0.TOPIC_TYPE, "pattern");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(3)).continuePolling();
+ verify(mockLease, times(2)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void validateGetErrorMessages() throws Exception {
+ String groupName = "validateGetErrorMessages";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(true, false);
+ when(mockLease.commit()).thenReturn(Boolean.FALSE);
+
+ ConsumeKafka_1_0 proc = new ConsumeKafka_1_0() {
+ @Override
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ return mockConsumerPool;
+ }
+ };
+ final TestRunner runner = TestRunners.newTestRunner(proc);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "foo,bar");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(2)).continuePolling();
+ verify(mockLease, times(1)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void testJaasConfiguration() throws Exception {
+ ConsumeKafka_1_0 consumeKafka = new ConsumeKafka_1_0();
+ TestRunner runner = TestRunners.newTestRunner(consumeKafka);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");
+ runner.setProperty(ConsumeKafka_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafka_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafka_1_0.AUTO_OFFSET_RESET, ConsumeKafka_1_0.OFFSET_EARLIEST);
+
+ runner.setProperty(KafkaProcessorUtils.SECURITY_PROTOCOL, KafkaProcessorUtils.SEC_SASL_PLAINTEXT);
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.KERBEROS_PRINCIPLE, "kafka");
+ runner.assertValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_PRINCIPAL, "nifi@APACHE.COM");
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_KEYTAB, "not.A.File");
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_KEYTAB, "src/test/resources/server.properties");
+ runner.assertValid();
+ }
+
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java
new file mode 100644
index 0000000000..232af26894
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/ConsumerPoolTest.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.regex.Pattern;
+
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.provenance.ProvenanceReporter;
+import org.apache.nifi.processors.kafka.pubsub.ConsumerPool.PoolStats;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class ConsumerPoolTest {
+
+ private Consumer<byte[], byte[]> consumer = null;
+ private ProcessSession mockSession = null;
+ private ProcessContext mockContext = Mockito.mock(ProcessContext.class);
+ private ProvenanceReporter mockReporter = null;
+ private ConsumerPool testPool = null;
+ private ConsumerPool testDemarcatedPool = null;
+ private ComponentLog logger = null;
+
+ @Before
+ @SuppressWarnings("unchecked")
+ public void setup() {
+ consumer = mock(Consumer.class);
+ logger = mock(ComponentLog.class);
+ mockSession = mock(ProcessSession.class);
+ mockReporter = mock(ProvenanceReporter.class);
+ when(mockSession.getProvenanceReporter()).thenReturn(mockReporter);
+ testPool = new ConsumerPool(
+ 1,
+ null,
+ Collections.emptyMap(),
+ Collections.singletonList("nifi"),
+ 100L,
+ "utf-8",
+ "ssl",
+ "localhost",
+ logger,
+ true,
+ StandardCharsets.UTF_8,
+ null) {
+ @Override
+ protected Consumer<byte[], byte[]> createKafkaConsumer() {
+ return consumer;
+ }
+ };
+ testDemarcatedPool = new ConsumerPool(
+ 1,
+ "--demarcator--".getBytes(StandardCharsets.UTF_8),
+ Collections.emptyMap(),
+ Collections.singletonList("nifi"),
+ 100L,
+ "utf-8",
+ "ssl",
+ "localhost",
+ logger,
+ true,
+ StandardCharsets.UTF_8,
+ Pattern.compile(".*")) {
+ @Override
+ protected Consumer<byte[], byte[]> createKafkaConsumer() {
+ return consumer;
+ }
+ };
+ }
+
+ @Test
+ public void validatePoolSimpleCreateClose() throws Exception {
+
+ when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ }
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ }
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ }
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ }
+ testPool.close();
+ verify(mockSession, times(0)).create();
+ verify(mockSession, times(0)).commit();
+ final PoolStats stats = testPool.getPoolStats();
+ assertEquals(1, stats.consumerCreatedCount);
+ assertEquals(1, stats.consumerClosedCount);
+ assertEquals(4, stats.leasesObtainedCount);
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void validatePoolSimpleCreatePollClose() throws Exception {
+ final byte[][] firstPassValues = new byte[][]{
+ "Hello-1".getBytes(StandardCharsets.UTF_8),
+ "Hello-2".getBytes(StandardCharsets.UTF_8),
+ "Hello-3".getBytes(StandardCharsets.UTF_8)
+ };
+ final ConsumerRecords<byte[], byte[]> firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues);
+
+ when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ lease.commit();
+ }
+ testPool.close();
+ verify(mockSession, times(3)).create();
+ verify(mockSession, times(1)).commit();
+ final PoolStats stats = testPool.getPoolStats();
+ assertEquals(1, stats.consumerCreatedCount);
+ assertEquals(1, stats.consumerClosedCount);
+ assertEquals(1, stats.leasesObtainedCount);
+ }
+
+ @Test
+ public void validatePoolSimpleBatchCreateClose() throws Exception {
+ when(consumer.poll(anyLong())).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
+ for (int i = 0; i < 100; i++) {
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ for (int j = 0; j < 100; j++) {
+ lease.poll();
+ }
+ }
+ }
+ testPool.close();
+ verify(mockSession, times(0)).create();
+ verify(mockSession, times(0)).commit();
+ final PoolStats stats = testPool.getPoolStats();
+ assertEquals(1, stats.consumerCreatedCount);
+ assertEquals(1, stats.consumerClosedCount);
+ assertEquals(100, stats.leasesObtainedCount);
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void validatePoolBatchCreatePollClose() throws Exception {
+ final byte[][] firstPassValues = new byte[][]{
+ "Hello-1".getBytes(StandardCharsets.UTF_8),
+ "Hello-2".getBytes(StandardCharsets.UTF_8),
+ "Hello-3".getBytes(StandardCharsets.UTF_8)
+ };
+ final ConsumerRecords<byte[], byte[]> firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues);
+
+ when(consumer.poll(anyLong())).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
+ try (final ConsumerLease lease = testDemarcatedPool.obtainConsumer(mockSession, mockContext)) {
+ lease.poll();
+ lease.commit();
+ }
+ testDemarcatedPool.close();
+ verify(mockSession, times(1)).create();
+ verify(mockSession, times(1)).commit();
+ final PoolStats stats = testDemarcatedPool.getPoolStats();
+ assertEquals(1, stats.consumerCreatedCount);
+ assertEquals(1, stats.consumerClosedCount);
+ assertEquals(1, stats.leasesObtainedCount);
+ }
+
+ @Test
+ public void validatePoolConsumerFails() throws Exception {
+
+ when(consumer.poll(anyLong())).thenThrow(new KafkaException("oops"));
+ try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
+ try {
+ lease.poll();
+ fail();
+ } catch (final KafkaException ke) {
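+ // expected: the KafkaException thrown by the mocked consumer should surface here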
+
+ }
+ }
+ testPool.close();
+ verify(mockSession, times(0)).create();
+ verify(mockSession, times(0)).commit();
+ final PoolStats stats = testPool.getPoolStats();
+ assertEquals(1, stats.consumerCreatedCount);
+ assertEquals(1, stats.consumerClosedCount);
+ assertEquals(1, stats.leasesObtainedCount);
+ }
+
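+ // Test helper: builds a ConsumerRecords instance for a single topic partition from the given
+ // raw values, assigning sequential offsets starting at startingOffset.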
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ static ConsumerRecords<byte[], byte[]> createConsumerRecords(final String topic, final int partition, final long startingOffset, final byte[][] rawRecords) {
+ final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> map = new HashMap<>();
+ final TopicPartition tPart = new TopicPartition(topic, partition);
+ final List<ConsumerRecord<byte[], byte[]>> records = new ArrayList<>();
+ long offset = startingOffset;
+ for (final byte[] rawRecord : rawRecords) {
+ final ConsumerRecord<byte[], byte[]> rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord);
+ records.add(rec);
+ }
+ map.put(tPart, records);
+ return new ConsumerRecords(map);
+ }
+
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestConsumeKafkaRecord_1_0.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestConsumeKafkaRecord_1_0.java
new file mode 100644
index 0000000000..d370fec1aa
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestConsumeKafkaRecord_1_0.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.ByteArrayDeserializer;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser;
+import org.apache.nifi.processors.kafka.pubsub.util.MockRecordWriter;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestConsumeKafkaRecord_1_0 {
+
+ private ConsumerLease mockLease = null;
+ private ConsumerPool mockConsumerPool = null;
+ private TestRunner runner;
+
+ @Before
+ public void setup() throws InitializationException {
+ mockLease = mock(ConsumerLease.class);
+ mockConsumerPool = mock(ConsumerPool.class);
+
+ ConsumeKafkaRecord_1_0 proc = new ConsumeKafkaRecord_1_0() {
+ @Override
+ protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
+ return mockConsumerPool;
+ }
+ };
+
+ runner = TestRunners.newTestRunner(proc);
+ runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");
+
+ final String readerId = "record-reader";
+ final MockRecordParser readerService = new MockRecordParser();
+ readerService.addSchemaField("name", RecordFieldType.STRING);
+ readerService.addSchemaField("age", RecordFieldType.INT);
+ runner.addControllerService(readerId, readerService);
+ runner.enableControllerService(readerService);
+
+ final String writerId = "record-writer";
+ final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
+ runner.addControllerService(writerId, writerService);
+ runner.enableControllerService(writerService);
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_READER, readerId);
+ runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_WRITER, writerId);
+ }
+
+ @Test
+ public void validateCustomValidatorSettings() throws Exception {
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "Foo");
+ runner.assertNotValid();
+ runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
+ runner.assertValid();
+ runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
+ runner.assertNotValid();
+ }
+
+ @Test
+ public void validatePropertiesValidation() throws Exception {
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+
+ runner.removeProperty(ConsumeKafkaRecord_1_0.GROUP_ID);
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("invalid because Group ID is required"));
+ }
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, "");
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
+ }
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, " ");
+ try {
+ runner.assertValid();
+ fail();
+ } catch (AssertionError e) {
+ assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
+ }
+ }
+
+ @Test
+ public void validateGetAllMessages() throws Exception {
+ String groupName = "validateGetAllMessages";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
+ when(mockLease.commit()).thenReturn(Boolean.TRUE);
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo,bar");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(3)).continuePolling();
+ verify(mockLease, times(2)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void validateGetAllMessagesPattern() throws Exception {
+ String groupName = "validateGetAllMessagesPattern";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
+ when(mockLease.commit()).thenReturn(Boolean.TRUE);
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "(fo.*)|(ba)");
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPIC_TYPE, "pattern");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(3)).continuePolling();
+ verify(mockLease, times(2)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void validateGetErrorMessages() throws Exception {
+ String groupName = "validateGetErrorMessages";
+
+ when(mockConsumerPool.obtainConsumer(anyObject(), anyObject())).thenReturn(mockLease);
+ when(mockLease.continuePolling()).thenReturn(true, false);
+ when(mockLease.commit()).thenReturn(Boolean.FALSE);
+
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo,bar");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, groupName);
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+ runner.run(1, false);
+
+ verify(mockConsumerPool, times(1)).obtainConsumer(anyObject(), anyObject());
+ verify(mockLease, times(2)).continuePolling();
+ verify(mockLease, times(1)).poll();
+ verify(mockLease, times(1)).commit();
+ verify(mockLease, times(1)).close();
+ verifyNoMoreInteractions(mockConsumerPool);
+ verifyNoMoreInteractions(mockLease);
+ }
+
+ @Test
+ public void testJaasConfiguration() throws Exception {
+ runner.setProperty(ConsumeKafkaRecord_1_0.TOPICS, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.GROUP_ID, "foo");
+ runner.setProperty(ConsumeKafkaRecord_1_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_1_0.OFFSET_EARLIEST);
+
+ runner.setProperty(KafkaProcessorUtils.SECURITY_PROTOCOL, KafkaProcessorUtils.SEC_SASL_PLAINTEXT);
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.KERBEROS_PRINCIPLE, "kafka");
+ runner.assertValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_PRINCIPAL, "nifi@APACHE.COM");
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_KEYTAB, "not.A.File");
+ runner.assertNotValid();
+
+ runner.setProperty(KafkaProcessorUtils.USER_KEYTAB, "src/test/resources/server.properties");
+ runner.assertValid();
+ }
+
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java
new file mode 100644
index 0000000000..78c03ec435
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestInFlightMessageTracker.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.nifi.util.MockComponentLog;
+import org.apache.nifi.util.MockFlowFile;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestInFlightMessageTracker {
+
+ @Test(timeout = 5000L)
+ public void testAwaitCompletionWhenComplete() throws InterruptedException, TimeoutException {
+ final MockFlowFile flowFile = new MockFlowFile(1L);
+
+ final InFlightMessageTracker tracker = new InFlightMessageTracker(new MockComponentLog("1", "unit-test"));
+ tracker.incrementSentCount(flowFile);
+
+ verifyNotComplete(tracker);
+
+ tracker.incrementSentCount(flowFile);
+ verifyNotComplete(tracker);
+
+ tracker.incrementAcknowledgedCount(flowFile);
+ verifyNotComplete(tracker);
+
+ tracker.incrementAcknowledgedCount(flowFile);
+ tracker.awaitCompletion(1L);
+ }
+
+ @Test(timeout = 5000L)
+ public void testAwaitCompletionWhileWaiting() throws InterruptedException, ExecutionException {
+ final MockFlowFile flowFile = new MockFlowFile(1L);
+
+ final InFlightMessageTracker tracker = new InFlightMessageTracker(new MockComponentLog("1", "unit-test"));
+ tracker.incrementSentCount(flowFile);
+
+ verifyNotComplete(tracker);
+
+ tracker.incrementSentCount(flowFile);
+ verifyNotComplete(tracker);
+
+ final ExecutorService exec = Executors.newFixedThreadPool(1);
+ final Future<?> future = exec.submit(() -> {
+ try {
+ tracker.awaitCompletion(10000L);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+
+ tracker.incrementAcknowledgedCount(flowFile);
+ tracker.incrementAcknowledgedCount(flowFile);
+
+ future.get();
+ }
+
+ private void verifyNotComplete(final InFlightMessageTracker tracker) throws InterruptedException {
+ try {
+ tracker.awaitCompletion(10L);
+ Assert.fail("Expected timeout");
+ } catch (final TimeoutException te) {
+ // expected
+ }
+ }
+
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java
new file mode 100644
index 0000000000..6a0c7cea67
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafka.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPublishKafka {
+ private static final String TOPIC_NAME = "unit-test";
+
+ private PublisherPool mockPool;
+ private PublisherLease mockLease;
+ private TestRunner runner;
+
+ @Before
+ public void setup() {
+ mockPool = mock(PublisherPool.class);
+ mockLease = mock(PublisherLease.class);
+
+ when(mockPool.obtainPublisher()).thenReturn(mockLease);
+
+ runner = TestRunners.newTestRunner(new PublishKafka_1_0() {
+ @Override
+ protected PublisherPool createPublisherPool(final ProcessContext context) {
+ return mockPool;
+ }
+ });
+
+ runner.setProperty(PublishKafka_1_0.TOPIC, TOPIC_NAME);
+ runner.setProperty(PublishKafka_1_0.DELIVERY_GUARANTEE, PublishKafka_1_0.DELIVERY_REPLICATED);
+ }
+
+ @Test
+ public void testSingleSuccess() throws IOException {
+ final MockFlowFile flowFile = runner.enqueue("hello world");
+
+ when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafka_1_0.REL_SUCCESS, 1);
+
+ verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleSuccess() throws IOException {
+ final Set<FlowFile> flowFiles = new HashSet<>();
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+
+
+ when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafka_1_0.REL_SUCCESS, 3);
+
+ verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testSingleFailure() throws IOException {
+ final MockFlowFile flowFile = runner.enqueue("hello world");
+
+ when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafka_1_0.REL_FAILURE, 1);
+
+ verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleFailures() throws IOException {
+ final Set<FlowFile> flowFiles = new HashSet<>();
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+
+ when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafka_1_0.REL_FAILURE, 3);
+
+ verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleMessagesPerFlowFile() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ msgCounts.put(flowFiles.get(0), 10);
+ msgCounts.put(flowFiles.get(1), 20);
+
+ final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
+
+ when(mockLease.complete()).thenReturn(result);
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafka_1_0.REL_SUCCESS, 2);
+
+ verify(mockLease, times(2)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+
+ runner.assertAllFlowFilesContainAttribute("msg.count");
+ assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_1_0.REL_SUCCESS).stream()
+ .filter(ff -> ff.getAttribute("msg.count").equals("10"))
+ .count());
+ assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_1_0.REL_SUCCESS).stream()
+ .filter(ff -> ff.getAttribute("msg.count").equals("20"))
+ .count());
+ }
+
+
+ @Test
+ public void testSomeSuccessSomeFailure() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+ flowFiles.add(runner.enqueue("hello world"));
+
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ msgCounts.put(flowFiles.get(0), 10);
+ msgCounts.put(flowFiles.get(1), 20);
+
+ final Map<FlowFile, Exception> failureMap = new HashMap<>();
+ failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception"));
+ failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception"));
+
+ final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap);
+
+ when(mockLease.complete()).thenReturn(result);
+
+ runner.run();
+ runner.assertTransferCount(PublishKafka_1_0.REL_SUCCESS, 0);
+ runner.assertTransferCount(PublishKafka_1_0.REL_FAILURE, 4);
+
+ verify(mockLease, times(4)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+
+ assertTrue(runner.getFlowFilesForRelationship(PublishKafka_1_0.REL_FAILURE).stream()
+ .noneMatch(ff -> ff.getAttribute("msg.count") != null));
+ }
+
+
+ private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) {
+ return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount);
+ }
+
+ private PublishResult createAllSuccessPublishResult(final Set<FlowFile> successfulFlowFiles, final int msgCountPerFlowFile) {
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ for (final FlowFile ff : successfulFlowFiles) {
+ msgCounts.put(ff, msgCountPerFlowFile);
+ }
+ return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap());
+ }
+
+ private PublishResult createFailurePublishResult(final FlowFile failure) {
+ return createFailurePublishResult(Collections.singleton(failure));
+ }
+
+ private PublishResult createFailurePublishResult(final Set<FlowFile> failures) {
+ final Map<FlowFile, Exception> failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception")));
+ return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap);
+ }
+
+ private PublishResult createPublishResult(final Map<FlowFile, Integer> msgCounts, final Set<FlowFile> successFlowFiles, final Map<FlowFile, Exception> failures) {
+ // sanity check.
+ for (final FlowFile success : successFlowFiles) {
+ if (failures.containsKey(success)) {
+ throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success);
+ }
+ }
+
+ return new PublishResult() {
+ @Override
+ public boolean isFailure() {
+ return !failures.isEmpty();
+ }
+
+ @Override
+ public int getSuccessfulMessageCount(FlowFile flowFile) {
+ Integer count = msgCounts.get(flowFile);
+ return count == null ? 0 : count.intValue();
+ }
+
+ @Override
+ public Exception getReasonForFailure(FlowFile flowFile) {
+ return failures.get(flowFile);
+ }
+ };
+ }
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafkaRecord_1_0.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafkaRecord_1_0.java
new file mode 100644
index 0000000000..45439cc126
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublishKafkaRecord_1_0.java
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser;
+import org.apache.nifi.processors.kafka.pubsub.util.MockRecordWriter;
+import org.apache.nifi.reporting.InitializationException;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.RecordSet;
+import org.apache.nifi.util.MockFlowFile;
+import org.apache.nifi.util.TestRunner;
+import org.apache.nifi.util.TestRunners;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+public class TestPublishKafkaRecord_1_0 {
+
+ private static final String TOPIC_NAME = "unit-test";
+
+ private PublisherPool mockPool;
+ private PublisherLease mockLease;
+ private TestRunner runner;
+
+ @Before
+ public void setup() throws InitializationException, IOException {
+ mockPool = mock(PublisherPool.class);
+ mockLease = mock(PublisherLease.class);
+ Mockito.doCallRealMethod().when(mockLease).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
+ any(RecordSchema.class), any(String.class), any(String.class));
+
+ when(mockPool.obtainPublisher()).thenReturn(mockLease);
+
+ runner = TestRunners.newTestRunner(new PublishKafkaRecord_1_0() {
+ @Override
+ protected PublisherPool createPublisherPool(final ProcessContext context) {
+ return mockPool;
+ }
+ });
+
+ runner.setProperty(PublishKafkaRecord_1_0.TOPIC, TOPIC_NAME);
+
+ final String readerId = "record-reader";
+ final MockRecordParser readerService = new MockRecordParser();
+ readerService.addSchemaField("name", RecordFieldType.STRING);
+ readerService.addSchemaField("age", RecordFieldType.INT);
+ runner.addControllerService(readerId, readerService);
+ runner.enableControllerService(readerService);
+
+ final String writerId = "record-writer";
+ final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
+ runner.addControllerService(writerId, writerService);
+ runner.enableControllerService(writerService);
+
+ runner.setProperty(PublishKafkaRecord_1_0.RECORD_READER, readerId);
+ runner.setProperty(PublishKafkaRecord_1_0.RECORD_WRITER, writerId);
+ runner.setProperty(PublishKafka_1_0.DELIVERY_GUARANTEE, PublishKafka_1_0.DELIVERY_REPLICATED);
+ }
+
+ @Test
+ public void testSingleSuccess() throws IOException {
+ final MockFlowFile flowFile = runner.enqueue("John Doe, 48");
+
+ when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_SUCCESS, 1);
+
+ verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleSuccess() throws IOException {
+ final Set<FlowFile> flowFiles = new HashSet<>();
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+
+ when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_SUCCESS, 3);
+
+ verify(mockLease, times(3)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testSingleFailure() throws IOException {
+ final MockFlowFile flowFile = runner.enqueue("John Doe, 48");
+
+ when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_FAILURE, 1);
+
+ verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleFailures() throws IOException {
+ final Set<FlowFile> flowFiles = new HashSet<>();
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+
+ when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_FAILURE, 3);
+
+ verify(mockLease, times(3)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+ }
+
+ @Test
+ public void testMultipleMessagesPerFlowFile() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ flowFiles.add(runner.enqueue("John Doe, 48\nJane Doe, 47"));
+ flowFiles.add(runner.enqueue("John Doe, 48\nJane Doe, 29"));
+
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ msgCounts.put(flowFiles.get(0), 10);
+ msgCounts.put(flowFiles.get(1), 20);
+
+ final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
+
+ when(mockLease.complete()).thenReturn(result);
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_SUCCESS, 2);
+
+ verify(mockLease, times(2)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(4)).publish(any(FlowFile.class), eq(null), any(byte[].class), eq(TOPIC_NAME), any(InFlightMessageTracker.class));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+
+ runner.assertAllFlowFilesContainAttribute("msg.count");
+ assertEquals(1, runner.getFlowFilesForRelationship(PublishKafkaRecord_1_0.REL_SUCCESS).stream()
+ .filter(ff -> ff.getAttribute("msg.count").equals("10"))
+ .count());
+ assertEquals(1, runner.getFlowFilesForRelationship(PublishKafkaRecord_1_0.REL_SUCCESS).stream()
+ .filter(ff -> ff.getAttribute("msg.count").equals("20"))
+ .count());
+ }
+
+ @Test
+ public void testNoRecordsInFlowFile() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ flowFiles.add(runner.enqueue(new byte[0]));
+
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ msgCounts.put(flowFiles.get(0), 0);
+
+ final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
+
+ when(mockLease.complete()).thenReturn(result);
+
+ runner.run();
+ runner.assertAllFlowFilesTransferred(PublishKafkaRecord_1_0.REL_SUCCESS, 1);
+
+ verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(0)).poison();
+ verify(mockLease, times(1)).close();
+
+ final MockFlowFile mff = runner.getFlowFilesForRelationship(PublishKafkaRecord_1_0.REL_SUCCESS).get(0);
+ mff.assertAttributeEquals("msg.count", "0");
+ }
+
+
+ @Test
+ public void testSomeSuccessSomeFailure() throws IOException {
+ final List<FlowFile> flowFiles = new ArrayList<>();
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+ flowFiles.add(runner.enqueue("John Doe, 48"));
+
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ msgCounts.put(flowFiles.get(0), 10);
+ msgCounts.put(flowFiles.get(1), 20);
+
+ final Map<FlowFile, Exception> failureMap = new HashMap<>();
+ failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception"));
+ failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception"));
+
+ final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap);
+
+ when(mockLease.complete()).thenReturn(result);
+
+ runner.run();
+ runner.assertTransferCount(PublishKafkaRecord_1_0.REL_SUCCESS, 0);
+ runner.assertTransferCount(PublishKafkaRecord_1_0.REL_FAILURE, 4);
+
+ verify(mockLease, times(4)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), eq(null), eq(TOPIC_NAME));
+ verify(mockLease, times(1)).complete();
+ verify(mockLease, times(1)).close();
+
+ assertTrue(runner.getFlowFilesForRelationship(PublishKafkaRecord_1_0.REL_FAILURE).stream()
+ .noneMatch(ff -> ff.getAttribute("msg.count") != null));
+ }
+
+
+ private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) {
+ return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount);
+ }
+
+ private PublishResult createAllSuccessPublishResult(final Set<FlowFile> successfulFlowFiles, final int msgCountPerFlowFile) {
+ final Map<FlowFile, Integer> msgCounts = new HashMap<>();
+ for (final FlowFile ff : successfulFlowFiles) {
+ msgCounts.put(ff, msgCountPerFlowFile);
+ }
+ return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap());
+ }
+
+ private PublishResult createFailurePublishResult(final FlowFile failure) {
+ return createFailurePublishResult(Collections.singleton(failure));
+ }
+
+ private PublishResult createFailurePublishResult(final Set<FlowFile> failures) {
+ final Map<FlowFile, Exception> failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception")));
+ return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap);
+ }
+
+ private PublishResult createPublishResult(final Map<FlowFile, Integer> msgCounts, final Set<FlowFile> successFlowFiles, final Map<FlowFile, Exception> failures) {
+ // sanity check.
+ for (final FlowFile success : successFlowFiles) {
+ if (failures.containsKey(success)) {
+ throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success);
+ }
+ }
+
+ return new PublishResult() {
+
+ @Override
+ public int getSuccessfulMessageCount(FlowFile flowFile) {
+ Integer count = msgCounts.get(flowFile);
+ return count == null ? 0 : count.intValue();
+ }
+
+ @Override
+ public Exception getReasonForFailure(FlowFile flowFile) {
+ return failures.get(flowFile);
+ }
+
+ @Override
+ public boolean isFailure() {
+ return !failures.isEmpty();
+ }
+ };
+ }
+}
diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java
new file mode 100644
index 0000000000..54c122231e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.kafka.pubsub;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.Producer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.util.MockFlowFile;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+
+public class TestPublisherLease {
+ private ComponentLog logger;
+ private Producer<byte[], byte[]> producer;
+
+ @Before
+ @SuppressWarnings("unchecked")
+ public void setup() {
+ logger = Mockito.mock(ComponentLog.class);
+ producer = Mockito.mock(Producer.class);
+ }
+
+ @Test
+ public void testPoisonOnException() throws IOException {
+ final AtomicInteger poisonCount = new AtomicInteger(0);
+
+ final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger, true, null, StandardCharsets.UTF_8) {
+ @Override
+ public void poison() {
+ poisonCount.incrementAndGet();
+ super.poison();
+ }
+ };
+
+ final FlowFile flowFile = new MockFlowFile(1L);
+ final String topic = "unit-test";
+ final byte[] messageKey = null;
+ final byte[] demarcatorBytes = null;
+
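+ // An InputStream that always fails, used to force an IOException during publish;
+ // the test expects the lease to be poisoned as a result.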
+ final InputStream failureInputStream = new InputStream() {
+ @Override
+ public int read() throws IOException {
+ throw new IOException("Intentional Unit Test Exception");
+ }
+ };
+
+ try {
+ lease.publish(flowFile, failureInputStream, messageKey, demarcatorBytes, topic);
+ Assert.fail("Expected IOException");
+ } catch (final IOException ioe) {
+ // expected
+ }
+
+ assertEquals(1, poisonCount.get());
+
+ final PublishResult result = lease.complete();
+ assertTrue(result.isFailure());
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testPoisonOnFailure() throws IOException {
+ final AtomicInteger poisonCount = new AtomicInteger(0);
+
+ final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 1000L, logger, true, null, StandardCharsets.UTF_8) {
+ @Override
+ public void poison() {
+ poisonCount.incrementAndGet();
+ super.poison();
+ }
+ };
+
+ final FlowFile flowFile = new MockFlowFile(1L);
+ final String topic = "unit-test";
+ final byte[] messageKey = null;
+ final byte[] demarcatorBytes = null;
+
+ doAnswer(new Answer