NIFI-11248 Removed Kafka 2_0 Components

This closes #7010

Signed-off-by: David Handermann <exceptionfactory@apache.org>
Authored by Nandor Soma Abonyi on 2023-03-05 00:28:36 +01:00; committed by exceptionfactory
parent c7c1a245a9
commit a8cd33a243
GPG Key ID: 29B6A52D2AAE8DBA
40 changed files with 0 additions and 9017 deletions


@@ -289,12 +289,6 @@ language governing permissions and limitations under the License. -->
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-2-0-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-2-6-nar</artifactId>
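The hunk above removes the nifi-kafka-2-0-nar entry from the assembly dependency list, leaving the Kafka 2.6 NAR as the remaining Kafka client bundle. As a minimal sketch of the surviving entry, assuming the version and nar type carry over unchanged from the removed 2.0 entry (coordinates taken from the diff context):

<!-- Sketch only: the Kafka 2.6 NAR dependency left in place after this removal.
     Coordinates come from the hunk above; version and type are assumed to mirror the removed entry. -->
<dependency>
    <groupId>org.apache.nifi</groupId>
    <artifactId>nifi-kafka-2-6-nar</artifactId>
    <version>2.0.0-SNAPSHOT</version>
    <type>nar</type>
</dependency>

Downstream assemblies that still declare nifi-kafka-2-0-nar would drop that dependency block in the same way.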


@@ -1,41 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-kafka-2-0-nar</artifactId>
<packaging>nar</packaging>
<description>NiFi NAR for interacting with Apache Kafka 2.0</description>
<properties>
<maven.javadoc.skip>true</maven.javadoc.skip>
<source.skip>true</source.skip>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-2-0-processors</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-services-api-nar</artifactId>
<version>2.0.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
</dependencies>
</project>


@@ -1,233 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
APACHE NIFI SUBCOMPONENTS:
The Apache NiFi project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for these
subcomponents is subject to the terms and conditions of the following
licenses.
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
under an MIT style license.
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -1,407 +0,0 @@
nifi-kafka-2-0-nar
Copyright 2014-2023 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
******************
Apache Software License v2
******************
The following binary components are provided under the Apache Software License v2
(ASLv2) Apache Commons Lang
The following NOTICE information applies:
Apache Commons Lang
Copyright 2001-2017 The Apache Software Foundation
This product includes software from the Spring Framework,
under the Apache License 2.0 (see: StringUtils.containsWhitespace())
(ASLv2) Apache Commons IO
The following NOTICE information applies:
Apache Commons IO
Copyright 2002-2016 The Apache Software Foundation
(ASLv2) Apache Commons Codec
The following NOTICE information applies:
Apache Commons Codec
Copyright 2002-2014 The Apache Software Foundation
src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
contains test data from http://aspell.net/test/orig/batch0.tab.
Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
===============================================================================
The content of package org.apache.commons.codec.language.bm has been translated
from the original php source code available at http://stevemorse.org/phoneticinfo.htm
with permission from the original authors.
Original source copyright:
Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
(ASLv2) Apache Kafka
The following NOTICE information applies:
Apache Kafka
Copyright 2012 The Apache Software Foundation.
(ASLv2) Snappy Java
The following NOTICE information applies:
This product includes software developed by Google
Snappy: http://code.google.com/p/snappy/ (New BSD License)
This product includes software developed by Apache
PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/
(Apache 2.0 license)
This library contains statically linked libstdc++. This inclusion is allowed by
"GCC Runtime Library Exception"
http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html
(ASLv2) Jackson JSON processor
The following NOTICE information applies:
# Jackson JSON processor
Jackson is a high-performance, Free/Open Source JSON processing library.
It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
been in development since 2007.
It is currently developed by a community of developers, as well as supported
commercially by FasterXML.com.
## Licensing
Jackson core and extension components may be licensed under different licenses.
To find the details that apply to this artifact see the accompanying LICENSE file.
For more information, including possible other licensing options, contact
FasterXML.com (http://fasterxml.com).
## Credits
A list of contributors may be found from CREDITS file, which is included
in some artifacts (usually source distributions); but is always available
from the source code management (SCM) system project uses.
(ASLv2) aws-msk-iam-auth
The following NOTICE information applies:
aws-msk-iam-auth
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
(ASLv2) AWS SDK for Java
The following NOTICE information applies:
AWS SDK for Java
Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
This product includes software developed by
Amazon Technologies, Inc (http://www.amazon.com/).
**********************
THIRD PARTY COMPONENTS
**********************
This software includes third party software subject to the following copyrights:
- XML parsing and utility functions from JetS3t - Copyright 2006-2009 James Murty.
- PKCS#1 PEM encoded private key parsing and utility functions from oauth.googlecode.com - Copyright 1998-2010 AOL Inc.
The licenses for these third party components are included in LICENSE.txt
(ASLv2) AWS EventStream for Java
The following NOTICE information applies:
AWS EventStream for Java
Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
(ASLv2) Apache HttpComponents
The following NOTICE information applies:
Apache HttpComponents Client
Copyright 1999-2020 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Apache HttpComponents Core
Copyright 2005-2020 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
(ASLv2) Amazon Ion Java
The following NOTICE information applies:
Amazon Ion Java
Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
(ASLv2) Joda-Time
The following NOTICE information applies:
This product includes software developed by
Joda.org (http://www.joda.org/).
(ASLv2) The Netty Project
The following NOTICE information applies:
The Netty Project
=================
Please visit the Netty web site for more information:
* https://netty.io/
Copyright 2014 The Netty Project
The Netty Project licenses this file to you under the Apache License,
version 2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at:
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations
under the License.
Also, please refer to each LICENSE.<component>.txt file, which is located in
the 'license' directory of the distribution file, for the license terms of the
components that this product depends on.
-------------------------------------------------------------------------------
This product contains the extensions to Java Collections Framework which has
been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene:
* LICENSE:
* license/LICENSE.jsr166y.txt (Public Domain)
* HOMEPAGE:
* http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/
* http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/
This product contains a modified version of Robert Harder's Public Domain
Base64 Encoder and Decoder, which can be obtained at:
* LICENSE:
* license/LICENSE.base64.txt (Public Domain)
* HOMEPAGE:
* http://iharder.sourceforge.net/current/java/base64/
This product contains a modified portion of 'Webbit', an event based
WebSocket and HTTP server, which can be obtained at:
* LICENSE:
* license/LICENSE.webbit.txt (BSD License)
* HOMEPAGE:
* https://github.com/joewalnes/webbit
This product contains a modified portion of 'SLF4J', a simple logging
facade for Java, which can be obtained at:
* LICENSE:
* license/LICENSE.slf4j.txt (MIT License)
* HOMEPAGE:
* https://www.slf4j.org/
This product contains a modified portion of 'Apache Harmony', an open source
Java SE, which can be obtained at:
* NOTICE:
* license/NOTICE.harmony.txt
* LICENSE:
* license/LICENSE.harmony.txt (Apache License 2.0)
* HOMEPAGE:
* https://archive.apache.org/dist/harmony/
This product contains a modified portion of 'jbzip2', a Java bzip2 compression
and decompression library written by Matthew J. Francis. It can be obtained at:
* LICENSE:
* license/LICENSE.jbzip2.txt (MIT License)
* HOMEPAGE:
* https://code.google.com/p/jbzip2/
This product contains a modified portion of 'libdivsufsort', a C API library to construct
the suffix array and the Burrows-Wheeler transformed string for any input string of
a constant-size alphabet written by Yuta Mori. It can be obtained at:
* LICENSE:
* license/LICENSE.libdivsufsort.txt (MIT License)
* HOMEPAGE:
* https://github.com/y-256/libdivsufsort
This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM,
which can be obtained at:
* LICENSE:
* license/LICENSE.jctools.txt (ASL2 License)
* HOMEPAGE:
* https://github.com/JCTools/JCTools
This product optionally depends on 'JZlib', a re-implementation of zlib in
pure Java, which can be obtained at:
* LICENSE:
* license/LICENSE.jzlib.txt (BSD style License)
* HOMEPAGE:
* http://www.jcraft.com/jzlib/
This product optionally depends on 'Compress-LZF', a Java library for encoding and
decoding data in LZF format, written by Tatu Saloranta. It can be obtained at:
* LICENSE:
* license/LICENSE.compress-lzf.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/ning/compress
This product optionally depends on 'lz4', a LZ4 Java compression
and decompression library written by Adrien Grand. It can be obtained at:
* LICENSE:
* license/LICENSE.lz4.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/jpountz/lz4-java
This product optionally depends on 'lzma-java', a LZMA Java compression
and decompression library, which can be obtained at:
* LICENSE:
* license/LICENSE.lzma-java.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/jponge/lzma-java
This product optionally depends on 'zstd-jni', a zstd-jni Java compression
and decompression library, which can be obtained at:
* LICENSE:
* license/LICENSE.zstd-jni.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/luben/zstd-jni
This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression
and decompression library written by William Kinney. It can be obtained at:
* LICENSE:
* license/LICENSE.jfastlz.txt (MIT License)
* HOMEPAGE:
* https://code.google.com/p/jfastlz/
This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data
interchange format, which can be obtained at:
* LICENSE:
* license/LICENSE.protobuf.txt (New BSD License)
* HOMEPAGE:
* https://github.com/google/protobuf
This product optionally depends on 'Bouncy Castle Crypto APIs' to generate
a temporary self-signed X.509 certificate when the JVM does not provide the
equivalent functionality. It can be obtained at:
* LICENSE:
* license/LICENSE.bouncycastle.txt (MIT License)
* HOMEPAGE:
* https://www.bouncycastle.org/
This product optionally depends on 'Snappy', a compression library produced
by Google Inc, which can be obtained at:
* LICENSE:
* license/LICENSE.snappy.txt (New BSD License)
* HOMEPAGE:
* https://github.com/google/snappy
This product optionally depends on 'JBoss Marshalling', an alternative Java
serialization API, which can be obtained at:
* LICENSE:
* license/LICENSE.jboss-marshalling.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/jboss-remoting/jboss-marshalling
This product optionally depends on 'Caliper', Google's micro-
benchmarking framework, which can be obtained at:
* LICENSE:
* license/LICENSE.caliper.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/google/caliper
This product optionally depends on 'Apache Commons Logging', a logging
framework, which can be obtained at:
* LICENSE:
* license/LICENSE.commons-logging.txt (Apache License 2.0)
* HOMEPAGE:
* https://commons.apache.org/logging/
This product optionally depends on 'Apache Log4J', a logging framework, which
can be obtained at:
* LICENSE:
* license/LICENSE.log4j.txt (Apache License 2.0)
* HOMEPAGE:
* https://logging.apache.org/log4j/
This product optionally depends on 'Aalto XML', an ultra-high performance
non-blocking XML processor, which can be obtained at:
* LICENSE:
* license/LICENSE.aalto-xml.txt (Apache License 2.0)
* HOMEPAGE:
* https://wiki.fasterxml.com/AaltoHome
This product contains a modified version of 'HPACK', a Java implementation of
the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at:
* LICENSE:
* license/LICENSE.hpack.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/twitter/hpack
This product contains a modified version of 'HPACK', a Java implementation of
the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at:
* LICENSE:
* license/LICENSE.hyper-hpack.txt (MIT License)
* HOMEPAGE:
* https://github.com/python-hyper/hpack/
This product contains a modified version of 'HPACK', a Java implementation of
the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. It can be obtained at:
* LICENSE:
* license/LICENSE.nghttp2-hpack.txt (MIT License)
* HOMEPAGE:
* https://github.com/nghttp2/nghttp2/
This product contains a modified portion of 'Apache Commons Lang', a Java library
that provides utilities for the java.lang API, which can be obtained at:
* LICENSE:
* license/LICENSE.commons-lang.txt (Apache License 2.0)
* HOMEPAGE:
* https://commons.apache.org/proper/commons-lang/
This product contains the Maven wrapper scripts from 'Maven Wrapper', that provides an easy way to ensure a user has everything necessary to run the Maven build.
* LICENSE:
* license/LICENSE.mvn-wrapper.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/takari/maven-wrapper
This product contains the dnsinfo.h header file, that provides a way to retrieve the system DNS configuration on MacOS.
This private header is also used by Apple's open source
mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/).
* LICENSE:
* license/LICENSE.dnsinfo.txt (Apple Public Source License 2.0)
* HOMEPAGE:
* https://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h
This product optionally depends on 'Brotli4j', Brotli compression and
decompression for Java., which can be obtained at:
* LICENSE:
* license/LICENSE.brotli4j.txt (Apache License 2.0)
* HOMEPAGE:
* https://github.com/hyperxpro/Brotli4j
************************
Creative Commons Zero license version 1.0
************************
The following binary components are provided under the Creative Commons Zero license version 1.0. See project link for details.
(CC0v1.0) Reactive Streams (org.reactivestreams:reactive-streams:jar:1.0.3 - http://www.reactive-streams.org/)


@@ -1,134 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-bundle</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>nifi-kafka-2-0-processors</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record-serialization-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record-sink-api</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-utils</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-ssl-context-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kerberos-credentials-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kerberos-user-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-security-kerberos</artifactId>
<version>2.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-kafka-shared</artifactId>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka2.0.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>${kafka2.0.version}</version>
<scope>test</scope>
<exclusions>
<!-- Transitive dependencies excluded because they are located
in a legacy Maven repository, which Maven 3 doesn't support. -->
<exclusion>
<groupId>javax.jms</groupId>
<artifactId>jms</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jdmk</groupId>
<artifactId>jmxtools</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jmx</groupId>
<artifactId>jmxri</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock</artifactId>
<version>2.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock-record-utils</artifactId>
<version>2.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-record-path</artifactId>
<version>2.0.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
<profiles>
<profile>
<id>include-kafka-aws</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<dependencies>
<dependency>
<groupId>software.amazon.msk</groupId>
<artifactId>aws-msk-iam-auth</artifactId>
<version>${aws-msk-iam-auth.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>


@@ -1,495 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyDescriptor.Builder;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.kafka.shared.attribute.KafkaFlowFileAttribute;
import org.apache.nifi.kafka.shared.component.KafkaClientComponent;
import org.apache.nifi.kafka.shared.property.KeyEncoding;
import org.apache.nifi.kafka.shared.property.provider.KafkaPropertyProvider;
import org.apache.nifi.kafka.shared.property.provider.StandardKafkaPropertyProvider;
import org.apache.nifi.kafka.shared.validation.DynamicPropertyValidator;
import org.apache.nifi.kafka.shared.validation.KafkaClientCustomValidationFunction;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import static org.apache.nifi.expression.ExpressionLanguageScope.NONE;
import static org.apache.nifi.expression.ExpressionLanguageScope.VARIABLE_REGISTRY;
@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 2.0 Consumer API. "
+ "The complementary NiFi processor for sending messages is PublishKafkaRecord_2_0. Please note that, at this time, the Processor assumes that "
+ "all records that are retrieved from a given partition have the same schema. If any of the Kafka messages are pulled but cannot be parsed or written with the "
+ "configured Record Reader or Record Writer, the contents of the message will be written to a separate FlowFile, and that FlowFile will be transferred to the "
+ "'parse.failure' relationship. Otherwise, each FlowFile is sent to the 'success' relationship and may contain many individual messages within the single FlowFile. "
+ "A 'record.count' attribute is added to indicate how many messages are contained in the FlowFile. No two Kafka messages will be placed into the same FlowFile if they "
+ "have different schemas, or if they have different values for a message header that is included by the <Headers to Add as Attributes> property.")
@Tags({"Kafka", "Get", "Record", "csv", "avro", "json", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "2.0"})
@WritesAttributes({
@WritesAttribute(attribute = "record.count", description = "The number of records received"),
@WritesAttribute(attribute = "mime.type", description = "The MIME Type that is provided by the configured Record Writer"),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_PARTITION, description = "The partition of the topic the records are from"),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_TIMESTAMP, description = "The timestamp of the message in the partition of the topic."),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_TOPIC, description = "The topic records are from")
})
@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration.",
expressionLanguageScope = VARIABLE_REGISTRY)
@SeeAlso({ConsumeKafka_2_0.class, PublishKafka_2_0.class, PublishKafkaRecord_2_0.class})
public class ConsumeKafkaRecord_2_0 extends AbstractProcessor implements KafkaClientComponent {
static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset");
static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset");
static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group");
static final AllowableValue TOPIC_NAME = new AllowableValue("names", "names", "Topic is a full topic name or comma separated list of names");
static final AllowableValue TOPIC_PATTERN = new AllowableValue("pattern", "pattern", "Topic is a regex using the Java Pattern syntax");
static final PropertyDescriptor TOPICS = new Builder()
.name("topic")
.displayName("Topic Name(s)")
.description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.build();
static final PropertyDescriptor TOPIC_TYPE = new Builder()
.name("topic_type")
.displayName("Topic Name Format")
.description("Specifies whether the Topic(s) provided are a comma separated list of names or a single regular expression")
.required(true)
.allowableValues(TOPIC_NAME, TOPIC_PATTERN)
.defaultValue(TOPIC_NAME.getValue())
.build();
static final PropertyDescriptor RECORD_READER = new Builder()
.name("record-reader")
.displayName("Record Reader")
.description("The Record Reader to use for incoming FlowFiles")
.identifiesControllerService(RecordReaderFactory.class)
.expressionLanguageSupported(NONE)
.required(true)
.build();
static final PropertyDescriptor RECORD_WRITER = new Builder()
.name("record-writer")
.displayName("Record Writer")
.description("The Record Writer to use in order to serialize the data before sending to Kafka")
.identifiesControllerService(RecordSetWriterFactory.class)
.expressionLanguageSupported(NONE)
.required(true)
.build();
static final PropertyDescriptor GROUP_ID = new Builder()
.name("group.id")
.displayName("Group ID")
.description("A Group ID is used to identify consumers that are within the same consumer group. Corresponds to Kafka's 'group.id' property.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.build();
static final PropertyDescriptor AUTO_OFFSET_RESET = new Builder()
.name("auto.offset.reset")
.displayName("Offset Reset")
.description("Allows you to manage the condition when there is no initial offset in Kafka or if the current offset does not exist any "
+ "more on the server (e.g. because that data has been deleted). Corresponds to Kafka's 'auto.offset.reset' property.")
.required(true)
.allowableValues(OFFSET_EARLIEST, OFFSET_LATEST, OFFSET_NONE)
.defaultValue(OFFSET_LATEST.getValue())
.build();
static final PropertyDescriptor MAX_POLL_RECORDS = new Builder()
.name("max.poll.records")
.displayName("Max Poll Records")
.description("Specifies the maximum number of records Kafka should return in a single poll.")
.required(false)
.defaultValue("10000")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.build();
static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new Builder()
.name("max-uncommit-offset-wait")
.displayName("Max Uncommitted Time")
.description("Specifies the maximum amount of time allowed to pass before offsets must be committed. "
+ "This value impacts how often offsets will be committed. Committing offsets less often increases "
+ "throughput but also increases the window of potential data duplication in the event of a rebalance "
+ "or JVM restart between commits. This value is also related to maximum poll records and the use "
+ "of a message demarcator. When using a message demarcator we can have far more uncommitted messages "
+ "than when we're not as there is much less for us to keep track of in memory.")
.required(false)
.defaultValue("1 secs")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.build();
static final PropertyDescriptor COMMS_TIMEOUT = new Builder()
.name("Communications Timeout")
.displayName("Communications Timeout")
.description("Specifies the timeout that the consumer should use when communicating with the Kafka Broker")
.required(true)
.defaultValue("60 secs")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.build();
static final PropertyDescriptor HONOR_TRANSACTIONS = new Builder()
.name("honor-transactions")
.displayName("Honor Transactions")
.description("Specifies whether or not NiFi should honor transactional guarantees when communicating with Kafka. If false, the Processor will use an \"isolation level\" of "
+ "read_uncomitted. This means that messages will be received as soon as they are written to Kafka but will be pulled, even if the producer cancels the transactions. If "
+ "this value is true, NiFi will not receive any messages for which the producer's transaction was canceled, but this can result in some latency since the consumer must wait "
+ "for the producer to finish its entire transaction instead of pulling as the messages become available.")
.expressionLanguageSupported(NONE)
.allowableValues("true", "false")
.defaultValue("true")
.required(true)
.build();
static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new Builder()
.name("message-header-encoding")
.displayName("Message Header Encoding")
.description("Any message header that is found on a Kafka message will be added to the outbound FlowFile as an attribute. "
+ "This property indicates the Character Encoding to use for deserializing the headers.")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.defaultValue("UTF-8")
.required(false)
.build();
static final PropertyDescriptor HEADER_NAME_REGEX = new Builder()
.name("header-name-regex")
.displayName("Headers to Add as Attributes (Regex)")
.description("A Regular Expression that is matched against all message headers. "
+ "Any message header whose name matches the regex will be added to the FlowFile as an Attribute. "
+ "If not specified, no Header values will be added as FlowFile attributes. If two messages have a different value for the same header and that header is selected by "
+ "the provided regex, then those two messages must be added to different FlowFiles. As a result, users should be cautious about using a regex like "
+ "\".*\" if messages are expected to have header values that are unique per message, such as an identifier or timestamp, because it will prevent NiFi from bundling "
+ "the messages together efficiently.")
.addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
.expressionLanguageSupported(NONE)
.required(false)
.build();
static final PropertyDescriptor SEPARATE_BY_KEY = new Builder()
.name("separate-by-key")
.displayName("Separate By Key")
.description("If true, two Records will only be added to the same FlowFile if both of the Kafka Messages have identical keys.")
.required(false)
.allowableValues("true", "false")
.defaultValue("false")
.build();
static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new PropertyDescriptor.Builder()
.name("key-attribute-encoding")
.displayName("Key Attribute Encoding")
.description("If the <Separate By Key> property is set to true, FlowFiles that are emitted have an attribute named '" + KafkaFlowFileAttribute.KAFKA_KEY +
"'. This property dictates how the value of the attribute should be encoded.")
.required(true)
.defaultValue(KeyEncoding.UTF8.getValue())
.allowableValues(KeyEncoding.class)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("FlowFiles received from Kafka. Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.")
.build();
static final Relationship REL_PARSE_FAILURE = new Relationship.Builder()
.name("parse.failure")
.description("If a message from Kafka cannot be parsed using the configured Record Reader, the contents of the "
+ "message will be routed to this Relationship as its own individual FlowFile.")
.build();
static final List<PropertyDescriptor> DESCRIPTORS;
static final Set<Relationship> RELATIONSHIPS;
private volatile ConsumerPool consumerPool = null;
private final Set<ConsumerLease> activeLeases = Collections.synchronizedSet(new HashSet<>());
static {
List<PropertyDescriptor> descriptors = new ArrayList<>();
descriptors.add(BOOTSTRAP_SERVERS);
descriptors.add(TOPICS);
descriptors.add(TOPIC_TYPE);
descriptors.add(RECORD_READER);
descriptors.add(RECORD_WRITER);
descriptors.add(HONOR_TRANSACTIONS);
descriptors.add(SECURITY_PROTOCOL);
descriptors.add(SASL_MECHANISM);
descriptors.add(KERBEROS_CREDENTIALS_SERVICE);
descriptors.add(KERBEROS_SERVICE_NAME);
descriptors.add(KERBEROS_PRINCIPAL);
descriptors.add(KERBEROS_KEYTAB);
descriptors.add(SASL_USERNAME);
descriptors.add(SASL_PASSWORD);
descriptors.add(TOKEN_AUTHENTICATION);
descriptors.add(AWS_PROFILE_NAME);
descriptors.add(SSL_CONTEXT_SERVICE);
descriptors.add(GROUP_ID);
descriptors.add(SEPARATE_BY_KEY);
descriptors.add(KEY_ATTRIBUTE_ENCODING);
descriptors.add(AUTO_OFFSET_RESET);
descriptors.add(MESSAGE_HEADER_ENCODING);
descriptors.add(HEADER_NAME_REGEX);
descriptors.add(MAX_POLL_RECORDS);
descriptors.add(MAX_UNCOMMITTED_TIME);
descriptors.add(COMMS_TIMEOUT);
DESCRIPTORS = Collections.unmodifiableList(descriptors);
final Set<Relationship> rels = new HashSet<>();
rels.add(REL_SUCCESS);
rels.add(REL_PARSE_FAILURE);
RELATIONSHIPS = Collections.unmodifiableSet(rels);
}
@Override
public Set<Relationship> getRelationships() {
return RELATIONSHIPS;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return DESCRIPTORS;
}
@OnStopped
public void close() {
final ConsumerPool pool = consumerPool;
consumerPool = null;
if (pool != null) {
pool.close();
}
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
.name(propertyDescriptorName)
.addValidator(new DynamicPropertyValidator(ConsumerConfig.class))
.dynamic(true)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
final Collection<ValidationResult> validationResults = new KafkaClientCustomValidationFunction().apply(validationContext);
final ValidationResult consumerPartitionsResult = ConsumerPartitionsUtil.validateConsumePartitions(validationContext.getAllProperties());
validationResults.add(consumerPartitionsResult);
final boolean explicitPartitionMapping = ConsumerPartitionsUtil.isPartitionAssignmentExplicit(validationContext.getAllProperties());
if (explicitPartitionMapping) {
final String topicType = validationContext.getProperty(TOPIC_TYPE).getValue();
if (TOPIC_PATTERN.getValue().equals(topicType)) {
validationResults.add(new ValidationResult.Builder()
.subject(TOPIC_TYPE.getDisplayName())
.input(TOPIC_PATTERN.getDisplayName())
.valid(false)
.explanation("It is not valid to explicitly assign Topic Partitions and also use a Topic Pattern. "
+ "Topic Partitions may be assigned only if explicitly specifying topic names also.")
.build());
}
}
return validationResults;
}
private synchronized ConsumerPool getConsumerPool(final ProcessContext context) {
ConsumerPool pool = consumerPool;
if (pool != null) {
return pool;
}
final ConsumerPool consumerPool = createConsumerPool(context, getLogger());
final boolean explicitAssignment = ConsumerPartitionsUtil.isPartitionAssignmentExplicit(context.getAllProperties());
if (explicitAssignment) {
final int numAssignedPartitions = ConsumerPartitionsUtil.getPartitionAssignmentCount(context.getAllProperties());
// Request from Kafka the number of partitions for the topics that we are consuming from. Then ensure that we have
// all of the partitions assigned.
final int partitionCount = consumerPool.getPartitionCount();
if (partitionCount != numAssignedPartitions) {
context.yield();
consumerPool.close();
throw new ProcessException("Illegal Partition Assignment: There are " + numAssignedPartitions + " partitions statically assigned using the partitions.* property names, but the Kafka" +
" topic(s) have " + partitionCount + " partitions");
}
}
this.consumerPool = consumerPool;
return consumerPool;
}
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
final int maxLeases = context.getMaxConcurrentTasks();
final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final KafkaPropertyProvider propertyProvider = new StandardKafkaPropertyProvider(ConsumerConfig.class);
final Map<String, Object> props = propertyProvider.getProperties(context);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, Boolean.FALSE.toString());
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
final String topicListing = context.getProperty(ConsumeKafkaRecord_2_0.TOPICS).evaluateAttributeExpressions().getValue();
final String topicType = context.getProperty(ConsumeKafkaRecord_2_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
final List<String> topics = new ArrayList<>();
final String securityProtocol = context.getProperty(SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
final int commsTimeoutMillis = context.getProperty(COMMS_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue();
props.put(ConsumerConfig.DEFAULT_API_TIMEOUT_MS_CONFIG, commsTimeoutMillis);
final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
final Charset charset = Charset.forName(charsetName);
final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
final boolean separateByKey = context.getProperty(SEPARATE_BY_KEY).asBoolean();
final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue();
final int[] partitionsToConsume;
try {
partitionsToConsume = ConsumerPartitionsUtil.getPartitionsForHost(context.getAllProperties(), getLogger());
} catch (final UnknownHostException uhe) {
throw new ProcessException("Could not determine localhost's hostname", uhe);
}
if (topicType.equals(TOPIC_NAME.getValue())) {
for (final String topic : topicListing.split(",", 100)) {
final String trimmedName = topic.trim();
if (!trimmedName.isEmpty()) {
topics.add(trimmedName);
}
}
return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol,
bootstrapServers, log, honorTransactions, charset, headerNamePattern, separateByKey, keyEncoding, partitionsToConsume);
} else if (topicType.equals(TOPIC_PATTERN.getValue())) {
final Pattern topicPattern = Pattern.compile(topicListing.trim());
return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol,
bootstrapServers, log, honorTransactions, charset, headerNamePattern, separateByKey, keyEncoding, partitionsToConsume);
} else {
getLogger().error("Subscription type has an unknown value {}", new Object[] {topicType});
return null;
}
}
@OnUnscheduled
public void interruptActiveThreads() {
// There are known issues with the Kafka client library that result in the client code hanging
// indefinitely when unable to communicate with the broker. In order to address this, we will wait
// up to 30 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the
// thread to wakeup when it is blocked, waiting on a response.
final long nanosToWait = TimeUnit.SECONDS.toNanos(5L);
final long start = System.nanoTime();
while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) {
try {
Thread.sleep(100L);
} catch (final InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
if (!activeLeases.isEmpty()) {
int count = 0;
for (final ConsumerLease lease : activeLeases) {
getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", new Object[] {lease});
lease.wakeup();
count++;
}
getLogger().info("Woke up {} consumers", new Object[] {count});
}
activeLeases.clear();
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final ConsumerPool pool = getConsumerPool(context);
if (pool == null) {
context.yield();
return;
}
try (final ConsumerLease lease = pool.obtainConsumer(session, context)) {
if (lease == null) {
context.yield();
return;
}
activeLeases.add(lease);
try {
while (this.isScheduled() && lease.continuePolling()) {
lease.poll();
}
if (this.isScheduled() && !lease.commit()) {
context.yield();
}
} catch (final WakeupException we) {
getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. "
+ "Will roll back session and discard any partially received data.", new Object[] {lease});
} catch (final KafkaException kex) {
getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}",
new Object[]{lease, kex}, kex);
} catch (final Throwable t) {
getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}",
new Object[]{lease, t}, t);
} finally {
activeLeases.remove(lease);
}
}
}
}


@@ -1,479 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.kafka.shared.attribute.KafkaFlowFileAttribute;
import org.apache.nifi.kafka.shared.component.KafkaClientComponent;
import org.apache.nifi.kafka.shared.property.KeyEncoding;
import org.apache.nifi.kafka.shared.property.provider.KafkaPropertyProvider;
import org.apache.nifi.kafka.shared.property.provider.StandardKafkaPropertyProvider;
import org.apache.nifi.kafka.shared.validation.DynamicPropertyValidator;
import org.apache.nifi.kafka.shared.validation.KafkaClientCustomValidationFunction;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
@CapabilityDescription("Consumes messages from Apache Kafka specifically built against the Kafka 2.0 Consumer API. "
+ "The complementary NiFi processor for sending messages is PublishKafka_2_0.")
@Tags({"Kafka", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume", "2.0"})
@WritesAttributes({
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_COUNT, description = "The number of messages written if more than one"),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_KEY, description = "The key of message if present and if single message. "
+ "How the key is encoded depends on the value of the 'Key Attribute Encoding' property."),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_OFFSET, description = "The offset of the message in the partition of the topic."),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_TIMESTAMP, description = "The timestamp of the message in the partition of the topic."),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_PARTITION, description = "The partition of the topic the message or message bundle is from"),
@WritesAttribute(attribute = KafkaFlowFileAttribute.KAFKA_TOPIC, description = "The topic the message or message bundle is from")
})
@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ",
expressionLanguageScope = ExpressionLanguageScope.VARIABLE_REGISTRY)
public class ConsumeKafka_2_0 extends AbstractProcessor implements KafkaClientComponent {
static final AllowableValue OFFSET_EARLIEST = new AllowableValue("earliest", "earliest", "Automatically reset the offset to the earliest offset");
static final AllowableValue OFFSET_LATEST = new AllowableValue("latest", "latest", "Automatically reset the offset to the latest offset");
static final AllowableValue OFFSET_NONE = new AllowableValue("none", "none", "Throw exception to the consumer if no previous offset is found for the consumer's group");
static final AllowableValue TOPIC_NAME = new AllowableValue("names", "names", "Topic is a full topic name or comma separated list of names");
static final AllowableValue TOPIC_PATTERN = new AllowableValue("pattern", "pattern", "Topic is a regex using the Java Pattern syntax");
static final PropertyDescriptor TOPICS = new PropertyDescriptor.Builder()
.name("topic")
.displayName("Topic Name(s)")
.description("The name of the Kafka Topic(s) to pull from. More than one can be supplied if comma separated.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.build();
static final PropertyDescriptor TOPIC_TYPE = new PropertyDescriptor.Builder()
.name("topic_type")
.displayName("Topic Name Format")
.description("Specifies whether the Topic(s) provided are a comma separated list of names or a single regular expression")
.required(true)
.allowableValues(TOPIC_NAME, TOPIC_PATTERN)
.defaultValue(TOPIC_NAME.getValue())
.build();
static final PropertyDescriptor GROUP_ID = new PropertyDescriptor.Builder()
.name(ConsumerConfig.GROUP_ID_CONFIG)
.displayName("Group ID")
.description("A Group ID is used to identify consumers that are within the same consumer group. Corresponds to Kafka's 'group.id' property.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.build();
static final PropertyDescriptor AUTO_OFFSET_RESET = new PropertyDescriptor.Builder()
.name(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG)
.displayName("Offset Reset")
.description("Allows you to manage the condition when there is no initial offset in Kafka or if the current offset does not exist any "
+ "more on the server (e.g. because that data has been deleted). Corresponds to Kafka's 'auto.offset.reset' property.")
.required(true)
.allowableValues(OFFSET_EARLIEST, OFFSET_LATEST, OFFSET_NONE)
.defaultValue(OFFSET_LATEST.getValue())
.build();
static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new PropertyDescriptor.Builder()
.name("key-attribute-encoding")
.displayName("Key Attribute Encoding")
.description("FlowFiles that are emitted have an attribute named '" + KafkaFlowFileAttribute.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.")
.required(true)
.defaultValue(KeyEncoding.UTF8.getValue())
.allowableValues(KeyEncoding.class)
.build();
static final PropertyDescriptor MESSAGE_DEMARCATOR = new PropertyDescriptor.Builder()
.name("message-demarcator")
.displayName("Message Demarcator")
.required(false)
.addValidator(Validator.VALID)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.description("Since KafkaConsumer receives messages in batches, you have an option to output FlowFiles which contains "
+ "all Kafka messages in a single batch for a given topic and partition and this property allows you to provide a string (interpreted as UTF-8) to use "
+ "for demarcating apart multiple Kafka messages. This is an optional property and if not provided each Kafka message received "
+ "will result in a single FlowFile which "
+ "time it is triggered. To enter special character such as 'new line' use CTRL+Enter or Shift+Enter depending on the OS")
.build();
static final PropertyDescriptor SEPARATE_BY_KEY = new PropertyDescriptor.Builder()
.name("separate-by-key")
.displayName("Separate By Key")
.description("If true, and the <Message Demarcator> property is set, two messages will only be added to the same FlowFile if both of the Kafka Messages have identical keys.")
.required(false)
.allowableValues("true", "false")
.defaultValue("false")
.build();
static final PropertyDescriptor HEADER_NAME_REGEX = new PropertyDescriptor.Builder()
.name("header-name-regex")
.displayName("Headers to Add as Attributes (Regex)")
.description("A Regular Expression that is matched against all message headers. "
+ "Any message header whose name matches the regex will be added to the FlowFile as an Attribute. "
+ "If not specified, no Header values will be added as FlowFile attributes. If two messages have a different value for the same header and that header is selected by "
+ "the provided regex, then those two messages must be added to different FlowFiles. As a result, users should be cautious about using a regex like "
+ "\".*\" if messages are expected to have header values that are unique per message, such as an identifier or timestamp, because it will prevent NiFi from bundling "
+ "the messages together efficiently.")
.addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.required(false)
.build();
static final PropertyDescriptor MAX_POLL_RECORDS = new PropertyDescriptor.Builder()
.name("max.poll.records")
.displayName("Max Poll Records")
.description("Specifies the maximum number of records Kafka should return in a single poll.")
.required(false)
.defaultValue("10000")
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.build();
static final PropertyDescriptor MAX_UNCOMMITTED_TIME = new PropertyDescriptor.Builder()
.name("max-uncommit-offset-wait")
.displayName("Max Uncommitted Time")
.description("Specifies the maximum amount of time allowed to pass before offsets must be committed. "
+ "This value impacts how often offsets will be committed. Committing offsets less often increases "
+ "throughput but also increases the window of potential data duplication in the event of a rebalance "
+ "or JVM restart between commits. This value is also related to maximum poll records and the use "
+ "of a message demarcator. When using a message demarcator we can have far more uncommitted messages "
+ "than when we're not as there is much less for us to keep track of in memory.")
.required(false)
.defaultValue("1 secs")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.build();
static final PropertyDescriptor COMMS_TIMEOUT = new PropertyDescriptor.Builder()
.name("Communications Timeout")
.displayName("Communications Timeout")
.description("Specifies the timeout that the consumer should use when communicating with the Kafka Broker")
.required(true)
.defaultValue("60 secs")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.build();
static final PropertyDescriptor HONOR_TRANSACTIONS = new PropertyDescriptor.Builder()
.name("honor-transactions")
.displayName("Honor Transactions")
.description("Specifies whether or not NiFi should honor transactional guarantees when communicating with Kafka. If false, the Processor will use an \"isolation level\" of "
+ "read_uncomitted. This means that messages will be received as soon as they are written to Kafka but will be pulled, even if the producer cancels the transactions. If "
+ "this value is true, NiFi will not receive any messages for which the producer's transaction was canceled, but this can result in some latency since the consumer must wait "
+ "for the producer to finish its entire transaction instead of pulling as the messages become available.")
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues("true", "false")
.defaultValue("true")
.required(true)
.build();
static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new PropertyDescriptor.Builder()
.name("message-header-encoding")
.displayName("Message Header Encoding")
.description("Any message header that is found on a Kafka message will be added to the outbound FlowFile as an attribute. "
+ "This property indicates the Character Encoding to use for deserializing the headers.")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.defaultValue("UTF-8")
.required(false)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("FlowFiles received from Kafka. Depending on demarcation strategy it is a flow file per message or a bundle of messages grouped by topic and partition.")
.build();
static final List<PropertyDescriptor> DESCRIPTORS;
static final Set<Relationship> RELATIONSHIPS;
private volatile ConsumerPool consumerPool = null;
private final Set<ConsumerLease> activeLeases = Collections.synchronizedSet(new HashSet<>());
static {
List<PropertyDescriptor> descriptors = new ArrayList<>();
descriptors.add(BOOTSTRAP_SERVERS);
descriptors.add(SECURITY_PROTOCOL);
descriptors.add(SASL_MECHANISM);
descriptors.add(KERBEROS_SERVICE_NAME);
descriptors.add(KERBEROS_CREDENTIALS_SERVICE);
descriptors.add(KERBEROS_PRINCIPAL);
descriptors.add(KERBEROS_KEYTAB);
descriptors.add(SASL_USERNAME);
descriptors.add(SASL_PASSWORD);
descriptors.add(TOKEN_AUTHENTICATION);
descriptors.add(AWS_PROFILE_NAME);
descriptors.add(SSL_CONTEXT_SERVICE);
descriptors.add(TOPICS);
descriptors.add(TOPIC_TYPE);
descriptors.add(HONOR_TRANSACTIONS);
descriptors.add(GROUP_ID);
descriptors.add(AUTO_OFFSET_RESET);
descriptors.add(KEY_ATTRIBUTE_ENCODING);
descriptors.add(MESSAGE_DEMARCATOR);
descriptors.add(SEPARATE_BY_KEY);
descriptors.add(MESSAGE_HEADER_ENCODING);
descriptors.add(HEADER_NAME_REGEX);
descriptors.add(MAX_POLL_RECORDS);
descriptors.add(MAX_UNCOMMITTED_TIME);
descriptors.add(COMMS_TIMEOUT);
DESCRIPTORS = Collections.unmodifiableList(descriptors);
RELATIONSHIPS = Collections.singleton(REL_SUCCESS);
}
@Override
public Set<Relationship> getRelationships() {
return RELATIONSHIPS;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return DESCRIPTORS;
}
@OnStopped
public void close() {
final ConsumerPool pool = consumerPool;
consumerPool = null;
if (pool != null) {
pool.close();
}
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new PropertyDescriptor.Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
.name(propertyDescriptorName)
.addValidator(new DynamicPropertyValidator(ConsumerConfig.class))
.dynamic(true)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
final Collection<ValidationResult> validationResults = new KafkaClientCustomValidationFunction().apply(validationContext);
final ValidationResult consumerPartitionsResult = ConsumerPartitionsUtil.validateConsumePartitions(validationContext.getAllProperties());
validationResults.add(consumerPartitionsResult);
final boolean explicitPartitionMapping = ConsumerPartitionsUtil.isPartitionAssignmentExplicit(validationContext.getAllProperties());
if (explicitPartitionMapping) {
final String topicType = validationContext.getProperty(TOPIC_TYPE).getValue();
if (TOPIC_PATTERN.getValue().equals(topicType)) {
validationResults.add(new ValidationResult.Builder()
.subject(TOPIC_TYPE.getDisplayName())
.input(TOPIC_PATTERN.getDisplayName())
.valid(false)
.explanation("It is not valid to explicitly assign Topic Partitions and also use a Topic Pattern. "
+ "Topic Partitions may be assigned only if explicitly specifying topic names also.")
.build());
}
}
return validationResults;
}
private synchronized ConsumerPool getConsumerPool(final ProcessContext context) {
ConsumerPool pool = consumerPool;
if (pool != null) {
return pool;
}
final ConsumerPool consumerPool = createConsumerPool(context, getLogger());
final boolean explicitAssignment = ConsumerPartitionsUtil.isPartitionAssignmentExplicit(context.getAllProperties());
if (explicitAssignment) {
final int numAssignedPartitions = ConsumerPartitionsUtil.getPartitionAssignmentCount(context.getAllProperties());
// Request from Kafka the number of partitions for the topics that we are consuming from. Then ensure that we have
// all of the partitions assigned.
final int partitionCount = consumerPool.getPartitionCount();
if (partitionCount != numAssignedPartitions) {
context.yield();
consumerPool.close();
throw new ProcessException("Illegal Partition Assignment: There are " + numAssignedPartitions + " partitions statically assigned using the partitions.* property names, but the Kafka" +
" topic(s) have " + partitionCount + " partitions");
}
}
this.consumerPool = consumerPool;
return consumerPool;
}
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
final int maxLeases = context.getMaxConcurrentTasks();
final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final byte[] demarcator = context.getProperty(ConsumeKafka_2_0.MESSAGE_DEMARCATOR).isSet()
? context.getProperty(ConsumeKafka_2_0.MESSAGE_DEMARCATOR).evaluateAttributeExpressions().getValue().getBytes(StandardCharsets.UTF_8)
: null;
final KafkaPropertyProvider propertyProvider = new StandardKafkaPropertyProvider(ConsumerConfig.class);
final Map<String, Object> props = propertyProvider.getProperties(context);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, Boolean.FALSE.toString());
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
final String topicListing = context.getProperty(ConsumeKafka_2_0.TOPICS).evaluateAttributeExpressions().getValue();
final String topicType = context.getProperty(ConsumeKafka_2_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
final List<String> topics = new ArrayList<>();
final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue();
final String securityProtocol = context.getProperty(SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
final int commsTimeoutMillis = context.getProperty(COMMS_TIMEOUT).asTimePeriod(TimeUnit.MILLISECONDS).intValue();
props.put(ConsumerConfig.DEFAULT_API_TIMEOUT_MS_CONFIG, commsTimeoutMillis);
final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
final Charset charset = Charset.forName(charsetName);
final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
final boolean separateByKey = context.getProperty(SEPARATE_BY_KEY).asBoolean();
final int[] partitionsToConsume;
try {
partitionsToConsume = ConsumerPartitionsUtil.getPartitionsForHost(context.getAllProperties(), getLogger());
} catch (final UnknownHostException uhe) {
throw new ProcessException("Could not determine localhost's hostname", uhe);
}
if (topicType.equals(TOPIC_NAME.getValue())) {
for (final String topic : topicListing.split(",", 100)) {
final String trimmedName = topic.trim();
if (!trimmedName.isEmpty()) {
topics.add(trimmedName);
}
}
return new ConsumerPool(maxLeases, demarcator, separateByKey, props, topics, maxUncommittedTime, keyEncoding, securityProtocol,
bootstrapServers, log, honorTransactions, charset, headerNamePattern, partitionsToConsume);
} else if (topicType.equals(TOPIC_PATTERN.getValue())) {
final Pattern topicPattern = Pattern.compile(topicListing.trim());
return new ConsumerPool(maxLeases, demarcator, separateByKey, props, topicPattern, maxUncommittedTime, keyEncoding, securityProtocol,
bootstrapServers, log, honorTransactions, charset, headerNamePattern, partitionsToConsume);
} else {
getLogger().error("Subscription type has an unknown value {}", topicType);
return null;
}
}
@OnUnscheduled
public void interruptActiveThreads() {
// There are known issues with the Kafka client library that result in the client code hanging
// indefinitely when unable to communicate with the broker. In order to address this, we will wait
// up to 5 seconds for the Threads to finish and then will call Consumer.wakeup() to trigger the
// thread to wake up when it is blocked, waiting on a response.
final long nanosToWait = TimeUnit.SECONDS.toNanos(5L);
final long start = System.nanoTime();
while (System.nanoTime() - start < nanosToWait && !activeLeases.isEmpty()) {
try {
Thread.sleep(100L);
} catch (final InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
if (!activeLeases.isEmpty()) {
int count = 0;
for (final ConsumerLease lease : activeLeases) {
getLogger().info("Consumer {} has not finished after waiting 30 seconds; will attempt to wake-up the lease", lease);
lease.wakeup();
count++;
}
getLogger().info("Woke up {} consumers", count);
}
activeLeases.clear();
}
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final ConsumerPool pool = getConsumerPool(context);
if (pool == null) {
context.yield();
return;
}
try (final ConsumerLease lease = pool.obtainConsumer(session, context)) {
if (lease == null) {
context.yield();
return;
}
activeLeases.add(lease);
try {
while (this.isScheduled() && lease.continuePolling()) {
lease.poll();
}
if (this.isScheduled() && !lease.commit()) {
context.yield();
}
} catch (final WakeupException we) {
getLogger().warn("Was interrupted while trying to communicate with Kafka with lease {}. "
+ "Will roll back session and discard any partially received data.", lease);
} catch (final KafkaException kex) {
getLogger().error("Exception while interacting with Kafka so will close the lease {} due to {}",
new Object[]{lease, kex}, kex);
} catch (final Throwable t) {
getLogger().error("Exception while processing data from kafka so will close the lease {} due to {}",
new Object[]{lease, t}, t);
} finally {
activeLeases.remove(lease);
}
}
}
}
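A small, self-contained sketch of the two Topic Name Format modes handled by createConsumerPool above: in 'names' mode the topic listing is split on commas and trimmed, while in 'pattern' mode the whole trimmed value is compiled as a single Java regular expression. The topic values used here are invented for illustration.

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

public class TopicListingSketch {
    public static void main(final String[] args) {
        final String topicListing = " orders , payments ,";

        // 'names' mode: trim each comma-separated entry and drop blanks
        final List<String> topics = new ArrayList<>();
        for (final String topic : topicListing.split(",", 100)) {
            final String trimmedName = topic.trim();
            if (!trimmedName.isEmpty()) {
                topics.add(trimmedName);
            }
        }
        System.out.println(topics); // [orders, payments]

        // 'pattern' mode: the trimmed value is one Java regex matched against topic names
        final Pattern topicPattern = Pattern.compile("orders|refund-.*");
        System.out.println(topicPattern.matcher("refund-eu").matches()); // true
    }
}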

View File

@ -1,736 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.commons.codec.binary.Hex;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.header.Header;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.kafka.shared.attribute.KafkaFlowFileAttribute;
import org.apache.nifi.kafka.shared.attribute.StandardTransitUriProvider;
import org.apache.nifi.kafka.shared.property.KeyEncoding;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.SchemaValidationException;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_2_0.REL_PARSE_FAILURE;
import static org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_2_0.REL_SUCCESS;
/**
* This class represents a lease to access a Kafka Consumer object. The lease is
* intended to be obtained from a ConsumerPool. The lease is closeable to allow
* for the clean model of a try w/resources whereby non-exceptional cases mean
* the lease will be returned to the pool for future use by others. A given
* lease may only belong to a single thread at a time.
*/
public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListener {
private final long maxWaitMillis;
private final Consumer<byte[], byte[]> kafkaConsumer;
private final ComponentLog logger;
private final byte[] demarcatorBytes;
private final String keyEncoding;
private final String securityProtocol;
private final String bootstrapServers;
private final RecordSetWriterFactory writerFactory;
private final RecordReaderFactory readerFactory;
private final Charset headerCharacterSet;
private final Pattern headerNamePattern;
private final boolean separateByKey;
private boolean poisoned = false;
//used for tracking demarcated flowfiles to their TopicPartition so we can append
//to them on subsequent poll calls
private final Map<BundleInformation, BundleTracker> bundleMap = new HashMap<>();
private final Map<TopicPartition, OffsetAndMetadata> uncommittedOffsetsMap = new HashMap<>();
private long leaseStartNanos = -1;
private boolean lastPollEmpty = false;
private int totalMessages = 0;
ConsumerLease(
final long maxWaitMillis,
final Consumer<byte[], byte[]> kafkaConsumer,
final byte[] demarcatorBytes,
final String keyEncoding,
final String securityProtocol,
final String bootstrapServers,
final RecordReaderFactory readerFactory,
final RecordSetWriterFactory writerFactory,
final ComponentLog logger,
final Charset headerCharacterSet,
final Pattern headerNamePattern,
final boolean separateByKey) {
this.maxWaitMillis = maxWaitMillis;
this.kafkaConsumer = kafkaConsumer;
this.demarcatorBytes = demarcatorBytes;
this.keyEncoding = keyEncoding;
this.securityProtocol = securityProtocol;
this.bootstrapServers = bootstrapServers;
this.readerFactory = readerFactory;
this.writerFactory = writerFactory;
this.logger = logger;
this.headerCharacterSet = headerCharacterSet;
this.headerNamePattern = headerNamePattern;
this.separateByKey = separateByKey;
}
/**
* clears out internal state elements excluding session and consumer as
* those are managed by the pool itself
*/
private void resetInternalState() {
bundleMap.clear();
uncommittedOffsetsMap.clear();
leaseStartNanos = -1;
lastPollEmpty = false;
totalMessages = 0;
}
/**
* Kafka will call this method whenever it is about to rebalance the
* consumers for the given partitions. We'll simply take this to mean that
* we need to quickly commit what we've got and will return the consumer to
* the pool. This method will be called during the poll() method call of
* this class and will be called by the same thread calling poll according
* to the Kafka API docs. After this method executes the session and kafka
* offsets are committed and this lease is closed.
*
* @param partitions partitions being reassigned
*/
@Override
public void onPartitionsRevoked(final Collection<TopicPartition> partitions) {
logger.debug("Rebalance Alert: Partitions '{}' revoked for lease '{}' with consumer '{}'", partitions, this, kafkaConsumer);
//force a commit here. Can reuse the session and consumer after this but must commit now to avoid duplicates if kafka reassigns partition
commit();
}
/**
* This will be called by Kafka when the rebalance has completed. We don't
* need to do anything with this information other than optionally log it as
* by this point we've committed what we've got and moved on.
*
* @param partitions topic partition set being reassigned
*/
@Override
public void onPartitionsAssigned(final Collection<TopicPartition> partitions) {
logger.debug("Rebalance Alert: Partitions '{}' assigned for lease '{}' with consumer '{}'", partitions, this, kafkaConsumer);
}
public List<TopicPartition> getAssignedPartitions() {
return null;
}
/**
* Executes a poll on the underlying Kafka Consumer and creates any new
* flowfiles necessary or appends to existing ones if in demarcation mode.
*/
void poll() {
/*
* Implementation note:
* Even if ConsumeKafka is not scheduled to poll (because downstream connection back-pressure is engaged)
* for longer than session.timeout.ms (defaults to 10 sec), the Kafka consumer sends heartbeats from a background thread.
* If this situation lasts longer than max.poll.interval.ms (defaults to 5 min), the Kafka consumer sends a
* Leave Group request to the Group Coordinator. When the ConsumeKafka processor is scheduled again, the Kafka client checks
* whether this client instance is still a part of the consumer group. If not, it rejoins before polling messages.
* This behavior has been fixed via Kafka KIP-62 and is available from Kafka client 0.10.1.0 onward.
*/
try {
final ConsumerRecords<byte[], byte[]> records = kafkaConsumer.poll(Duration.ofMillis(10));
lastPollEmpty = records.count() == 0;
processRecords(records);
} catch (final ProcessException pe) {
throw pe;
} catch (final Throwable t) {
this.poison();
throw t;
}
}
/**
* Notifies Kafka to commit the offsets for the specified topic/partition
* pairs to the specified offsets w/the given metadata. This can offer
* higher performance than the other commitOffsets call as it allows the
* kafka client to collect more data from Kafka before committing the
* offsets.
*
* @return false if we didn't do anything and should probably yield; true if
* we committed new data
*/
boolean commit() {
if (uncommittedOffsetsMap.isEmpty()) {
resetInternalState();
return false;
}
try {
/*
* Committing the nifi session then the offsets means we have an at
* least once guarantee here. If we reversed the order we'd have at
* most once.
*/
final Collection<FlowFile> bundledFlowFiles = getBundles();
if (!bundledFlowFiles.isEmpty()) {
getProcessSession().transfer(bundledFlowFiles, REL_SUCCESS);
}
getProcessSession().commitAsync(() -> {
final Map<TopicPartition, OffsetAndMetadata> offsetsMap = uncommittedOffsetsMap;
kafkaConsumer.commitSync(offsetsMap);
resetInternalState();
});
return true;
} catch (final IOException ioe) {
poison();
logger.error("Failed to finish writing out FlowFile bundle", ioe);
throw new ProcessException(ioe);
} catch (final KafkaException kex) {
poison();
logger.warn("Duplicates are likely as we were able to commit the process"
+ " session but received an exception from Kafka while committing"
+ " offsets.");
throw kex;
} catch (final Throwable t) {
poison();
throw t;
}
}
/**
* Indicates whether we should continue polling for data. If we are not
* writing data with a demarcator then we're writing individual flow files
* per Kafka message, so we must be very mindful of memory usage for
* the flow file objects (not their content) being held in memory. The
* content of Kafka messages will be written to the content repository
* immediately upon each poll call, but we must still be mindful of how much
* memory can be used in each poll call. We will indicate that we should
* stop polling if our last poll call produced no new results, if we've been
* polling and processing data longer than the specified maximum polling
* time, if we have reached our specified max flow file limit, or if a
* rebalance has been initiated for one of the partitions we're watching;
* otherwise we keep polling.
*
* @return true if we should keep polling; false otherwise
*/
boolean continuePolling() {
//stop if the last poll produced no new data
if (lastPollEmpty) {
return false;
}
//stop if we've gone past our desired max uncommitted wait time
if (leaseStartNanos < 0) {
leaseStartNanos = System.nanoTime();
}
final long durationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos);
if (durationMillis > maxWaitMillis) {
return false;
}
//stop if we've generated enough flowfiles that we need to be concerned about memory usage for the objects
if (bundleMap.size() > 200) { //a magic number - the number of simultaneous bundles to track
return false;
} else {
return totalMessages < 1000; // admittedly a magic number - good candidate for a processor property
}
}
/**
* Indicates that the underlying session and consumer should be immediately
* considered invalid. Once closed the session will be rolled back and the
* pool should destroy the underlying consumer. This is useful if due to
* external reasons, such as the processor no longer being scheduled, this
* lease should be terminated immediately.
*/
private void poison() {
poisoned = true;
}
/**
* @return true if this lease has been poisoned; false otherwise
*/
boolean isPoisoned() {
return poisoned;
}
/**
* Trigger the consumer's {@link KafkaConsumer#wakeup() wakeup()} method.
*/
public void wakeup() {
kafkaConsumer.wakeup();
}
/**
* Abstract method that is intended to be extended by the pool that created
* this ConsumerLease object. It should ensure that the session used to
* create this lease is rolled back and that the underlying Kafka consumer
* is either returned to the pool for continued use or destroyed if this
* lease has been poisoned. It can only be called once. Calling it more than
* once can result in undefined and non threadsafe behavior.
*/
@Override
public void close() {
resetInternalState();
}
public abstract ProcessSession getProcessSession();
public abstract void yield();
private void processRecords(final ConsumerRecords<byte[], byte[]> records) {
records.partitions().stream().forEach(partition -> {
List<ConsumerRecord<byte[], byte[]>> messages = records.records(partition);
if (!messages.isEmpty()) {
//update maximum offset map for this topic partition
long maxOffset = messages.stream()
.mapToLong(record -> record.offset())
.max()
.getAsLong();
//write records to content repository and session
if (demarcatorBytes != null) {
writeDemarcatedData(getProcessSession(), messages, partition);
} else if (readerFactory != null && writerFactory != null) {
writeRecordData(getProcessSession(), messages, partition);
} else {
messages.stream().forEach(message -> {
writeData(getProcessSession(), message, partition);
});
}
totalMessages += messages.size();
uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L));
}
});
}
private static String encodeKafkaKey(final byte[] key, final String encoding) {
if (key == null) {
return null;
}
if (KeyEncoding.HEX.getValue().equals(encoding)) {
return Hex.encodeHexString(key);
} else if (KeyEncoding.UTF8.getValue().equals(encoding)) {
return new String(key, StandardCharsets.UTF_8);
} else {
return null; // won't happen because it is guaranteed by the Allowable Values
}
}
private Collection<FlowFile> getBundles() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
for (final BundleTracker tracker : bundleMap.values()) {
final boolean includeBundle = processBundle(tracker);
if (includeBundle) {
flowFiles.add(tracker.flowFile);
}
}
return flowFiles;
}
private boolean processBundle(final BundleTracker bundle) throws IOException {
final RecordSetWriter writer = bundle.recordWriter;
if (writer != null) {
final WriteResult writeResult;
try {
writeResult = writer.finishRecordSet();
} finally {
writer.close();
}
if (writeResult.getRecordCount() == 0) {
getProcessSession().remove(bundle.flowFile);
return false;
}
final Map<String, String> attributes = new HashMap<>();
attributes.putAll(writeResult.getAttributes());
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, attributes);
}
populateAttributes(bundle);
return true;
}
private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
FlowFile flowFile = session.create();
final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
tracker.incrementRecordCount(1);
final byte[] value = record.value();
if (value != null) {
flowFile = session.write(flowFile, out -> {
out.write(value);
});
}
flowFile = session.putAllAttributes(flowFile, getAttributes(record));
tracker.updateFlowFile(flowFile);
populateAttributes(tracker);
session.transfer(tracker.flowFile, REL_SUCCESS);
}
private void writeDemarcatedData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
// Group the Records by their BundleInformation
final Map<BundleInformation, List<ConsumerRecord<byte[], byte[]>>> map = records.stream()
.collect(Collectors.groupingBy(rec -> new BundleInformation(topicPartition, null, getAttributes(rec), separateByKey ? rec.key() : null)));
for (final Map.Entry<BundleInformation, List<ConsumerRecord<byte[], byte[]>>> entry : map.entrySet()) {
final BundleInformation bundleInfo = entry.getKey();
final List<ConsumerRecord<byte[], byte[]>> recordList = entry.getValue();
final boolean demarcateFirstRecord;
BundleTracker tracker = bundleMap.get(bundleInfo);
FlowFile flowFile;
if (tracker == null) {
tracker = new BundleTracker(recordList.get(0), topicPartition, keyEncoding);
flowFile = session.create();
flowFile = session.putAllAttributes(flowFile, bundleInfo.attributes);
tracker.updateFlowFile(flowFile);
demarcateFirstRecord = false; //have not yet written records for this topic/partition in this lease
} else {
demarcateFirstRecord = true; //have already been writing records for this topic/partition in this lease
}
flowFile = tracker.flowFile;
tracker.incrementRecordCount(recordList.size());
flowFile = session.append(flowFile, out -> {
boolean useDemarcator = demarcateFirstRecord;
for (final ConsumerRecord<byte[], byte[]> record : recordList) {
if (useDemarcator) {
out.write(demarcatorBytes);
}
final byte[] value = record.value();
if (value != null) {
out.write(record.value());
}
useDemarcator = true;
}
});
tracker.updateFlowFile(flowFile);
bundleMap.put(bundleInfo, tracker);
}
}
private void handleParseFailure(final ConsumerRecord<byte[], byte[]> consumerRecord, final ProcessSession session, final Exception cause) {
handleParseFailure(consumerRecord, session, cause, "Failed to parse message from Kafka using the configured Record Reader. "
+ "Will route message as its own FlowFile to the 'parse.failure' relationship");
}
private void handleParseFailure(final ConsumerRecord<byte[], byte[]> consumerRecord, final ProcessSession session, final Exception cause, final String message) {
// If we are unable to parse the data, we need to transfer it to 'parse failure' relationship
final Map<String, String> attributes = getAttributes(consumerRecord);
attributes.put(KafkaFlowFileAttribute.KAFKA_OFFSET, String.valueOf(consumerRecord.offset()));
attributes.put(KafkaFlowFileAttribute.KAFKA_TIMESTAMP, String.valueOf(consumerRecord.timestamp()));
attributes.put(KafkaFlowFileAttribute.KAFKA_PARTITION, String.valueOf(consumerRecord.partition()));
attributes.put(KafkaFlowFileAttribute.KAFKA_TOPIC, consumerRecord.topic());
FlowFile failureFlowFile = session.create();
final byte[] value = consumerRecord.value();
if (value != null) {
failureFlowFile = session.write(failureFlowFile, out -> out.write(value));
}
failureFlowFile = session.putAllAttributes(failureFlowFile, attributes);
final String transitUri = StandardTransitUriProvider.getTransitUri(securityProtocol, bootstrapServers, consumerRecord.topic());
session.getProvenanceReporter().receive(failureFlowFile, transitUri);
session.transfer(failureFlowFile, REL_PARSE_FAILURE);
if (cause == null) {
logger.error(message);
} else {
logger.error(message, cause);
}
session.adjustCounter("Parse Failures", 1, false);
}
private Map<String, String> getAttributes(final ConsumerRecord<?, ?> consumerRecord) {
final Map<String, String> attributes = new HashMap<>();
if (headerNamePattern == null) {
return attributes;
}
for (final Header header : consumerRecord.headers()) {
final String attributeName = header.key();
final byte[] attributeValue = header.value();
if (headerNamePattern.matcher(attributeName).matches() && attributeValue != null) {
attributes.put(attributeName, new String(attributeValue, headerCharacterSet));
}
}
return attributes;
}
private void writeRecordData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
// Obtain a RecordReader from the RecordReaderFactory for each Kafka message by passing the
// message's attributes and an InputStream over the message value to the createRecordReader method.
// Records that share the same schema, attributes, and (optionally) message key are appended to the
// same bundle's RecordSetWriter so that they end up in a single FlowFile.
RecordSetWriter writer = null;
try {
for (final ConsumerRecord<byte[], byte[]> consumerRecord : records) {
final Map<String, String> attributes = getAttributes(consumerRecord);
final byte[] recordBytes = consumerRecord.value() == null ? new byte[0] : consumerRecord.value();
try (final InputStream in = new ByteArrayInputStream(recordBytes)) {
final RecordReader reader;
try {
reader = readerFactory.createRecordReader(attributes, in, recordBytes.length, logger);
} catch (final IOException e) {
yield();
rollback(topicPartition);
handleParseFailure(consumerRecord, session, e, "Failed to parse message from Kafka due to comms failure. Will roll back session and try again momentarily.");
closeWriter(writer);
return;
} catch (final Exception e) {
handleParseFailure(consumerRecord, session, e);
continue;
}
try {
Record record;
while ((record = reader.nextRecord()) != null) {
// Determine the bundle for this record.
final RecordSchema recordSchema = record.getSchema();
final BundleInformation bundleInfo = new BundleInformation(topicPartition, recordSchema, attributes, separateByKey ? consumerRecord.key() : null);
BundleTracker tracker = bundleMap.get(bundleInfo);
if (tracker == null) {
FlowFile flowFile = session.create();
flowFile = session.putAllAttributes(flowFile, attributes);
final OutputStream rawOut = session.write(flowFile);
final RecordSchema writeSchema;
try {
writeSchema = writerFactory.getSchema(flowFile.getAttributes(), recordSchema);
} catch (final Exception e) {
logger.error("Failed to obtain Schema for FlowFile. Will roll back the Kafka message offsets.", e);
rollback(topicPartition);
yield();
throw new ProcessException(e);
}
writer = writerFactory.createWriter(logger, writeSchema, rawOut, flowFile);
writer.beginRecordSet();
tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer);
tracker.updateFlowFile(flowFile);
bundleMap.put(bundleInfo, tracker);
} else {
writer = tracker.recordWriter;
}
try {
writer.write(record);
} catch (final RuntimeException re) {
handleParseFailure(consumerRecord, session, re, "Failed to write message from Kafka using the configured Record Writer. "
+ "Will route message as its own FlowFile to the 'parse.failure' relationship");
continue;
}
tracker.incrementRecordCount(1L);
session.adjustCounter("Records Received", 1L, false);
}
} catch (final IOException | MalformedRecordException | SchemaValidationException e) {
handleParseFailure(consumerRecord, session, e);
}
}
}
} catch (final Exception e) {
logger.error("Failed to properly receive messages from Kafka. Will roll back session and any un-committed offsets from Kafka.", e);
closeWriter(writer);
rollback(topicPartition);
throw new ProcessException(e);
}
}
private void closeWriter(final RecordSetWriter writer) {
try {
if (writer != null) {
writer.close();
}
} catch (final Exception ioe) {
logger.warn("Failed to close Record Writer", ioe);
}
}
private void rollback(final TopicPartition topicPartition) {
try {
OffsetAndMetadata offsetAndMetadata = uncommittedOffsetsMap.get(topicPartition);
if (offsetAndMetadata == null) {
offsetAndMetadata = kafkaConsumer.committed(topicPartition);
}
final long offset = offsetAndMetadata == null ? 0L : offsetAndMetadata.offset();
kafkaConsumer.seek(topicPartition, offset);
} catch (final Exception rollbackException) {
logger.warn("Attempted to rollback Kafka message offset but was unable to do so", rollbackException);
}
}
private void populateAttributes(final BundleTracker tracker) {
final Map<String, String> kafkaAttrs = new HashMap<>();
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_OFFSET, String.valueOf(tracker.initialOffset));
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_TIMESTAMP, String.valueOf(tracker.initialTimestamp));
// If we have a kafka key, we will add it as an attribute only if
// the FlowFile contains a single Record, or if the Records have been separated by Key,
// because we then know that even though there are multiple Records, they all have the same key.
if (tracker.key != null && (tracker.totalRecords == 1 || separateByKey)) {
if (!keyEncoding.equalsIgnoreCase(KeyEncoding.DO_NOT_ADD.getValue())) {
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_KEY, tracker.key);
}
}
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_PARTITION, String.valueOf(tracker.partition));
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_TOPIC, tracker.topic);
if (tracker.totalRecords > 1) {
// Add a record.count attribute to remain consistent with other record-oriented processors. If not
// reading/writing records, then use "kafka.count" attribute.
if (tracker.recordWriter == null) {
kafkaAttrs.put(KafkaFlowFileAttribute.KAFKA_COUNT, String.valueOf(tracker.totalRecords));
} else {
kafkaAttrs.put("record.count", String.valueOf(tracker.totalRecords));
}
}
final FlowFile newFlowFile = getProcessSession().putAllAttributes(tracker.flowFile, kafkaAttrs);
final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos);
final String transitUri = StandardTransitUriProvider.getTransitUri(securityProtocol, bootstrapServers, tracker.topic);
getProcessSession().getProvenanceReporter().receive(newFlowFile, transitUri, executionDurationMillis);
tracker.updateFlowFile(newFlowFile);
}
private static class BundleTracker {
final long initialOffset;
final long initialTimestamp;
final int partition;
final String topic;
final String key;
final RecordSetWriter recordWriter;
FlowFile flowFile;
long totalRecords = 0;
private BundleTracker(final ConsumerRecord<byte[], byte[]> initialRecord, final TopicPartition topicPartition, final String keyEncoding) {
this(initialRecord, topicPartition, keyEncoding, null);
}
private BundleTracker(final ConsumerRecord<byte[], byte[]> initialRecord, final TopicPartition topicPartition, final String keyEncoding, final RecordSetWriter recordWriter) {
this.initialOffset = initialRecord.offset();
this.initialTimestamp = initialRecord.timestamp();
this.partition = topicPartition.partition();
this.topic = topicPartition.topic();
this.recordWriter = recordWriter;
this.key = encodeKafkaKey(initialRecord.key(), keyEncoding);
}
private void incrementRecordCount(final long count) {
totalRecords += count;
}
private void updateFlowFile(final FlowFile flowFile) {
this.flowFile = flowFile;
}
}
private static class BundleInformation {
private final TopicPartition topicPartition;
private final RecordSchema schema;
private final Map<String, String> attributes;
private final byte[] messageKey;
public BundleInformation(final TopicPartition topicPartition, final RecordSchema schema, final Map<String, String> attributes, final byte[] messageKey) {
this.topicPartition = topicPartition;
this.schema = schema;
this.attributes = attributes;
this.messageKey = messageKey;
}
@Override
public int hashCode() {
return 41 + Objects.hash(topicPartition, schema, attributes) + 37 * Arrays.hashCode(messageKey);
}
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (obj == null) {
return false;
}
if (!(obj instanceof BundleInformation)) {
return false;
}
final BundleInformation other = (BundleInformation) obj;
return Objects.equals(topicPartition, other.topicPartition) && Objects.equals(schema, other.schema) && Objects.equals(attributes, other.attributes)
&& Arrays.equals(this.messageKey, other.messageKey);
}
}
}
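A short sketch of the key-encoding behavior implemented by encodeKafkaKey above: the kafka.key attribute carries either the UTF-8 text of the message key or its hexadecimal form, depending on the Key Attribute Encoding property. This assumes commons-codec is on the classpath; the key bytes are invented for illustration.

import java.nio.charset.StandardCharsets;
import org.apache.commons.codec.binary.Hex;

public class KeyEncodingSketch {
    public static void main(final String[] args) {
        final byte[] key = "order-42".getBytes(StandardCharsets.UTF_8);

        // UTF-8 encoding: the key bytes are interpreted as text
        System.out.println(new String(key, StandardCharsets.UTF_8)); // order-42

        // Hex encoding: useful when keys are not valid UTF-8
        System.out.println(Hex.encodeHexString(key)); // 6f726465722d3432
    }
}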

View File

@ -1,209 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.logging.ComponentLog;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class ConsumerPartitionsUtil {
public static final String PARTITION_PROPERTY_NAME_PREFIX = "partitions.";
public static int[] getPartitionsForHost(final Map<String, String> properties, final ComponentLog logger) throws UnknownHostException {
final Map<String, String> hostnameToPartitionString = mapHostnamesToPartitionStrings(properties);
final Map<String, int[]> partitionsByHost = mapPartitionValueToIntArrays(hostnameToPartitionString);
if (partitionsByHost.isEmpty()) {
// Explicit partitioning is not enabled.
logger.debug("No explicit Consumer Partitions have been declared.");
return null;
}
logger.info("Found the following mapping of hosts to partitions: {}", new Object[] {hostnameToPartitionString});
// Determine the partitions based on hostname/IP.
int[] partitionsForThisHost = getPartitionsForThisHost(partitionsByHost);
if (partitionsForThisHost == null) {
throw new IllegalArgumentException("Could not find a partition mapping for host " + InetAddress.getLocalHost().getCanonicalHostName());
}
return partitionsForThisHost;
}
private static Map<String, int[]> mapPartitionValueToIntArrays(final Map<String, String> partitionValues) {
final Map<String, int[]> partitionsByHost = new HashMap<>();
for (final Map.Entry<String, String> entry : partitionValues.entrySet()) {
final String host = entry.getKey();
final int[] partitions = parsePartitions(host, entry.getValue());
partitionsByHost.put(host, partitions);
}
return partitionsByHost;
}
private static int[] getPartitionsForThisHost(final Map<String, int[]> partitionsByHost) throws UnknownHostException {
// Determine the partitions based on hostname/IP.
final InetAddress localhost = InetAddress.getLocalHost();
int[] partitionsForThisHost = partitionsByHost.get(localhost.getCanonicalHostName());
if (partitionsForThisHost != null) {
return partitionsForThisHost;
}
partitionsForThisHost = partitionsByHost.get(localhost.getHostName());
if (partitionsForThisHost != null) {
return partitionsForThisHost;
}
return partitionsByHost.get(localhost.getHostAddress());
}
private static Map<String, String> mapHostnamesToPartitionStrings(final Map<String, String> properties) {
final Map<String, String> hostnameToPartitionString = new HashMap<>();
for (final Map.Entry<String, String> entry : properties.entrySet()) {
final String propertyName = entry.getKey();
if (!propertyName.startsWith(PARTITION_PROPERTY_NAME_PREFIX)) {
continue;
}
if (propertyName.length() <= PARTITION_PROPERTY_NAME_PREFIX.length()) {
continue;
}
final String propertyNameAfterPrefix = propertyName.substring(PARTITION_PROPERTY_NAME_PREFIX.length());
hostnameToPartitionString.put(propertyNameAfterPrefix, entry.getValue());
}
return hostnameToPartitionString;
}
private static int[] parsePartitions(final String hostname, final String propertyValue) {
final String[] splits = propertyValue.split(",");
final List<Integer> partitionList = new ArrayList<>();
for (final String split : splits) {
if (split.trim().isEmpty()) {
continue;
}
try {
final int partition = Integer.parseInt(split.trim());
if (partition < 0) {
throw new IllegalArgumentException("Found invalid value for the partitions for hostname " + hostname + ": " + split + " is negative");
}
partitionList.add(partition);
} catch (final NumberFormatException nfe) {
throw new IllegalArgumentException("Found invalid value for the partitions for hostname " + hostname + ": " + split + " is not an integer");
}
}
// Map our List<Integer> to an int[]
return partitionList.stream().mapToInt(Integer::intValue).toArray();
}
public static ValidationResult validateConsumePartitions(final Map<String, String> properties) {
final Map<String, String> hostnameToPartitionMapping = mapHostnamesToPartitionStrings(properties);
if (hostnameToPartitionMapping.isEmpty()) {
// Partitions are not being explicitly assigned.
return new ValidationResult.Builder().valid(true).build();
}
final Set<Integer> partitionsClaimed = new HashSet<>();
final Set<Integer> duplicatePartitions = new HashSet<>();
for (final Map.Entry<String, String> entry : hostnameToPartitionMapping.entrySet()) {
final int[] partitions = parsePartitions(entry.getKey(), entry.getValue());
for (final int partition : partitions) {
final boolean added = partitionsClaimed.add(partition);
if (!added) {
duplicatePartitions.add(partition);
}
}
}
final List<Integer> partitionsMissing = new ArrayList<>();
for (int i=0; i < partitionsClaimed.size(); i++) {
if (!partitionsClaimed.contains(i)) {
partitionsMissing.add(i);
}
}
if (!partitionsMissing.isEmpty()) {
return new ValidationResult.Builder()
.subject("Partitions")
.input(partitionsClaimed.toString())
.valid(false)
.explanation("The following partitions were not mapped to any node: " + partitionsMissing.toString())
.build();
}
if (!duplicatePartitions.isEmpty()) {
return new ValidationResult.Builder()
.subject("Partitions")
.input(partitionsClaimed.toString())
.valid(false)
.explanation("The following partitions were mapped to multiple nodes: " + duplicatePartitions.toString())
.build();
}
final Map<String, int[]> partitionsByHost = mapPartitionValueToIntArrays(hostnameToPartitionMapping);
final int[] partitionsForThisHost;
try {
partitionsForThisHost = getPartitionsForThisHost(partitionsByHost);
} catch (UnknownHostException e) {
return new ValidationResult.Builder()
.valid(false)
.subject("Partition Assignment")
.explanation("Unable to determine hostname of localhost")
.build();
}
if (partitionsForThisHost == null) {
return new ValidationResult.Builder()
.subject("Partition Assignment")
.valid(false)
.explanation("No assignment was given for this host")
.build();
}
return new ValidationResult.Builder().valid(true).build();
}
public static boolean isPartitionAssignmentExplicit(final Map<String, String> properties) {
final Map<String, String> hostnameToPartitionMapping = mapHostnamesToPartitionStrings(properties);
return !hostnameToPartitionMapping.isEmpty();
}
public static int getPartitionAssignmentCount(final Map<String, String> properties) {
final Map<String, String> hostnameToPartitionMapping = mapHostnamesToPartitionStrings(properties);
final Map<String, int[]> partitions = mapPartitionValueToIntArrays(hostnameToPartitionMapping);
int count = 0;
for (final int[] partitionArray : partitions.values()) {
count += partitionArray.length;
}
return count;
}
}
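A sketch of the "partitions.&lt;hostname&gt;" property convention that ConsumerPartitionsUtil parses above: each dynamic property name carries a hostname suffix and its value is a comma-separated list of partition numbers. The hostnames and partition numbers are invented for illustration; the real class additionally resolves the local host and validates that every partition is claimed exactly once.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionPropertySketch {
    private static final String PREFIX = "partitions.";

    public static void main(final String[] args) {
        final Map<String, String> properties = new LinkedHashMap<>();
        properties.put("partitions.nifi-node-1", "0, 2, 4");
        properties.put("partitions.nifi-node-2", "1, 3, 5");
        properties.put("group.id", "example-group"); // ignored: no 'partitions.' prefix

        for (final Map.Entry<String, String> entry : properties.entrySet()) {
            final String name = entry.getKey();
            if (!name.startsWith(PREFIX) || name.length() <= PREFIX.length()) {
                continue;
            }
            final String hostname = name.substring(PREFIX.length());
            final int[] partitions = Arrays.stream(entry.getValue().split(","))
                    .map(String::trim)
                    .filter(s -> !s.isEmpty())
                    .mapToInt(Integer::parseInt)
                    .toArray();
            System.out.println(hostname + " -> " + Arrays.toString(partitions)); // e.g. nifi-node-1 -> [0, 2, 4]
        }
    }
}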

View File

@ -1,497 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
/**
* A pool of Kafka Consumers for a given topic. Consumers can be obtained by
* calling 'obtainConsumer'. Once closed the pool is ready to be immediately
* used again.
*/
public class ConsumerPool implements Closeable {
private final BlockingQueue<SimpleConsumerLease> pooledLeases;
private final List<String> topics;
private final Pattern topicPattern;
private final Map<String, Object> kafkaProperties;
private final long maxWaitMillis;
private final ComponentLog logger;
private final byte[] demarcatorBytes;
private final String keyEncoding;
private final String securityProtocol;
private final String bootstrapServers;
private final boolean honorTransactions;
private final RecordReaderFactory readerFactory;
private final RecordSetWriterFactory writerFactory;
private final Charset headerCharacterSet;
private final Pattern headerNamePattern;
private final boolean separateByKey;
private final int[] partitionsToConsume;
private final AtomicLong consumerCreatedCountRef = new AtomicLong();
private final AtomicLong consumerClosedCountRef = new AtomicLong();
private final AtomicLong leasesObtainedCountRef = new AtomicLong();
private final Queue<List<TopicPartition>> availableTopicPartitions = new LinkedBlockingQueue<>();
/**
* Creates a pool of KafkaConsumer objects that will grow up to the maximum
* number of concurrent leases indicated. Consumers are lazily
* initialized. We may elect not to create up to the maximum number of
* configured consumers if the broker-reported lag time for all topics is
* below a certain threshold.
*
* @param maxConcurrentLeases max allowable consumers at once
* @param demarcator bytes to use as demarcator between messages; null or
* empty means no demarcator
* @param separateByKey whether messages with different keys should be written to separate FlowFiles
* @param kafkaProperties properties to use to initialize kafka consumers
* @param topics the topics to subscribe to
* @param maxWaitMillis maximum time to wait for a given lease to acquire
* data before committing
* @param keyEncoding the encoding to use for the key of a kafka message if
* found
* @param securityProtocol the security protocol used
* @param bootstrapServers the bootstrap servers
* @param logger the logger to report any errors/warnings
* @param honorTransactions whether to consume only committed records (Kafka isolation.level of read_committed)
* @param headerCharacterSet the character set to use when writing message header values to FlowFile attributes
* @param headerNamePattern a regular expression; only headers whose names match it are retained
* @param partitionsToConsume the partitions to assign explicitly to this node, or null to use Kafka's automatic subscription
*/
public ConsumerPool(
final int maxConcurrentLeases,
final byte[] demarcator,
final boolean separateByKey,
final Map<String, Object> kafkaProperties,
final List<String> topics,
final long maxWaitMillis,
final String keyEncoding,
final String securityProtocol,
final String bootstrapServers,
final ComponentLog logger,
final boolean honorTransactions,
final Charset headerCharacterSet,
final Pattern headerNamePattern,
final int[] partitionsToConsume) {
this.pooledLeases = new LinkedBlockingQueue<>();
this.maxWaitMillis = maxWaitMillis;
this.logger = logger;
this.demarcatorBytes = demarcator;
this.keyEncoding = keyEncoding;
this.securityProtocol = securityProtocol;
this.bootstrapServers = bootstrapServers;
this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
this.topics = Collections.unmodifiableList(topics);
this.topicPattern = null;
this.readerFactory = null;
this.writerFactory = null;
this.honorTransactions = honorTransactions;
this.headerCharacterSet = headerCharacterSet;
this.headerNamePattern = headerNamePattern;
this.separateByKey = separateByKey;
this.partitionsToConsume = partitionsToConsume;
enqueueAssignedPartitions(partitionsToConsume);
}
public ConsumerPool(
final int maxConcurrentLeases,
final byte[] demarcator,
final boolean separateByKey,
final Map<String, Object> kafkaProperties,
final Pattern topics,
final long maxWaitMillis,
final String keyEncoding,
final String securityProtocol,
final String bootstrapServers,
final ComponentLog logger,
final boolean honorTransactions,
final Charset headerCharacterSet,
final Pattern headerNamePattern,
final int[] partitionsToConsume) {
this.pooledLeases = new LinkedBlockingQueue<>();
this.maxWaitMillis = maxWaitMillis;
this.logger = logger;
this.demarcatorBytes = demarcator;
this.keyEncoding = keyEncoding;
this.securityProtocol = securityProtocol;
this.bootstrapServers = bootstrapServers;
this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
this.topics = null;
this.topicPattern = topics;
this.readerFactory = null;
this.writerFactory = null;
this.honorTransactions = honorTransactions;
this.headerCharacterSet = headerCharacterSet;
this.headerNamePattern = headerNamePattern;
this.separateByKey = separateByKey;
this.partitionsToConsume = partitionsToConsume;
enqueueAssignedPartitions(partitionsToConsume);
}
public ConsumerPool(
final int maxConcurrentLeases,
final RecordReaderFactory readerFactory,
final RecordSetWriterFactory writerFactory,
final Map<String, Object> kafkaProperties,
final Pattern topics,
final long maxWaitMillis,
final String securityProtocol,
final String bootstrapServers,
final ComponentLog logger,
final boolean honorTransactions,
final Charset headerCharacterSet,
final Pattern headerNamePattern,
final boolean separateByKey,
final String keyEncoding,
final int[] partitionsToConsume) {
this.pooledLeases = new LinkedBlockingQueue<>();
this.maxWaitMillis = maxWaitMillis;
this.logger = logger;
this.demarcatorBytes = null;
this.readerFactory = readerFactory;
this.writerFactory = writerFactory;
this.securityProtocol = securityProtocol;
this.bootstrapServers = bootstrapServers;
this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
this.topics = null;
this.topicPattern = topics;
this.honorTransactions = honorTransactions;
this.headerCharacterSet = headerCharacterSet;
this.headerNamePattern = headerNamePattern;
this.separateByKey = separateByKey;
this.keyEncoding = keyEncoding;
this.partitionsToConsume = partitionsToConsume;
enqueueAssignedPartitions(partitionsToConsume);
}
public ConsumerPool(
final int maxConcurrentLeases,
final RecordReaderFactory readerFactory,
final RecordSetWriterFactory writerFactory,
final Map<String, Object> kafkaProperties,
final List<String> topics,
final long maxWaitMillis,
final String securityProtocol,
final String bootstrapServers,
final ComponentLog logger,
final boolean honorTransactions,
final Charset headerCharacterSet,
final Pattern headerNamePattern,
final boolean separateByKey,
final String keyEncoding,
final int[] partitionsToConsume) {
this.pooledLeases = new LinkedBlockingQueue<>();
this.maxWaitMillis = maxWaitMillis;
this.logger = logger;
this.demarcatorBytes = null;
this.readerFactory = readerFactory;
this.writerFactory = writerFactory;
this.securityProtocol = securityProtocol;
this.bootstrapServers = bootstrapServers;
this.kafkaProperties = Collections.unmodifiableMap(kafkaProperties);
this.topics = topics;
this.topicPattern = null;
this.honorTransactions = honorTransactions;
this.headerCharacterSet = headerCharacterSet;
this.headerNamePattern = headerNamePattern;
this.separateByKey = separateByKey;
this.keyEncoding = keyEncoding;
this.partitionsToConsume = partitionsToConsume;
enqueueAssignedPartitions(partitionsToConsume);
}
public int getPartitionCount() {
// If using regex for topic names, just return -1
if (topics == null || topics.isEmpty()) {
return -1;
}
int partitionsEachTopic = 0;
try (final Consumer<byte[], byte[]> consumer = createKafkaConsumer()) {
for (final String topicName : topics) {
final List<PartitionInfo> partitionInfos = consumer.partitionsFor(topicName);
final int partitionsThisTopic = partitionInfos.size();
if (partitionsEachTopic != 0 && partitionsThisTopic != partitionsEachTopic) {
throw new IllegalStateException("The specific topic names do not have the same number of partitions");
}
partitionsEachTopic = partitionsThisTopic;
}
}
return partitionsEachTopic;
}
/**
* Obtains a consumer from the pool if one is available or lazily
* initializes a new one if deemed necessary.
*
* @param session the session for which the consumer lease will be
* associated
* @param processContext the ProcessContext for which the consumer
* lease will be associated
* @return consumer to use or null if not available or necessary
*/
public ConsumerLease obtainConsumer(final ProcessSession session, final ProcessContext processContext) {
// If there are any partition assignments that do not have leases in our pool, create the leases and add them to the pool.
// This is not necessary for us to handle if using automatic subscriptions because the Kafka protocol will ensure that each consumer
// has the appropriate partitions. However, if we are using explicit assignment, it's important to create these leases and add them
// to our pool in order to avoid starvation. E.g., if we have only a single concurrent task and 5 partitions assigned, we cannot simply
// wait until pooledLeases.poll() returns null to create a new ConsumerLease, as doing so may result in constantly pulling from only a
// single partition (since we'd get a Lease for Partition 1, then use it, and put it back in the pool).
recreateAssignedConsumers();
SimpleConsumerLease lease = pooledLeases.poll();
if (lease == null) {
lease = createConsumerLease();
if (lease == null) {
return null;
}
}
lease.setProcessSession(session, processContext);
leasesObtainedCountRef.incrementAndGet();
return lease;
}
private void recreateAssignedConsumers() {
List<TopicPartition> topicPartitions;
while ((topicPartitions = availableTopicPartitions.poll()) != null) {
final SimpleConsumerLease simpleConsumerLease = createConsumerLease(topicPartitions);
pooledLeases.add(simpleConsumerLease);
}
}
private SimpleConsumerLease createConsumerLease() {
if (partitionsToConsume != null) {
logger.debug("Cannot obtain lease to communicate with Kafka. Since partitions are explicitly assigned, cannot create a new lease.");
return null;
}
final Consumer<byte[], byte[]> consumer = createKafkaConsumer();
consumerCreatedCountRef.incrementAndGet();
/*
* For now return a new consumer lease. But we could later elect to
* have this return null if we determine the broker indicates that
* the lag time on all topics being monitored is sufficiently low.
* For now we should encourage conservative use of threads because
* having too many means we'll have at best useless threads sitting
* around doing frequent network calls and at worst having consumers
* sitting idle which could prompt excessive rebalances.
*/
final SimpleConsumerLease lease = new SimpleConsumerLease(consumer, null);
// This subscription tightly couples the lease to the given
// consumer. They cannot be separated from then on.
if (topics == null) {
consumer.subscribe(topicPattern, lease);
} else {
consumer.subscribe(topics, lease);
}
return lease;
}
private SimpleConsumerLease createConsumerLease(final List<TopicPartition> topicPartitions) {
final Consumer<byte[], byte[]> consumer = createKafkaConsumer();
consumerCreatedCountRef.incrementAndGet();
consumer.assign(topicPartitions);
final SimpleConsumerLease lease = new SimpleConsumerLease(consumer, topicPartitions);
return lease;
}
private void enqueueAssignedPartitions(final int[] partitionsToConsume) {
if (partitionsToConsume == null) {
return;
}
for (final int partition : partitionsToConsume) {
final List<TopicPartition> topicPartitions = createTopicPartitions(partition);
availableTopicPartitions.offer(topicPartitions);
}
}
private List<TopicPartition> createTopicPartitions(final int partition) {
final List<TopicPartition> topicPartitions = new ArrayList<>();
for (final String topic : topics) {
final TopicPartition topicPartition = new TopicPartition(topic, partition);
topicPartitions.add(topicPartition);
}
return topicPartitions;
}
/**
* Exposed as protected method for easier unit testing
*
* @return consumer
* @throws KafkaException if unable to subscribe to the given topics
*/
protected Consumer<byte[], byte[]> createKafkaConsumer() {
final Map<String, Object> properties = new HashMap<>(kafkaProperties);
if (honorTransactions) {
properties.put("isolation.level", "read_committed");
} else {
properties.put("isolation.level", "read_uncommitted");
}
final Consumer<byte[], byte[]> consumer = new KafkaConsumer<>(properties);
return consumer;
}
/**
* Closes all consumers in the pool. Can be safely called repeatedly.
*/
@Override
public void close() {
final List<SimpleConsumerLease> leases = new ArrayList<>();
pooledLeases.drainTo(leases);
leases.forEach((lease) -> {
lease.close(true);
});
}
private void closeConsumer(final Consumer<?, ?> consumer) {
consumerClosedCountRef.incrementAndGet();
try {
consumer.unsubscribe();
} catch (Exception e) {
logger.warn("Failed while unsubscribing " + consumer, e);
}
try {
consumer.close();
} catch (Exception e) {
logger.warn("Failed while closing " + consumer, e);
}
}
PoolStats getPoolStats() {
return new PoolStats(consumerCreatedCountRef.get(), consumerClosedCountRef.get(), leasesObtainedCountRef.get());
}
private class SimpleConsumerLease extends ConsumerLease {
private final Consumer<byte[], byte[]> consumer;
private final List<TopicPartition> assignedPartitions;
private volatile ProcessSession session;
private volatile ProcessContext processContext;
private volatile boolean closedConsumer;
private SimpleConsumerLease(final Consumer<byte[], byte[]> consumer, final List<TopicPartition> assignedPartitions) {
super(maxWaitMillis, consumer, demarcatorBytes, keyEncoding, securityProtocol, bootstrapServers,
readerFactory, writerFactory, logger, headerCharacterSet, headerNamePattern, separateByKey);
this.consumer = consumer;
this.assignedPartitions = assignedPartitions;
}
void setProcessSession(final ProcessSession session, final ProcessContext context) {
this.session = session;
this.processContext = context;
}
@Override
public List<TopicPartition> getAssignedPartitions() {
return assignedPartitions;
}
@Override
public void yield() {
if (processContext != null) {
processContext.yield();
}
}
@Override
public ProcessSession getProcessSession() {
return session;
}
@Override
public void close() {
super.close();
close(false);
}
public void close(final boolean forceClose) {
if (closedConsumer) {
return;
}
super.close();
if (session != null) {
session.rollback();
setProcessSession(null, null);
}
if (forceClose || isPoisoned() || !pooledLeases.offer(this)) {
closedConsumer = true;
closeConsumer(consumer);
// If explicit topic/partition assignment is used, make the assignments for this Lease available again.
if (assignedPartitions != null) {
logger.debug("Adding partitions {} back to the pool", assignedPartitions);
availableTopicPartitions.offer(assignedPartitions);
}
}
}
}
static final class PoolStats {
final long consumerCreatedCount;
final long consumerClosedCount;
final long leasesObtainedCount;
PoolStats(
final long consumerCreatedCount,
final long consumerClosedCount,
final long leasesObtainedCount
) {
this.consumerCreatedCount = consumerCreatedCount;
this.consumerClosedCount = consumerClosedCount;
this.leasesObtainedCount = leasesObtainedCount;
}
@Override
public String toString() {
return "Created Consumers [" + consumerCreatedCount + "]\n"
+ "Closed Consumers [" + consumerClosedCount + "]\n"
+ "Leases Obtained [" + leasesObtainedCount + "]\n";
}
}
}
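For readers less familiar with the consumer API this pool wraps, here is a hedged sketch of the subscribe-versus-assign distinction that obtainConsumer and createConsumerLease rely on. The broker address, topic name, group id, and the sketch class are placeholders, not part of this codebase.

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

class ConsumerAssignmentSketch {
    private static KafkaConsumer<byte[], byte[]> newConsumer() {
        final Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");           // placeholder
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        return new KafkaConsumer<>(props);
    }

    public static void main(String[] args) {
        // Automatic subscription: the group coordinator balances partitions across consumers,
        // which is what createConsumerLease() does when no explicit partitions are configured.
        try (KafkaConsumer<byte[], byte[]> subscribed = newConsumer()) {
            subscribed.subscribe(List.of("example-topic"));
        }

        // Explicit assignment: the caller pins the consumer to specific partitions and no group
        // rebalancing occurs for them, mirroring createConsumerLease(List<TopicPartition>).
        try (KafkaConsumer<byte[], byte[]> assigned = newConsumer()) {
            assigned.assign(List.of(new TopicPartition("example-topic", 0)));
        }
    }
}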


@ -1,187 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
public class InFlightMessageTracker {
private final ConcurrentMap<FlowFile, Counts> messageCountsByFlowFile = new ConcurrentHashMap<>();
private final ConcurrentMap<FlowFile, Exception> failures = new ConcurrentHashMap<>();
private final ConcurrentMap<FlowFile, Set<Exception>> encounteredFailures = new ConcurrentHashMap<>();
private final Object progressMutex = new Object();
private final ComponentLog logger;
public InFlightMessageTracker(final ComponentLog logger) {
this.logger = logger;
}
public void incrementAcknowledgedCount(final FlowFile flowFile) {
final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts());
counter.incrementAcknowledgedCount();
synchronized (progressMutex) {
progressMutex.notify();
}
}
/**
* This method guarantees that the specified FlowFile will be transferred to
* the 'success' relationship even if it did not produce any Kafka message.
*/
public void trackEmpty(final FlowFile flowFile) {
messageCountsByFlowFile.putIfAbsent(flowFile, new Counts());
}
public int getAcknowledgedCount(final FlowFile flowFile) {
final Counts counter = messageCountsByFlowFile.get(flowFile);
return (counter == null) ? 0 : counter.getAcknowledgedCount();
}
public void incrementSentCount(final FlowFile flowFile) {
final Counts counter = messageCountsByFlowFile.computeIfAbsent(flowFile, ff -> new Counts());
counter.incrementSentCount();
}
public int getSentCount(final FlowFile flowFile) {
final Counts counter = messageCountsByFlowFile.get(flowFile);
return (counter == null) ? 0 : counter.getSentCount();
}
public void fail(final FlowFile flowFile, final Exception exception) {
failures.putIfAbsent(flowFile, exception);
boolean newException = encounteredFailures
.computeIfAbsent(flowFile, (k) -> ConcurrentHashMap.newKeySet())
.add(exception);
if (newException) {
logger.error("Failed to send {} to Kafka", flowFile, exception);
}
synchronized (progressMutex) {
progressMutex.notify();
}
}
public Exception getFailure(final FlowFile flowFile) {
return failures.get(flowFile);
}
public boolean isFailed(final FlowFile flowFile) {
return getFailure(flowFile) != null;
}
public void reset() {
messageCountsByFlowFile.clear();
failures.clear();
encounteredFailures.clear();
}
public PublishResult failOutstanding(final Exception exception) {
messageCountsByFlowFile.keySet().stream()
.filter(ff -> !isComplete(ff))
.filter(ff -> !failures.containsKey(ff))
.forEach(ff -> failures.put(ff, exception));
return createPublishResult();
}
private boolean isComplete(final FlowFile flowFile) {
final Counts counts = messageCountsByFlowFile.get(flowFile);
if (counts.getAcknowledgedCount() == counts.getSentCount()) {
// all messages received successfully.
return true;
}
if (failures.containsKey(flowFile)) {
// FlowFile failed so is complete
return true;
}
return false;
}
private boolean isComplete() {
return messageCountsByFlowFile.keySet().stream()
.allMatch(flowFile -> isComplete(flowFile));
}
void awaitCompletion(final long millis) throws InterruptedException, TimeoutException {
final long startTime = System.nanoTime();
final long maxTime = startTime + TimeUnit.MILLISECONDS.toNanos(millis);
while (System.nanoTime() < maxTime) {
synchronized (progressMutex) {
if (isComplete()) {
return;
}
progressMutex.wait(millis);
}
}
throw new TimeoutException();
}
PublishResult createPublishResult() {
return new PublishResult() {
@Override
public boolean isFailure() {
return !failures.isEmpty();
}
@Override
public int getSuccessfulMessageCount(final FlowFile flowFile) {
return getAcknowledgedCount(flowFile);
}
@Override
public Exception getReasonForFailure(final FlowFile flowFile) {
return getFailure(flowFile);
}
};
}
public static class Counts {
private final AtomicInteger sentCount = new AtomicInteger(0);
private final AtomicInteger acknowledgedCount = new AtomicInteger(0);
public void incrementSentCount() {
sentCount.incrementAndGet();
}
public void incrementAcknowledgedCount() {
acknowledgedCount.incrementAndGet();
}
public int getAcknowledgedCount() {
return acknowledgedCount.get();
}
public int getSentCount() {
return sentCount.get();
}
}
}
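A hedged sketch of the send-then-acknowledge counting pattern the tracker supports, with plain counters standing in for the per-FlowFile bookkeeping. The broker address, topic, and sketch class are placeholders.

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.ByteArraySerializer;

import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

class SendAckTrackingSketch {
    public static void main(String[] args) {
        final Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());

        final AtomicInteger sent = new AtomicInteger();
        final AtomicInteger acknowledged = new AtomicInteger();

        try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
            final ProducerRecord<byte[], byte[]> record =
                    new ProducerRecord<>("example-topic", "hello".getBytes(StandardCharsets.UTF_8));
            sent.incrementAndGet(); // analogous to incrementSentCount(flowFile)
            producer.send(record, (metadata, exception) -> {
                if (exception == null) {
                    acknowledged.incrementAndGet(); // analogous to incrementAcknowledgedCount(flowFile)
                } else {
                    exception.printStackTrace();    // analogous to fail(flowFile, exception)
                }
            });
            producer.flush(); // a crude stand-in for awaitCompletion(millis)
        }
        System.out.println("sent=" + sent.get() + ", acknowledged=" + acknowledged.get());
    }
}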


@ -1,98 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import java.util.Map;
/**
* Collection of implementations of common Kafka {@link Partitioner}s.
*/
public final class Partitioners {
private Partitioners() {
}
/**
* A {@link Partitioner} that implements a 'round-robin' mechanism, evenly
* distributing load across all available partitions.
*/
public static class RoundRobinPartitioner implements Partitioner {
private volatile int index;
@Override
public void configure(Map<String, ?> configs) {
// noop
}
@Override
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
return this.next(cluster.availablePartitionsForTopic(topic).size());
}
@Override
public void close() {
// noop
}
private synchronized int next(int numberOfPartitions) {
if (this.index >= numberOfPartitions) {
this.index = 0;
}
return index++;
}
}
public static class RecordPathPartitioner implements Partitioner {
@Override
public int partition(final String topic, final Object key, final byte[] keyBytes, final Object value, final byte[] valueBytes, final Cluster cluster) {
// When this partitioner is used, it is always overridden by creating the ProducerRecord with the Partition directly specified. However, we must have a unique value
// to set in the Producer's config, so this class exists
return 0;
}
@Override
public void close() {
}
@Override
public void configure(final Map<String, ?> configs) {
}
}
public static class ExpressionLanguagePartitioner implements Partitioner {
@Override
public int partition(final String topic, final Object key, final byte[] keyBytes, final Object value, final byte[] valueBytes, final Cluster cluster) {
// When this partitioner is used, it is always overridden by creating the ProducerRecord with the Partition directly specified. However, we must have a unique value
// to set in the Producer's config, so this class exists
return 0;
}
@Override
public void close() {
}
@Override
public void configure(final Map<String, ?> configs) {
}
}
}
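For context, a minimal sketch of how a partitioner such as RoundRobinPartitioner takes effect through Kafka's partitioner.class producer setting, which is how the <Partitioner class> processor property value reaches the producer configuration. The broker address and the sketch class are placeholders.

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.processors.kafka.pubsub.Partitioners;

import java.util.HashMap;
import java.util.Map;

class PartitionerConfigSketch {
    public static void main(String[] args) {
        final Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        // partitioner.class tells the producer which Partitioner implementation to instantiate.
        props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, Partitioners.RoundRobinPartitioner.class.getName());

        try (KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(props)) {
            // Messages sent through this producer would now be routed round-robin across the
            // topic's available partitions.
            producer.flush();
        }
    }
}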


@ -1,35 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import java.util.List;
/**
* Strategy interface that allows multiple implementations for handling failure scenarios when publishing data to Kafka
*/
public interface PublishFailureStrategy {
/**
* Routes the FlowFiles to the appropriate destination
* @param session the process session that the flowfiles belong to
* @param flowFiles the flowfiles to transfer
*/
void routeFlowFiles(ProcessSession session, List<FlowFile> flowFiles);
}
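Because the interface has a single abstract method, implementations are typically lambdas. The sketch below mirrors the route-to-failure and rollback variants chosen by getFailureStrategy in the publishing processors further down; the stand-in Relationship and the sketch class are hypothetical, since each processor supplies its own REL_FAILURE.

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processors.kafka.pubsub.PublishFailureStrategy;

import java.util.List;

class FailureStrategySketch {
    // Stand-in relationship; the real processors use their own REL_FAILURE.
    static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .build();

    // Route failed FlowFiles to the failure relationship.
    static final PublishFailureStrategy ROUTE_TO_FAILURE =
            (final ProcessSession session, final List<FlowFile> flowFiles) -> session.transfer(flowFiles, REL_FAILURE);

    // Roll the session back so the FlowFiles stay on the incoming connection and are retried.
    static final PublishFailureStrategy ROLLBACK = (session, flowFiles) -> session.rollback();
}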


@ -1,571 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.errors.AuthorizationException;
import org.apache.kafka.common.errors.OutOfOrderSequenceException;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.SeeAlso;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyDescriptor.Builder;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.kafka.shared.attribute.StandardTransitUriProvider;
import org.apache.nifi.kafka.shared.component.KafkaPublishComponent;
import org.apache.nifi.kafka.shared.property.FailureStrategy;
import org.apache.nifi.kafka.shared.property.provider.KafkaPropertyProvider;
import org.apache.nifi.kafka.shared.property.provider.StandardKafkaPropertyProvider;
import org.apache.nifi.kafka.shared.transaction.TransactionIdSupplier;
import org.apache.nifi.kafka.shared.validation.DynamicPropertyValidator;
import org.apache.nifi.kafka.shared.validation.KafkaClientCustomValidationFunction;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.FlowFileFilters;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.record.path.RecordPath;
import org.apache.nifi.record.path.RecordPathResult;
import org.apache.nifi.record.path.util.RecordPathCache;
import org.apache.nifi.record.path.validation.RecordPathValidator;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAccumulator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import static org.apache.nifi.expression.ExpressionLanguageScope.FLOWFILE_ATTRIBUTES;
import static org.apache.nifi.expression.ExpressionLanguageScope.NONE;
import static org.apache.nifi.expression.ExpressionLanguageScope.VARIABLE_REGISTRY;
@Tags({"Apache", "Kafka", "Record", "csv", "json", "avro", "logs", "Put", "Send", "Message", "PubSub", "2.0"})
@CapabilityDescription("Sends the contents of a FlowFile as individual records to Apache Kafka using the Kafka 2.0 Producer API. "
+ "The contents of the FlowFile are expected to be record-oriented data that can be read by the configured Record Reader. "
+ "The complementary NiFi processor for fetching messages is ConsumeKafkaRecord_2_0.")
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ",
expressionLanguageScope = VARIABLE_REGISTRY)
@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Kafka for this FlowFile. This attribute is added only to "
+ "FlowFiles that are routed to success.")
@SeeAlso({PublishKafka_2_0.class, ConsumeKafka_2_0.class, ConsumeKafkaRecord_2_0.class})
public class PublishKafkaRecord_2_0 extends AbstractProcessor implements KafkaPublishComponent {
protected static final String MSG_COUNT = "msg.count";
static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery",
"FlowFile will be routed to failure unless the message is replicated to the appropriate "
+ "number of Kafka Nodes according to the Topic configuration");
static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery",
"FlowFile will be routed to success if the message is received by a single Kafka node, "
+ "whether or not it is replicated. This is faster than <Guarantee Replicated Delivery> "
+ "but can result in data loss if a Kafka node crashes");
static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort",
"FlowFile will be routed to success after successfully sending the content to a Kafka node, "
+ "without waiting for any acknowledgment from the node at all. This provides the best performance but may result in data loss.");
static final AllowableValue ROUND_ROBIN_PARTITIONING = new AllowableValue(Partitioners.RoundRobinPartitioner.class.getName(),
Partitioners.RoundRobinPartitioner.class.getSimpleName(),
"Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, "
+ "the next Partition to Partition 2, and so on, wrapping as necessary.");
static final AllowableValue RANDOM_PARTITIONING = new AllowableValue("org.apache.kafka.clients.producer.internals.DefaultPartitioner",
"DefaultPartitioner", "Messages will be assigned to random partitions.");
static final AllowableValue RECORD_PATH_PARTITIONING = new AllowableValue(Partitioners.RecordPathPartitioner.class.getName(),
"RecordPath Partitioner", "Interprets the <Partition> property as a RecordPath that will be evaluated against each Record to determine which partition the Record will go to. All Records " +
"that have the same value for the given RecordPath will go to the same Partition.");
static final AllowableValue EXPRESSION_LANGUAGE_PARTITIONING = new AllowableValue(Partitioners.ExpressionLanguagePartitioner.class.getName(), "Expression Language Partitioner",
"Interprets the <Partition> property as Expression Language that will be evaluated against each FlowFile. This Expression will be evaluated once against the FlowFile, " +
"so all Records in a given FlowFile will go to the same partition.");
static final PropertyDescriptor TOPIC = new Builder()
.name("topic")
.displayName("Topic Name")
.description("The name of the Kafka Topic to publish to.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor RECORD_READER = new Builder()
.name("record-reader")
.displayName("Record Reader")
.description("The Record Reader to use for incoming FlowFiles")
.identifiesControllerService(RecordReaderFactory.class)
.expressionLanguageSupported(NONE)
.required(true)
.build();
static final PropertyDescriptor RECORD_WRITER = new Builder()
.name("record-writer")
.displayName("Record Writer")
.description("The Record Writer to use in order to serialize the data before sending to Kafka")
.identifiesControllerService(RecordSetWriterFactory.class)
.expressionLanguageSupported(NONE)
.required(true)
.build();
static final PropertyDescriptor MESSAGE_KEY_FIELD = new Builder()
.name("message-key-field")
.displayName("Message Key Field")
.description("The name of a field in the Input Records that should be used as the Key for the Kafka message.")
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.required(false)
.build();
static final PropertyDescriptor DELIVERY_GUARANTEE = new Builder()
.name("acks")
.displayName("Delivery Guarantee")
.description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.")
.required(true)
.expressionLanguageSupported(NONE)
.allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED)
.defaultValue(DELIVERY_REPLICATED.getValue())
.build();
static final PropertyDescriptor METADATA_WAIT_TIME = new Builder()
.name("max.block.ms")
.displayName("Max Metadata Wait Time")
.description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the "
+ "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property")
.required(true)
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.defaultValue("5 sec")
.build();
static final PropertyDescriptor ACK_WAIT_TIME = new Builder()
.name("ack.wait.time")
.displayName("Acknowledgment Wait Time")
.description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. "
+ "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(NONE)
.required(true)
.defaultValue("5 secs")
.build();
static final PropertyDescriptor MAX_REQUEST_SIZE = new Builder()
.name("max.request.size")
.displayName("Max Request Size")
.description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).")
.required(true)
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
.defaultValue("1 MB")
.build();
static final PropertyDescriptor PARTITION_CLASS = new Builder()
.name("partitioner.class")
.displayName("Partitioner class")
.description("Specifies which class to use to compute a partition id for a message. Corresponds to Kafka's 'partitioner.class' property.")
.allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING, RECORD_PATH_PARTITIONING, EXPRESSION_LANGUAGE_PARTITIONING)
.defaultValue(RANDOM_PARTITIONING.getValue())
.required(false)
.build();
static final PropertyDescriptor PARTITION = new Builder()
.name("partition")
.displayName("Partition")
.description("Specifies which Partition Records will go to. How this value is interpreted is dictated by the <Partitioner class> property.")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor COMPRESSION_CODEC = new Builder()
.name("compression.type")
.displayName("Compression Type")
.description("This parameter allows you to specify the compression codec for all data generated by this producer.")
.required(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.allowableValues("none", "gzip", "snappy", "lz4")
.defaultValue("none")
.build();
static final PropertyDescriptor ATTRIBUTE_NAME_REGEX = new Builder()
.name("attribute-name-regex")
.displayName("Attributes to Send as Headers (Regex)")
.description("A Regular Expression that is matched against all FlowFile attribute names. "
+ "Any attribute whose name matches the regex will be added to the Kafka messages as a Header. "
+ "If not specified, no FlowFile attributes will be added as headers.")
.addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
.expressionLanguageSupported(NONE)
.required(false)
.build();
static final PropertyDescriptor USE_TRANSACTIONS = new Builder()
.name("use-transactions")
.displayName("Use Transactions")
.description("Specifies whether or not NiFi should provide Transactional guarantees when communicating with Kafka. If there is a problem sending data to Kafka, "
+ "and this property is set to false, then the messages that have already been sent to Kafka will continue on and be delivered to consumers. "
+ "If this is set to true, then the Kafka transaction will be rolled back so that those messages are not available to consumers. Setting this to true "
+ "requires that the <Delivery Guarantee> property be set to \"Guarantee Replicated Delivery.\"")
.expressionLanguageSupported(NONE)
.allowableValues("true", "false")
.defaultValue("true")
.required(true)
.build();
static final PropertyDescriptor TRANSACTIONAL_ID_PREFIX = new Builder()
.name("transactional-id-prefix")
.displayName("Transactional Id Prefix")
.description("When Use Transaction is set to true, KafkaProducer config 'transactional.id' will be a generated UUID and will be prefixed with this string.")
.expressionLanguageSupported(VARIABLE_REGISTRY)
.addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
.required(false)
.build();
static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new Builder()
.name("message-header-encoding")
.displayName("Message Header Encoding")
.description("For any attribute that is added as a message header, as configured via the <Attributes to Send as Headers> property, "
+ "this property indicates the Character Encoding to use for serializing the headers.")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.defaultValue("UTF-8")
.required(false)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("FlowFiles for which all content was sent to Kafka.")
.build();
static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship")
.build();
private static final List<PropertyDescriptor> PROPERTIES;
private static final Set<Relationship> RELATIONSHIPS;
private volatile PublisherPool publisherPool = null;
private final RecordPathCache recordPathCache = new RecordPathCache(25);
static {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(BOOTSTRAP_SERVERS);
properties.add(TOPIC);
properties.add(RECORD_READER);
properties.add(RECORD_WRITER);
properties.add(USE_TRANSACTIONS);
properties.add(FAILURE_STRATEGY);
properties.add(TRANSACTIONAL_ID_PREFIX);
properties.add(DELIVERY_GUARANTEE);
properties.add(ATTRIBUTE_NAME_REGEX);
properties.add(MESSAGE_HEADER_ENCODING);
properties.add(SECURITY_PROTOCOL);
properties.add(SASL_MECHANISM);
properties.add(KERBEROS_CREDENTIALS_SERVICE);
properties.add(KERBEROS_SERVICE_NAME);
properties.add(KERBEROS_PRINCIPAL);
properties.add(KERBEROS_KEYTAB);
properties.add(SASL_USERNAME);
properties.add(SASL_PASSWORD);
properties.add(TOKEN_AUTHENTICATION);
properties.add(AWS_PROFILE_NAME);
properties.add(SSL_CONTEXT_SERVICE);
properties.add(MESSAGE_KEY_FIELD);
properties.add(MAX_REQUEST_SIZE);
properties.add(ACK_WAIT_TIME);
properties.add(METADATA_WAIT_TIME);
properties.add(PARTITION_CLASS);
properties.add(PARTITION);
properties.add(COMPRESSION_CODEC);
PROPERTIES = Collections.unmodifiableList(properties);
final Set<Relationship> relationships = new HashSet<>();
relationships.add(REL_SUCCESS);
relationships.add(REL_FAILURE);
RELATIONSHIPS = Collections.unmodifiableSet(relationships);
}
@Override
public Set<Relationship> getRelationships() {
return RELATIONSHIPS;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return PROPERTIES;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
.name(propertyDescriptorName)
.addValidator(new DynamicPropertyValidator(ProducerConfig.class))
.dynamic(true)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
final List<ValidationResult> results = new ArrayList<>(new KafkaClientCustomValidationFunction().apply(validationContext));
final boolean useTransactions = validationContext.getProperty(USE_TRANSACTIONS).asBoolean();
if (useTransactions) {
final String deliveryGuarantee = validationContext.getProperty(DELIVERY_GUARANTEE).getValue();
if (!DELIVERY_REPLICATED.getValue().equals(deliveryGuarantee)) {
results.add(new ValidationResult.Builder()
.subject("Delivery Guarantee")
.valid(false)
.explanation("In order to use Transactions, the Delivery Guarantee must be \"Guarantee Replicated Delivery.\" "
+ "Either change the <Use Transactions> property or the <Delivery Guarantee> property.")
.build());
}
}
final String partitionClass = validationContext.getProperty(PARTITION_CLASS).getValue();
if (RECORD_PATH_PARTITIONING.getValue().equals(partitionClass)) {
final String rawRecordPath = validationContext.getProperty(PARTITION).getValue();
if (rawRecordPath == null) {
results.add(new ValidationResult.Builder()
.subject("Partition")
.valid(false)
.explanation("The <Partition> property must be specified if using the RecordPath Partitioning class")
.build());
} else if (!validationContext.isExpressionLanguagePresent(rawRecordPath)) {
final ValidationResult result = new RecordPathValidator().validate(PARTITION.getDisplayName(), rawRecordPath, validationContext);
if (result != null) {
results.add(result);
}
}
} else if (EXPRESSION_LANGUAGE_PARTITIONING.getValue().equals(partitionClass)) {
final String rawRecordPath = validationContext.getProperty(PARTITION).getValue();
if (rawRecordPath == null) {
results.add(new ValidationResult.Builder()
.subject("Partition")
.valid(false)
.explanation("The <Partition> property must be specified if using the Expression Language Partitioning class")
.build());
}
}
return results;
}
private synchronized PublisherPool getPublisherPool(final ProcessContext context) {
PublisherPool pool = publisherPool;
if (pool != null) {
return pool;
}
return publisherPool = createPublisherPool(context);
}
protected PublisherPool createPublisherPool(final ProcessContext context) {
final int maxMessageSize = context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue();
final long maxAckWaitMillis = context.getProperty(ACK_WAIT_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final String attributeNameRegex = context.getProperty(ATTRIBUTE_NAME_REGEX).getValue();
final Pattern attributeNamePattern = attributeNameRegex == null ? null : Pattern.compile(attributeNameRegex);
final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
final String transactionalIdPrefix = context.getProperty(TRANSACTIONAL_ID_PREFIX).evaluateAttributeExpressions().getValue();
Supplier<String> transactionalIdSupplier = new TransactionIdSupplier(transactionalIdPrefix);
final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
final Charset charset = Charset.forName(charsetName);
final KafkaPropertyProvider propertyProvider = new StandardKafkaPropertyProvider(ProducerConfig.class);
final Map<String, Object> kafkaProperties = propertyProvider.getProperties(context);
kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put("max.request.size", String.valueOf(maxMessageSize));
return new PublisherPool(kafkaProperties, getLogger(), maxMessageSize, maxAckWaitMillis, useTransactions, transactionalIdSupplier, attributeNamePattern, charset);
}
@OnStopped
public void closePool() {
if (publisherPool != null) {
publisherPool.close();
}
publisherPool = null;
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
if (flowFiles.isEmpty()) {
return;
}
final PublisherPool pool = getPublisherPool(context);
if (pool == null) {
context.yield();
return;
}
final String securityProtocol = context.getProperty(SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
final PublishFailureStrategy failureStrategy = getFailureStrategy(context);
final long startTime = System.nanoTime();
try (final PublisherLease lease = pool.obtainPublisher()) {
try {
if (useTransactions) {
lease.beginTransaction();
}
// Send each FlowFile to Kafka asynchronously.
final Iterator<FlowFile> itr = flowFiles.iterator();
while (itr.hasNext()) {
final FlowFile flowFile = itr.next();
if (!isScheduled()) {
// If stopped, re-queue FlowFile instead of sending it
if (useTransactions) {
session.rollback();
lease.rollback();
return;
}
session.transfer(flowFile);
itr.remove();
continue;
}
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
final Function<Record, Integer> partitioner = getPartitioner(context, flowFile);
try {
session.read(flowFile, in -> {
try {
final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger());
final RecordSet recordSet = reader.createRecordSet();
final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic, partitioner);
} catch (final SchemaNotFoundException | MalformedRecordException e) {
throw new ProcessException(e);
}
});
} catch (final Exception e) {
// The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
lease.fail(flowFile, e);
}
}
// Complete the send
final PublishResult publishResult = lease.complete();
if (publishResult.isFailure()) {
getLogger().info("Failed to send FlowFile to kafka; transferring to specified failure strategy");
failureStrategy.routeFlowFiles(session, flowFiles);
return;
}
// Transfer any successful FlowFiles.
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
for (FlowFile success : flowFiles) {
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
final int msgCount = publishResult.getSuccessfulMessageCount(success);
success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
session.adjustCounter("Messages Sent", msgCount, true);
final String transitUri = StandardTransitUriProvider.getTransitUri(securityProtocol, bootstrapServers, topic);
session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
session.transfer(success, REL_SUCCESS);
}
} catch (final ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) {
lease.poison();
getLogger().error("Failed to send messages to Kafka; will yield Processor and transfer FlowFiles to specified failure strategy");
failureStrategy.routeFlowFiles(session, flowFiles);
context.yield();
}
}
}
private Function<Record, Integer> getPartitioner(final ProcessContext context, final FlowFile flowFile) {
final String partitionClass = context.getProperty(PARTITION_CLASS).getValue();
if (RECORD_PATH_PARTITIONING.getValue().equals(partitionClass)) {
final String recordPath = context.getProperty(PARTITION).evaluateAttributeExpressions(flowFile).getValue();
final RecordPath compiled = recordPathCache.getCompiled(recordPath);
return record -> evaluateRecordPath(compiled, record);
} else if (EXPRESSION_LANGUAGE_PARTITIONING.getValue().equals(partitionClass)) {
final String partition = context.getProperty(PARTITION).evaluateAttributeExpressions(flowFile).getValue();
final int hash = Objects.hashCode(partition);
return (record) -> hash;
}
return null;
}
private Integer evaluateRecordPath(final RecordPath recordPath, final Record record) {
final RecordPathResult result = recordPath.evaluate(record);
final LongAccumulator accumulator = new LongAccumulator(Long::sum, 0);
result.getSelectedFields().forEach(fieldValue -> {
final Object value = fieldValue.getValue();
final long hash = Objects.hashCode(value);
accumulator.accumulate(hash);
});
return accumulator.intValue();
}
private PublishFailureStrategy getFailureStrategy(final ProcessContext context) {
final String strategy = context.getProperty(FAILURE_STRATEGY).getValue();
if (FailureStrategy.ROLLBACK.getValue().equals(strategy)) {
return (session, flowFiles) -> session.rollback();
} else {
return (session, flowFiles) -> session.transfer(flowFiles, REL_FAILURE);
}
}
}
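To summarize the configuration surface above, a hedged sketch of how several of the property descriptor names map directly onto Kafka producer configuration keys, using the descriptors' default values. The bootstrap address and the sketch class are placeholders.

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;

import java.util.HashMap;
import java.util.Map;

class ProducerConfigMappingSketch {
    public static void main(String[] args) {
        final Map<String, Object> kafkaProperties = new HashMap<>();
        kafkaProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
        // <Delivery Guarantee> -> acks; "all" is Guarantee Replicated Delivery, the descriptor default.
        kafkaProperties.put(ProducerConfig.ACKS_CONFIG, "all");
        // <Max Metadata Wait Time> -> max.block.ms; descriptor default of 5 sec.
        kafkaProperties.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, "5000");
        // <Max Request Size> -> max.request.size; descriptor default of 1 MB.
        kafkaProperties.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, String.valueOf(1024 * 1024));
        // <Compression Type> -> compression.type; descriptor default of "none".
        kafkaProperties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "none");
        System.out.println(kafkaProperties);
    }
}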


@ -1,548 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.errors.AuthorizationException;
import org.apache.kafka.common.errors.OutOfOrderSequenceException;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyDescriptor.Builder;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.kafka.shared.attribute.KafkaFlowFileAttribute;
import org.apache.nifi.kafka.shared.attribute.StandardTransitUriProvider;
import org.apache.nifi.kafka.shared.component.KafkaPublishComponent;
import org.apache.nifi.kafka.shared.property.FailureStrategy;
import org.apache.nifi.kafka.shared.property.KeyEncoding;
import org.apache.nifi.kafka.shared.property.provider.KafkaPropertyProvider;
import org.apache.nifi.kafka.shared.property.provider.StandardKafkaPropertyProvider;
import org.apache.nifi.kafka.shared.transaction.TransactionIdSupplier;
import org.apache.nifi.kafka.shared.validation.DynamicPropertyValidator;
import org.apache.nifi.kafka.shared.validation.KafkaClientCustomValidationFunction;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.FlowFileFilters;
import org.apache.nifi.processor.util.StandardValidators;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import static org.apache.nifi.expression.ExpressionLanguageScope.FLOWFILE_ATTRIBUTES;
import static org.apache.nifi.expression.ExpressionLanguageScope.NONE;
import static org.apache.nifi.expression.ExpressionLanguageScope.VARIABLE_REGISTRY;
@Tags({"Apache", "Kafka", "Put", "Send", "Message", "PubSub", "2.0"})
@CapabilityDescription("Sends the contents of a FlowFile as a message to Apache Kafka using the Kafka 2.0 Producer API."
+ "The messages to send may be individual FlowFiles or may be delimited, using a "
+ "user-specified delimiter, such as a new-line. "
+ "The complementary NiFi processor for fetching messages is ConsumeKafka_2_0.")
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ",
expressionLanguageScope = VARIABLE_REGISTRY)
@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Kafka for this FlowFile. This attribute is added only to "
+ "FlowFiles that are routed to success. If the <Message Demarcator> Property is not set, this will always be 1, but if the Property is set, it may "
+ "be greater than 1.")
public class PublishKafka_2_0 extends AbstractProcessor implements KafkaPublishComponent {
protected static final String MSG_COUNT = "msg.count";
static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery",
"FlowFile will be routed to failure unless the message is replicated to the appropriate "
+ "number of Kafka Nodes according to the Topic configuration");
static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery",
"FlowFile will be routed to success if the message is received by a single Kafka node, "
+ "whether or not it is replicated. This is faster than <Guarantee Replicated Delivery> "
+ "but can result in data loss if a Kafka node crashes");
static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort",
"FlowFile will be routed to success after successfully sending the content to a Kafka node, "
+ "without waiting for any acknowledgment from the node at all. This provides the best performance but may result in data loss.");
static final AllowableValue ROUND_ROBIN_PARTITIONING = new AllowableValue(Partitioners.RoundRobinPartitioner.class.getName(),
Partitioners.RoundRobinPartitioner.class.getSimpleName(),
"Messages will be assigned partitions in a round-robin fashion, sending the first message to Partition 1, "
+ "the next Partition to Partition 2, and so on, wrapping as necessary.");
static final AllowableValue RANDOM_PARTITIONING = new AllowableValue("org.apache.kafka.clients.producer.internals.DefaultPartitioner",
"DefaultPartitioner", "Messages will be assigned to random partitions.");
static final AllowableValue EXPRESSION_LANGUAGE_PARTITIONING = new AllowableValue(Partitioners.ExpressionLanguagePartitioner.class.getName(), "Expression Language Partitioner",
"Interprets the <Partition> property as Expression Language that will be evaluated against each FlowFile. This Expression will be evaluated once against the FlowFile, " +
"so all Records in a given FlowFile will go to the same partition.");
static final PropertyDescriptor TOPIC = new Builder()
.name("topic")
.displayName("Topic Name")
.description("The name of the Kafka Topic to publish to.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor DELIVERY_GUARANTEE = new Builder()
.name(ProducerConfig.ACKS_CONFIG)
.displayName("Delivery Guarantee")
.description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.")
.required(true)
.expressionLanguageSupported(NONE)
.allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED)
.defaultValue(DELIVERY_REPLICATED.getValue())
.build();
static final PropertyDescriptor METADATA_WAIT_TIME = new Builder()
.name(ProducerConfig.MAX_BLOCK_MS_CONFIG)
.displayName("Max Metadata Wait Time")
.description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the "
+ "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property")
.required(true)
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.defaultValue("5 sec")
.build();
static final PropertyDescriptor ACK_WAIT_TIME = new Builder()
.name("ack.wait.time")
.displayName("Acknowledgment Wait Time")
.description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. "
+ "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(NONE)
.required(true)
.defaultValue("5 secs")
.build();
static final PropertyDescriptor MAX_REQUEST_SIZE = new Builder()
.name("max.request.size")
.displayName("Max Request Size")
.description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).")
.required(true)
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
.defaultValue("1 MB")
.build();
static final PropertyDescriptor KEY = new Builder()
.name("kafka-key")
.displayName("Kafka Key")
.description("The Key to use for the Message. "
+ "If not specified, the flow file attribute 'kafka.key' is used as the message key, if it is present."
+ "Beware that setting Kafka key and demarcating at the same time may potentially lead to many Kafka messages with the same key."
+ "Normally this is not a problem as Kafka does not enforce or assume message and key uniqueness. Still, setting the demarcator and Kafka key at the same time poses a risk of "
+ "data loss on Kafka. During a topic compaction on Kafka, messages will be deduplicated based on this key.")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor KEY_ATTRIBUTE_ENCODING = new Builder()
.name("key-attribute-encoding")
.displayName("Key Attribute Encoding")
.description("FlowFiles that are emitted have an attribute named '" + KafkaFlowFileAttribute.KAFKA_KEY + "'. This property dictates how the value of the attribute should be encoded.")
.required(true)
.defaultValue(KeyEncoding.UTF8.getValue())
.allowableValues(KeyEncoding.class)
.build();
static final PropertyDescriptor MESSAGE_DEMARCATOR = new Builder()
.name("message-demarcator")
.displayName("Message Demarcator")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.description("Specifies the string (interpreted as UTF-8) to use for demarcating multiple messages within "
+ "a single FlowFile. If not specified, the entire content of the FlowFile will be used as a single message. If specified, the "
+ "contents of the FlowFile will be split on this delimiter and each section sent as a separate Kafka message. "
+ "To enter special character such as 'new line' use CTRL+Enter or Shift+Enter, depending on your OS.")
.build();
static final PropertyDescriptor PARTITION_CLASS = new Builder()
.name(ProducerConfig.PARTITIONER_CLASS_CONFIG)
.displayName("Partitioner class")
.description("Specifies which class to use to compute a partition id for a message. Corresponds to Kafka's 'partitioner.class' property.")
.allowableValues(ROUND_ROBIN_PARTITIONING, RANDOM_PARTITIONING, EXPRESSION_LANGUAGE_PARTITIONING)
.defaultValue(RANDOM_PARTITIONING.getValue())
.required(false)
.build();
static final PropertyDescriptor PARTITION = new Builder()
.name("partition")
.displayName("Partition")
.description("Specifies which Partition Records will go to.")
.required(false)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(FLOWFILE_ATTRIBUTES)
.build();
static final PropertyDescriptor COMPRESSION_CODEC = new Builder()
.name(ProducerConfig.COMPRESSION_TYPE_CONFIG)
.displayName("Compression Type")
.description("This parameter allows you to specify the compression codec for all data generated by this producer.")
.required(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.allowableValues("none", "gzip", "snappy", "lz4")
.defaultValue("none")
.build();
static final PropertyDescriptor ATTRIBUTE_NAME_REGEX = new Builder()
.name("attribute-name-regex")
.displayName("Attributes to Send as Headers (Regex)")
.description("A Regular Expression that is matched against all FlowFile attribute names. "
+ "Any attribute whose name matches the regex will be added to the Kafka messages as a Header. "
+ "If not specified, no FlowFile attributes will be added as headers.")
.addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
.expressionLanguageSupported(NONE)
.required(false)
.build();
static final PropertyDescriptor USE_TRANSACTIONS = new Builder()
.name("use-transactions")
.displayName("Use Transactions")
.description("Specifies whether or not NiFi should provide Transactional guarantees when communicating with Kafka. If there is a problem sending data to Kafka, "
+ "and this property is set to false, then the messages that have already been sent to Kafka will continue on and be delivered to consumers. "
+ "If this is set to true, then the Kafka transaction will be rolled back so that those messages are not available to consumers. Setting this to true "
+ "requires that the <Delivery Guarantee> property be set to \"Guarantee Replicated Delivery.\"")
.expressionLanguageSupported(NONE)
.allowableValues("true", "false")
.defaultValue("true")
.required(true)
.build();
static final PropertyDescriptor TRANSACTIONAL_ID_PREFIX = new Builder()
.name("transactional-id-prefix")
.displayName("Transactional Id Prefix")
.description("When Use Transaction is set to true, KafkaProducer config 'transactional.id' will be a generated UUID and will be prefixed with this string.")
.expressionLanguageSupported(VARIABLE_REGISTRY)
.addValidator(StandardValidators.NON_EMPTY_EL_VALIDATOR)
.required(false)
.build();
static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new Builder()
.name("message-header-encoding")
.displayName("Message Header Encoding")
.description("For any attribute that is added as a message header, as configured via the <Attributes to Send as Headers> property, "
+ "this property indicates the Character Encoding to use for serializing the headers.")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.defaultValue("UTF-8")
.required(false)
.build();
static final Relationship REL_SUCCESS = new Relationship.Builder()
.name("success")
.description("FlowFiles for which all content was sent to Kafka.")
.build();
static final Relationship REL_FAILURE = new Relationship.Builder()
.name("failure")
.description("Any FlowFile that cannot be sent to Kafka will be routed to this Relationship")
.build();
private static final List<PropertyDescriptor> PROPERTIES;
private static final Set<Relationship> RELATIONSHIPS;
private volatile PublisherPool publisherPool = null;
static {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(BOOTSTRAP_SERVERS);
properties.add(SECURITY_PROTOCOL);
properties.add(SASL_MECHANISM);
properties.add(KERBEROS_SERVICE_NAME);
properties.add(KERBEROS_CREDENTIALS_SERVICE);
properties.add(KERBEROS_PRINCIPAL);
properties.add(KERBEROS_KEYTAB);
properties.add(SASL_USERNAME);
properties.add(SASL_PASSWORD);
properties.add(TOKEN_AUTHENTICATION);
properties.add(AWS_PROFILE_NAME);
properties.add(SSL_CONTEXT_SERVICE);
properties.add(TOPIC);
properties.add(DELIVERY_GUARANTEE);
properties.add(FAILURE_STRATEGY);
properties.add(USE_TRANSACTIONS);
properties.add(TRANSACTIONAL_ID_PREFIX);
properties.add(ATTRIBUTE_NAME_REGEX);
properties.add(MESSAGE_HEADER_ENCODING);
properties.add(KEY);
properties.add(KEY_ATTRIBUTE_ENCODING);
properties.add(MESSAGE_DEMARCATOR);
properties.add(MAX_REQUEST_SIZE);
properties.add(ACK_WAIT_TIME);
properties.add(METADATA_WAIT_TIME);
properties.add(PARTITION_CLASS);
properties.add(PARTITION);
properties.add(COMPRESSION_CODEC);
PROPERTIES = Collections.unmodifiableList(properties);
final Set<Relationship> relationships = new HashSet<>();
relationships.add(REL_SUCCESS);
relationships.add(REL_FAILURE);
RELATIONSHIPS = Collections.unmodifiableSet(relationships);
}
@Override
public Set<Relationship> getRelationships() {
return RELATIONSHIPS;
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return PROPERTIES;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
.name(propertyDescriptorName)
.addValidator(new DynamicPropertyValidator(ProducerConfig.class))
.dynamic(true)
.expressionLanguageSupported(VARIABLE_REGISTRY)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
final List<ValidationResult> results = new ArrayList<>(new KafkaClientCustomValidationFunction().apply(validationContext));
final boolean useTransactions = validationContext.getProperty(USE_TRANSACTIONS).asBoolean();
if (useTransactions) {
final String deliveryGuarantee = validationContext.getProperty(DELIVERY_GUARANTEE).getValue();
if (!DELIVERY_REPLICATED.getValue().equals(deliveryGuarantee)) {
results.add(new ValidationResult.Builder()
.subject("Delivery Guarantee")
.valid(false)
.explanation("In order to use Transactions, the Delivery Guarantee must be \"Guarantee Replicated Delivery.\" "
+ "Either change the <Use Transactions> property or the <Delivery Guarantee> property.")
.build());
}
}
final String partitionClass = validationContext.getProperty(PARTITION_CLASS).getValue();
if (EXPRESSION_LANGUAGE_PARTITIONING.getValue().equals(partitionClass)) {
final String rawRecordPath = validationContext.getProperty(PARTITION).getValue();
if (rawRecordPath == null) {
results.add(new ValidationResult.Builder()
.subject("Partition")
.valid(false)
.explanation("The <Partition> property must be specified if using the Expression Language Partitioning class")
.build());
}
}
return results;
}
private synchronized PublisherPool getPublisherPool(final ProcessContext context) {
PublisherPool pool = publisherPool;
if (pool != null) {
return pool;
}
return publisherPool = createPublisherPool(context);
}
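// Builds the producer configuration from processor properties. Byte-array serializers are always used for keys and values,
// and 'max.request.size' is driven by the <Max Request Size> property.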
protected PublisherPool createPublisherPool(final ProcessContext context) {
final int maxMessageSize = context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue();
final long maxAckWaitMillis = context.getProperty(ACK_WAIT_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final String attributeNameRegex = context.getProperty(ATTRIBUTE_NAME_REGEX).getValue();
final Pattern attributeNamePattern = attributeNameRegex == null ? null : Pattern.compile(attributeNameRegex);
final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
final String transactionalIdPrefix = context.getProperty(TRANSACTIONAL_ID_PREFIX).evaluateAttributeExpressions().getValue();
Supplier<String> transactionalIdSupplier = new TransactionIdSupplier(transactionalIdPrefix);
final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
final Charset charset = Charset.forName(charsetName);
final KafkaPropertyProvider propertyProvider = new StandardKafkaPropertyProvider(ProducerConfig.class);
final Map<String, Object> kafkaProperties = propertyProvider.getProperties(context);
kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put("max.request.size", String.valueOf(maxMessageSize));
return new PublisherPool(kafkaProperties, getLogger(), maxMessageSize, maxAckWaitMillis, useTransactions, transactionalIdSupplier, attributeNamePattern, charset);
}
@OnStopped
public void closePool() {
if (publisherPool != null) {
publisherPool.close();
}
publisherPool = null;
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final boolean useDemarcator = context.getProperty(MESSAGE_DEMARCATOR).isSet();
final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(250, DataUnit.KB, 500));
if (flowFiles.isEmpty()) {
return;
}
final PublisherPool pool = getPublisherPool(context);
if (pool == null) {
context.yield();
return;
}
final String securityProtocol = context.getProperty(SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
final PublishFailureStrategy failureStrategy = getFailureStrategy(context);
final long startTime = System.nanoTime();
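// Obtain a lease on a pooled KafkaProducer; closing the lease returns the producer to the pool unless it has been poisoned.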
try (final PublisherLease lease = pool.obtainPublisher()) {
try {
if (useTransactions) {
lease.beginTransaction();
}
// Send each FlowFile to Kafka asynchronously.
for (final FlowFile flowFile : flowFiles) {
if (!isScheduled()) {
// If stopped, re-queue FlowFile instead of sending it
if (useTransactions) {
session.rollback();
lease.rollback();
return;
}
session.transfer(flowFile);
continue;
}
final byte[] messageKey = getMessageKey(flowFile, context);
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
final byte[] demarcatorBytes;
if (useDemarcator) {
demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8);
} else {
demarcatorBytes = null;
}
final Integer partition = getPartition(context, flowFile);
session.read(flowFile, rawIn -> {
try (final InputStream in = new BufferedInputStream(rawIn)) {
lease.publish(flowFile, in, messageKey, demarcatorBytes, topic, partition);
}
});
}
// Complete the send
final PublishResult publishResult = lease.complete();
if (publishResult.isFailure()) {
getLogger().info("Failed to send FlowFile to kafka; transferring to specified failure strategy");
failureStrategy.routeFlowFiles(session, flowFiles);
return;
}
// Transfer any successful FlowFiles.
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
for (FlowFile success : flowFiles) {
final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
final int msgCount = publishResult.getSuccessfulMessageCount(success);
success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
session.adjustCounter("Messages Sent", msgCount, true);
final String transitUri = StandardTransitUriProvider.getTransitUri(securityProtocol, bootstrapServers, topic);
session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
session.transfer(success, REL_SUCCESS);
}
} catch (final ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) {
lease.poison();
getLogger().error("Failed to send messages to Kafka; will yield Processor and transfer FlowFiles to specified failure strategy");
failureStrategy.routeFlowFiles(session, flowFiles);
context.yield();
}
}
}
private PublishFailureStrategy getFailureStrategy(final ProcessContext context) {
final String strategy = context.getProperty(FAILURE_STRATEGY).getValue();
if (FailureStrategy.ROLLBACK.getValue().equals(strategy)) {
return (session, flowFiles) -> session.rollback();
} else {
return (session, flowFiles) -> session.transfer(flowFiles, REL_FAILURE);
}
}
private byte[] getMessageKey(final FlowFile flowFile, final ProcessContext context) {
final String uninterpretedKey;
if (context.getProperty(KEY).isSet()) {
uninterpretedKey = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
} else {
uninterpretedKey = flowFile.getAttribute(KafkaFlowFileAttribute.KAFKA_KEY);
}
if (uninterpretedKey == null) {
return null;
}
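// The key may be supplied either as UTF-8 text or as a hex-encoded byte string, as selected by the <Key Attribute Encoding> property.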
final String keyEncoding = context.getProperty(KEY_ATTRIBUTE_ENCODING).getValue();
if (KeyEncoding.UTF8.getValue().equals(keyEncoding)) {
return uninterpretedKey.getBytes(StandardCharsets.UTF_8);
}
try {
return Hex.decodeHex(uninterpretedKey);
} catch (final DecoderException e) {
throw new RuntimeException("Hexadecimal decoding failed", e);
}
}
private Integer getPartition(final ProcessContext context, final FlowFile flowFile) {
final String partitionClass = context.getProperty(PARTITION_CLASS).getValue();
if (EXPRESSION_LANGUAGE_PARTITIONING.getValue().equals(partitionClass)) {
final String partition = context.getProperty(PARTITION).evaluateAttributeExpressions(flowFile).getValue();
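// Hash the evaluated Expression Language value; PublisherLease later maps this hash onto a real partition by taking it modulo the topic's partition count.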
final int hash = Objects.hashCode(partition);
return hash;
}
return null;
}
}

View File

@ -1,46 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.flowfile.FlowFile;
public interface PublishResult {
boolean isFailure();
int getSuccessfulMessageCount(FlowFile flowFile);
Exception getReasonForFailure(FlowFile flowFile);
public static PublishResult EMPTY = new PublishResult() {
@Override
public boolean isFailure() {
return false;
}
@Override
public int getSuccessfulMessageCount(FlowFile flowFile) {
return 0;
}
@Override
public Exception getReasonForFailure(FlowFile flowFile) {
return null;
}
};
}

View File

@ -1,311 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.header.Headers;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.stream.io.exception.TokenTooLargeException;
import org.apache.nifi.stream.io.util.StreamDemarcator;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.regex.Pattern;
public class PublisherLease implements Closeable {
private final ComponentLog logger;
private final Producer<byte[], byte[]> producer;
private final int maxMessageSize;
private final long maxAckWaitMillis;
private final boolean useTransactions;
private final Pattern attributeNameRegex;
private final Charset headerCharacterSet;
private volatile boolean poisoned = false;
private final AtomicLong messagesSent = new AtomicLong(0L);
private volatile boolean transactionsInitialized = false;
private volatile boolean activeTransaction = false;
private InFlightMessageTracker tracker;
public PublisherLease(final Producer<byte[], byte[]> producer, final int maxMessageSize, final long maxAckWaitMillis, final ComponentLog logger,
final boolean useTransactions, final Pattern attributeNameRegex, final Charset headerCharacterSet) {
this.producer = producer;
this.maxMessageSize = maxMessageSize;
this.logger = logger;
this.maxAckWaitMillis = maxAckWaitMillis;
this.useTransactions = useTransactions;
this.attributeNameRegex = attributeNameRegex;
this.headerCharacterSet = headerCharacterSet;
}
protected void poison() {
this.poisoned = true;
}
public boolean isPoisoned() {
return poisoned;
}
void beginTransaction() {
if (!useTransactions) {
return;
}
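// initTransactions() registers the producer's transactional.id with the broker and must be called once before the first beginTransaction().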
try {
if (!transactionsInitialized) {
producer.initTransactions();
transactionsInitialized = true;
}
producer.beginTransaction();
activeTransaction = true;
} catch (final Exception e) {
poison();
throw e;
}
}
void rollback() {
if (!useTransactions || !activeTransaction) {
return;
}
try {
producer.abortTransaction();
} catch (final Exception e) {
poison();
throw e;
}
activeTransaction = false;
}
void fail(final FlowFile flowFile, final Exception cause) {
getTracker().fail(flowFile, cause);
rollback();
}
void publish(final FlowFile flowFile, final InputStream flowFileContent, final byte[] messageKey, final byte[] demarcatorBytes, final String topic, final Integer partition) throws IOException {
if (tracker == null) {
tracker = new InFlightMessageTracker(logger);
}
try {
byte[] messageContent;
if (demarcatorBytes == null || demarcatorBytes.length == 0) {
if (flowFile.getSize() > maxMessageSize) {
tracker.fail(flowFile, new TokenTooLargeException("A message in the stream exceeds the maximum allowed message size of " + maxMessageSize + " bytes."));
return;
}
// Send the FlowFile content as-is, to support sending 0-byte messages.
messageContent = new byte[(int) flowFile.getSize()];
StreamUtils.fillBuffer(flowFileContent, messageContent);
publish(flowFile, messageKey, messageContent, topic, tracker, partition);
return;
}
try (final StreamDemarcator demarcator = new StreamDemarcator(flowFileContent, demarcatorBytes, maxMessageSize)) {
while ((messageContent = demarcator.nextToken()) != null) {
publish(flowFile, messageKey, messageContent, topic, tracker, partition);
if (tracker.isFailed(flowFile)) {
// If we have a failure, don't try to send anything else.
return;
}
}
} catch (final TokenTooLargeException ttle) {
tracker.fail(flowFile, ttle);
}
} catch (final Exception e) {
tracker.fail(flowFile, e);
poison();
throw e;
}
}
void publish(final FlowFile flowFile, final RecordSet recordSet, final RecordSetWriterFactory writerFactory, final RecordSchema schema,
final String messageKeyField, final String topic, final Function<Record, Integer> partitioner) throws IOException {
if (tracker == null) {
tracker = new InFlightMessageTracker(logger);
}
final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
Record record;
int recordCount = 0;
try {
while ((record = recordSet.next()) != null) {
recordCount++;
baos.reset();
Map<String, String> additionalAttributes = Collections.emptyMap();
try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos, flowFile)) {
final WriteResult writeResult = writer.write(record);
additionalAttributes = writeResult.getAttributes();
writer.flush();
}
final byte[] messageContent = baos.toByteArray();
final String key = messageKeyField == null ? null : record.getAsString(messageKeyField);
final byte[] messageKey = (key == null) ? null : key.getBytes(StandardCharsets.UTF_8);
final Integer partition = partitioner == null ? null : partitioner.apply(record);
publish(flowFile, additionalAttributes, messageKey, messageContent, topic, tracker, partition);
if (tracker.isFailed(flowFile)) {
// If we have a failure, don't try to send anything else.
return;
}
}
if (recordCount == 0) {
tracker.trackEmpty(flowFile);
}
} catch (final TokenTooLargeException ttle) {
tracker.fail(flowFile, ttle);
} catch (final SchemaNotFoundException snfe) {
throw new IOException(snfe);
} catch (final Exception e) {
tracker.fail(flowFile, e);
poison();
throw e;
}
}
private void addHeaders(final FlowFile flowFile, final Map<String, String> additionalAttributes, final ProducerRecord<?, ?> record) {
if (attributeNameRegex == null) {
return;
}
final Headers headers = record.headers();
for (final Map.Entry<String, String> entry : flowFile.getAttributes().entrySet()) {
if (attributeNameRegex.matcher(entry.getKey()).matches()) {
headers.add(entry.getKey(), entry.getValue().getBytes(headerCharacterSet));
}
}
for (final Map.Entry<String, String> entry : additionalAttributes.entrySet()) {
if (attributeNameRegex.matcher(entry.getKey()).matches()) {
headers.add(entry.getKey(), entry.getValue().getBytes(headerCharacterSet));
}
}
}
protected void publish(final FlowFile flowFile, final byte[] messageKey, final byte[] messageContent, final String topic, final InFlightMessageTracker tracker, final Integer partition) {
publish(flowFile, Collections.emptyMap(), messageKey, messageContent, topic, tracker, partition);
}
protected void publish(final FlowFile flowFile, final Map<String, String> additionalAttributes, final byte[] messageKey, final byte[] messageContent,
final String topic, final InFlightMessageTracker tracker, final Integer partition) {
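// Map the caller-supplied partition hash onto a valid partition index for the topic.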
final Integer moddedPartition = partition == null ? null : Math.abs(partition) % (producer.partitionsFor(topic).size());
final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(topic, moddedPartition, messageKey, messageContent);
addHeaders(flowFile, additionalAttributes, record);
producer.send(record, new Callback() {
@Override
public void onCompletion(final RecordMetadata metadata, final Exception exception) {
if (exception == null) {
tracker.incrementAcknowledgedCount(flowFile);
} else {
tracker.fail(flowFile, exception);
poison();
}
}
});
messagesSent.incrementAndGet();
tracker.incrementSentCount(flowFile);
}
public PublishResult complete() {
if (tracker == null) {
if (messagesSent.get() == 0L) {
return PublishResult.EMPTY;
}
rollback();
throw new IllegalStateException("Cannot complete publishing to Kafka because Publisher Lease was already closed");
}
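// Flush any buffered records and, if a transaction is active, commit it before waiting for outstanding acknowledgements.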
try {
producer.flush();
if (activeTransaction) {
producer.commitTransaction();
activeTransaction = false;
}
} catch (final Exception e) {
poison();
throw e;
}
try {
tracker.awaitCompletion(maxAckWaitMillis);
return tracker.createPublishResult();
} catch (final InterruptedException e) {
logger.warn("Interrupted while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka");
Thread.currentThread().interrupt();
return tracker.failOutstanding(e);
} catch (final TimeoutException e) {
logger.warn("Timed out while waiting for an acknowledgement from Kafka; some FlowFiles may be transferred to 'failure' even though they were received by Kafka");
return tracker.failOutstanding(e);
} finally {
tracker = null;
}
}
@Override
public void close() {
producer.close(maxAckWaitMillis, TimeUnit.MILLISECONDS);
tracker = null;
}
public InFlightMessageTracker getTracker() {
if (tracker == null) {
tracker = new InFlightMessageTracker(logger);
}
return tracker;
}
}

View File

@ -1,116 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.nifi.logging.ComponentLog;
import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.function.Supplier;
import java.util.regex.Pattern;
public class PublisherPool implements Closeable {
private final ComponentLog logger;
private final BlockingQueue<PublisherLease> publisherQueue;
private final Map<String, Object> kafkaProperties;
private final int maxMessageSize;
private final long maxAckWaitMillis;
private final boolean useTransactions;
private final Pattern attributeNameRegex;
private final Charset headerCharacterSet;
private Supplier<String> transactionalIdSupplier;
private volatile boolean closed = false;
PublisherPool(final Map<String, Object> kafkaProperties, final ComponentLog logger, final int maxMessageSize, final long maxAckWaitMillis,
final boolean useTransactions, final Supplier<String> transactionalIdSupplier, final Pattern attributeNameRegex, final Charset headerCharacterSet) {
this.logger = logger;
this.publisherQueue = new LinkedBlockingQueue<>();
this.kafkaProperties = kafkaProperties;
this.maxMessageSize = maxMessageSize;
this.maxAckWaitMillis = maxAckWaitMillis;
this.useTransactions = useTransactions;
this.attributeNameRegex = attributeNameRegex;
this.headerCharacterSet = headerCharacterSet;
this.transactionalIdSupplier = transactionalIdSupplier;
}
public PublisherLease obtainPublisher() {
if (isClosed()) {
throw new IllegalStateException("Connection Pool is closed");
}
PublisherLease lease = publisherQueue.poll();
if (lease != null) {
return lease;
}
lease = createLease();
return lease;
}
private PublisherLease createLease() {
final Map<String, Object> properties = new HashMap<>(kafkaProperties);
if (useTransactions) {
properties.put("transactional.id", transactionalIdSupplier.get());
}
final Producer<byte[], byte[]> producer = new KafkaProducer<>(properties);
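// The lease overrides close() so that, unless it is poisoned or the pool has been closed, the producer is returned to the queue for reuse rather than closed.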
final PublisherLease lease = new PublisherLease(producer, maxMessageSize, maxAckWaitMillis, logger, useTransactions, attributeNameRegex, headerCharacterSet) {
@Override
public void close() {
if (isPoisoned() || isClosed()) {
super.close();
} else {
publisherQueue.offer(this);
}
}
};
return lease;
}
public synchronized boolean isClosed() {
return closed;
}
@Override
public synchronized void close() {
closed = true;
PublisherLease lease;
while ((lease = publisherQueue.poll()) != null) {
lease.close();
}
}
/**
* Returns the number of leases that are currently available
*
* @return the number of leases currently available
*/
protected int available() {
return publisherQueue.size();
}
}

View File

@ -1,311 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.sink.kafka;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnDisabled;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.ControllerServiceInitializationContext;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.kafka.shared.component.KafkaPublishComponent;
import org.apache.nifi.kafka.shared.property.provider.KafkaPropertyProvider;
import org.apache.nifi.kafka.shared.property.provider.StandardKafkaPropertyProvider;
import org.apache.nifi.kafka.shared.validation.DynamicPropertyValidator;
import org.apache.nifi.kafka.shared.validation.KafkaClientCustomValidationFunction;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.record.sink.RecordSinkService;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.apache.nifi.stream.io.ByteCountingOutputStream;
import org.apache.nifi.stream.io.exception.TokenTooLargeException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
@Tags({"kafka", "record", "sink"})
@CapabilityDescription("Provides a service to write records to a Kafka 2.x topic.")
@DynamicProperty(name = "The name of a Kafka configuration property.", value = "The value of a given Kafka configuration property.",
description = "These properties will be added on the Kafka configuration after loading any provided configuration properties."
+ " In the event a dynamic property represents a property that was already set, its value will be ignored and WARN message logged."
+ " For the list of available Kafka properties please refer to: http://kafka.apache.org/documentation.html#configuration. ",
expressionLanguageScope = ExpressionLanguageScope.VARIABLE_REGISTRY)
public class KafkaRecordSink_2_0 extends AbstractControllerService implements RecordSinkService, KafkaPublishComponent {
static final AllowableValue DELIVERY_REPLICATED = new AllowableValue("all", "Guarantee Replicated Delivery",
"Records are considered 'transmitted unsuccessfully' unless the message is replicated to the appropriate "
+ "number of Kafka Nodes according to the Topic configuration.");
static final AllowableValue DELIVERY_ONE_NODE = new AllowableValue("1", "Guarantee Single Node Delivery",
"Records are considered 'transmitted successfully' if the message is received by a single Kafka node, "
+ "whether or not it is replicated. This is faster than <Guarantee Replicated Delivery> "
+ "but can result in data loss if a Kafka node crashes.");
static final AllowableValue DELIVERY_BEST_EFFORT = new AllowableValue("0", "Best Effort",
"Records are considered 'transmitted successfully' after successfully writing the content to a Kafka node, "
+ "without waiting for a response. This provides the best performance but may result in data loss.");
static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder()
.name("topic")
.displayName("Topic Name")
.description("The name of the Kafka Topic to publish to.")
.required(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.build();
static final PropertyDescriptor DELIVERY_GUARANTEE = new PropertyDescriptor.Builder()
.name("acks")
.displayName("Delivery Guarantee")
.description("Specifies the requirement for guaranteeing that a message is sent to Kafka. Corresponds to Kafka's 'acks' property.")
.required(true)
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.allowableValues(DELIVERY_BEST_EFFORT, DELIVERY_ONE_NODE, DELIVERY_REPLICATED)
.defaultValue(DELIVERY_BEST_EFFORT.getValue())
.build();
static final PropertyDescriptor METADATA_WAIT_TIME = new PropertyDescriptor.Builder()
.name("max.block.ms")
.displayName("Max Metadata Wait Time")
.description("The amount of time publisher will wait to obtain metadata or wait for the buffer to flush during the 'send' call before failing the "
+ "entire 'send' call. Corresponds to Kafka's 'max.block.ms' property")
.required(true)
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.defaultValue("5 sec")
.build();
static final PropertyDescriptor ACK_WAIT_TIME = new PropertyDescriptor.Builder()
.name("ack.wait.time")
.displayName("Acknowledgment Wait Time")
.description("After sending a message to Kafka, this indicates the amount of time that we are willing to wait for a response from Kafka. "
+ "If Kafka does not acknowledge the message within this time period, the FlowFile will be routed to 'failure'.")
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
.required(true)
.defaultValue("5 secs")
.build();
static final PropertyDescriptor MAX_REQUEST_SIZE = new PropertyDescriptor.Builder()
.name("max.request.size")
.displayName("Max Request Size")
.description("The maximum size of a request in bytes. Corresponds to Kafka's 'max.request.size' property and defaults to 1 MB (1048576).")
.required(true)
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
.defaultValue("1 MB")
.build();
static final PropertyDescriptor COMPRESSION_CODEC = new PropertyDescriptor.Builder()
.name("compression.type")
.displayName("Compression Type")
.description("This parameter allows you to specify the compression codec for all data generated by this producer.")
.required(true)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.allowableValues("none", "gzip", "snappy", "lz4")
.defaultValue("none")
.build();
static final PropertyDescriptor MESSAGE_HEADER_ENCODING = new PropertyDescriptor.Builder()
.name("message-header-encoding")
.displayName("Message Header Encoding")
.description("For any attribute that is added as a message header, as configured via the <Attributes to Send as Headers> property, "
+ "this property indicates the Character Encoding to use for serializing the headers.")
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
.defaultValue("UTF-8")
.required(false)
.build();
private List<PropertyDescriptor> properties;
private volatile RecordSetWriterFactory writerFactory;
private volatile int maxMessageSize;
private volatile long maxAckWaitMillis;
private volatile String topic;
private volatile Producer<byte[], byte[]> producer;
@Override
protected void init(final ControllerServiceInitializationContext context) {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(BOOTSTRAP_SERVERS);
properties.add(TOPIC);
properties.add(RecordSinkService.RECORD_WRITER_FACTORY);
properties.add(DELIVERY_GUARANTEE);
properties.add(MESSAGE_HEADER_ENCODING);
properties.add(SECURITY_PROTOCOL);
properties.add(SASL_MECHANISM);
properties.add(KERBEROS_CREDENTIALS_SERVICE);
properties.add(KERBEROS_SERVICE_NAME);
properties.add(SSL_CONTEXT_SERVICE);
properties.add(MAX_REQUEST_SIZE);
properties.add(ACK_WAIT_TIME);
properties.add(METADATA_WAIT_TIME);
properties.add(COMPRESSION_CODEC);
this.properties = Collections.unmodifiableList(properties);
}
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return properties;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) {
return new PropertyDescriptor.Builder()
.description("Specifies the value for '" + propertyDescriptorName + "' Kafka Configuration.")
.name(propertyDescriptorName)
.addValidator(new DynamicPropertyValidator(ProducerConfig.class))
.dynamic(true)
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
.build();
}
@Override
protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) {
return new KafkaClientCustomValidationFunction().apply(validationContext);
}
@OnEnabled
public void onEnabled(final ConfigurationContext context) throws InitializationException {
topic = context.getProperty(TOPIC).evaluateAttributeExpressions().getValue();
writerFactory = context.getProperty(RecordSinkService.RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
maxMessageSize = context.getProperty(MAX_REQUEST_SIZE).asDataSize(DataUnit.B).intValue();
maxAckWaitMillis = context.getProperty(ACK_WAIT_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final KafkaPropertyProvider propertyProvider = new StandardKafkaPropertyProvider(ProducerConfig.class);
final Map<String, Object> kafkaProperties = propertyProvider.getProperties(context);
kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
kafkaProperties.put("max.request.size", String.valueOf(maxMessageSize));
try {
producer = createProducer(kafkaProperties);
} catch (Exception e) {
getLogger().error("Could not create Kafka producer due to {}", new Object[]{e.getMessage()}, e);
throw new InitializationException(e);
}
}
@Override
public WriteResult sendData(final RecordSet recordSet, final Map<String, String> attributes, final boolean sendZeroResults) throws IOException {
try {
WriteResult writeResult;
final RecordSchema writeSchema = getWriterFactory().getSchema(null, recordSet.getSchema());
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
final ByteCountingOutputStream out = new ByteCountingOutputStream(baos);
int recordCount = 0;
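// The entire record set is serialized into a single in-memory buffer and sent to Kafka as one message; the byte count is checked against 'max.request.size' while writing.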
try (final RecordSetWriter writer = getWriterFactory().createWriter(getLogger(), writeSchema, out, attributes)) {
writer.beginRecordSet();
Record record;
while ((record = recordSet.next()) != null) {
writer.write(record);
recordCount++;
if (out.getBytesWritten() > maxMessageSize) {
throw new TokenTooLargeException("The query's result set size exceeds the maximum allowed message size of " + maxMessageSize + " bytes.");
}
}
writeResult = writer.finishRecordSet();
if (out.getBytesWritten() > maxMessageSize) {
throw new TokenTooLargeException("The query's result set size exceeds the maximum allowed message size of " + maxMessageSize + " bytes.");
}
recordCount = writeResult.getRecordCount();
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
attributes.put("record.count", Integer.toString(recordCount));
attributes.putAll(writeResult.getAttributes());
}
if (recordCount > 0 || sendZeroResults) {
final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(topic, null, null, baos.toByteArray());
try {
producer.send(record, (metadata, exception) -> {
if (exception != null) {
throw new KafkaSendException(exception);
}
}).get(maxAckWaitMillis, TimeUnit.MILLISECONDS);
} catch (KafkaSendException kse) {
Throwable t = kse.getCause();
if (t instanceof IOException) {
throw (IOException) t;
} else {
throw new IOException(t);
}
} catch (final InterruptedException e) {
getLogger().warn("Interrupted while waiting for an acknowledgement from Kafka");
Thread.currentThread().interrupt();
} catch (final TimeoutException e) {
getLogger().warn("Timed out while waiting for an acknowledgement from Kafka");
}
} else {
writeResult = WriteResult.EMPTY;
}
return writeResult;
} catch (IOException ioe) {
throw ioe;
} catch (Exception e) {
throw new IOException("Failed to write metrics using record writer: " + e.getMessage(), e);
}
}
@OnDisabled
public void stop() {
if (producer != null) {
producer.close(maxAckWaitMillis, TimeUnit.MILLISECONDS);
}
}
// this getter is intended explicitly for testing purposes
protected RecordSetWriterFactory getWriterFactory() {
return this.writerFactory;
}
protected Producer<byte[], byte[]> createProducer(Map<String, Object> kafkaProperties) {
return new KafkaProducer<>(kafkaProperties);
}
private static class KafkaSendException extends RuntimeException {
KafkaSendException(Throwable cause) {
super(cause);
}
}
}

View File

@ -1,15 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.record.sink.kafka.KafkaRecordSink_2_0

View File

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.processors.kafka.pubsub.PublishKafka_2_0
org.apache.nifi.processors.kafka.pubsub.PublishKafkaRecord_2_0
org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_2_0
org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_2_0

View File

@ -1,260 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>ConsumeKafkaRecord</title>
<link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css" />
</head>
<body>
<h2>Description</h2>
<p>
This Processor polls <a href="http://kafka.apache.org/">Apache Kafka</a>
for data using the KafkaConsumer API available with Kafka 2.0. When a message is received
from Kafka, the message will be deserialized using the configured Record Reader, and then
written to a FlowFile by serializing the message with the configured Record Writer.
</p>
<h2>Consumer Partition Assignment</h2>
<p>
By default, this processor will subscribe to one or more Kafka topics in such a way that the topics to consume from are randomly
assigned to the nodes in the NiFi cluster. Consider a scenario where a single Kafka topic has 8 partitions and the consuming
NiFi cluster has 3 nodes. In this scenario, Node 1 may be assigned partitions 0, 1, and 2. Node 2 may be assigned partitions 3, 4, and 5.
Node 3 will then be assigned partitions 6 and 7.
</p>
<p>
In this scenario, if Node 3 somehow fails or stops pulling data from Kafka, partitions 6 and 7 may then be reassigned to the other two nodes.
For most use cases, this is desirable. It provides fault tolerance and allows the remaining nodes to pick up the slack. However, there are cases
where this is undesirable.
</p>
<p>
One such case is when using NiFi to consume Change Data Capture (CDC) data from Kafka. Consider again the above scenario. Consider that Node 3
has pulled 1,000 messages from Kafka but has not yet delivered them to their final destination. NiFi is then stopped and restarted, and that takes
15 minutes to complete. In the meantime, Partitions 6 and 7 have been reassigned to the other nodes. Those nodes then proceed to pull data from
Kafka and deliver it to the desired destination. After 15 minutes, Node 3 rejoins the cluster and then continues to deliver its 1,000 messages that
it has already pulled from Kafka to the destination system. Now, those records have been delivered out of order.
</p>
<p>
The solution for this, then, is to assign partitions statically instead of dynamically. In this way, we can assign Partitions 6 and 7 to Node 3 specifically.
Then, if Node 3 is restarted, the other nodes will not pull data from Partitions 6 and 7. The data will remain queued in Kafka until Node 3 is restarted. By
using this approach, we can ensure that the data that already was pulled can be processed (assuming First In First Out Prioritizers are used) before newer messages
are handled.
</p>
<p>
In order to provide a static mapping of node to Kafka partition(s), one or more user-defined properties must be added using the naming scheme
<code>partitions.&lt;hostname&gt;</code> with the value being a comma-separated list of Kafka partitions to use. For example,
<code>partitions.nifi-01=0, 3, 6, 9</code>, <code>partitions.nifi-02=1, 4, 7, 10</code>, and <code>partitions.nifi-03=2, 5, 8, 11</code>.
The hostname that is used can be the fully qualified hostname, the "simple" hostname, or the IP address. There must be an entry for each node in
the cluster, or the Processor will become invalid. If it is desirable for a node to not have any partitions assigned to it, a Property may be
added for the hostname with an empty string as the value.
</p>
<p>
NiFi cannot readily validate that all Partitions have been assigned before the Processor is scheduled to run. However, it can validate that no
partitions have been skipped. As such, if partitions 0, 1, and 3 are assigned but not partition 2, the Processor will not be valid. However,
if partitions 0, 1, and 2 are assigned, the Processor will become valid, even if there are 4 partitions on the Topic. When the Processor is
started, it will immediately start to fail, logging errors, and will not pull any data until the Processor is updated to account
for all partitions. Once running, if the number of partitions is changed, the Processor will continue to run but will not pull data from the newly
added partitions. Once stopped, it will again log errors until all partitions have been assigned. Additionally, if partitions that are assigned
do not exist (e.g., partitions 0, 1, 2, 3, 4, 5, 6, and 7 are assigned, but the Topic has only 4 partitions), then the Processor will begin
to log errors on startup and will not pull data.
</p>
<p>
In order to use a static mapping of Kafka partitions, the "Topic Name Format" must be set to "names" rather than "pattern." Additionally, all
Topics that are to be consumed must have the same number of partitions. If multiple Topics are to be consumed and have a different number of
partitions, multiple Processors must be used so that each Processor consumes only from Topics with the same number of partitions.
</p>
<h2>Security Configuration:</h2>
<p>
The Security Protocol property allows the user to specify the protocol for communicating
with the Kafka broker. The following sections describe each of the protocols in further detail.
</p>
<h3>PLAINTEXT</h3>
<p>
This option provides an unsecured connection to the broker, with no client authentication and no encryption.
In order to use this option the broker must be configured with a listener of the form:
<pre>
PLAINTEXT://host.name:port
</pre>
</p>
<h3>SSL</h3>
<p>
This option provides an encrypted connection to the broker, with optional client authentication. In order
to use this option the broker must be configured with a listener of the form:
<pre>
SSL://host.name:port
</pre>
In addition, the processor must have an SSL Context Service selected.
</p>
<p>
If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
not be required to present a certificate. In this case, the SSL Context Service selected may specify only
a truststore containing the public key of the certificate authority used to sign the broker's key.
</p>
<p>
If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
a truststore as described above.
</p>
<h3>SASL_PLAINTEXT</h3>
<p>
This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_PLAINTEXT://host.name:port
</pre>
In addition, the Kerberos Service Name must be specified in the processor.
</p>
<h4>SASL_PLAINTEXT - GSSAPI</h4>
<p>
If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate.
</p>
<p>
An example of the JAAS config file would be the following:
<pre>
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
};
</pre>
<b>NOTE:</b> The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
</p>
<p>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
</pre>
</ol>
</p>
<p>
Alternatively, the JAAS
configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
directly in the processor properties. This will dynamically create a JAAS configuration like above, and
will take precedence over the java.security.auth.login.config system property.
</p>
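<p>
As an illustrative sketch only (the exact configuration text generated by the processor may differ), setting the Kerberos Principal to
nifi@YOURREALM.COM and the Kerberos Keytab to /path/to/nifi.keytab would result in a JAAS configuration equivalent to:
<pre>
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
</pre>
</p>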
<h4>SASL_PLAINTEXT - PLAIN</h4>
<p>
If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
</p>
<p>
<b>NOTE:</b> It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
the username and password unencrypted.
</p>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the PLAIN SASL mechanism. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<p>
<b>NOTE:</b> Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
it visible to components in other NARs that may access the providers. There is currently a known issue
where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
</p>
<h4>SASL_PLAINTEXT - SCRAM</h4>
<p>
If the SASL mechanism is SCRAM, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's ScramLoginModule. Ensure that a user-defined property named 'sasl.mechanism' is added and set to 'SCRAM-SHA-256' or 'SCRAM-SHA-512', according to the Kafka broker configuration. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the SCRAM-SHA-256 or SCRAM-SHA-512 SASL mechanisms. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<h3>SASL_SSL</h3>
<p>
This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_SSL://host.name:port
</pre>
</p>
<p>
See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
depending on the SASL mechanism (GSSAPI or PLAIN).
</p>
<p>
See the SSL section for a description of how to configure the SSL Context Service based on the
ssl.client.auth property.
</p>
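<p>
As an illustrative summary only (the property names below are taken from the sections above, the values are placeholders, and the listing is
not an exhaustive configuration), a SASL_SSL setup using the SCRAM-SHA-256 mechanism might combine:
<pre>
Security Protocol   : SASL_SSL
SSL Context Service : a service providing a truststore (plus a keystore if the broker sets ssl.client.auth=required)
sasl.mechanism      : SCRAM-SHA-256
sasl.jaas.config    : org.apache.kafka.common.security.scram.ScramLoginModule required
                      username="nifi"
                      password="nifi-password";
</pre>
</p>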
</body>
</html>

View File

@ -1,260 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>ConsumeKafka</title>
<link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css" />
</head>
<body>
<h2>Description</h2>
<p>
This Processor polls <a href="http://kafka.apache.org/">Apache Kafka</a>
for data using the KafkaConsumer API available with Kafka 2.0. When a message is received
from Kafka, this Processor emits a FlowFile where the content of the FlowFile is the value
of the Kafka message.
</p>
<h2>Consumer Partition Assignment</h2>
<p>
By default, this processor will subscribe to one or more Kafka topics in such a way that the topics to consume from are randomly
assigned to the nodes in the NiFi cluster. Consider a scenario where a single Kafka topic has 8 partitions and the consuming
NiFi cluster has 3 nodes. In this scenario, Node 1 may be assigned partitions 0, 1, and 2. Node 2 may be assigned partitions 3, 4, and 5.
Node 3 will then be assigned partitions 6 and 7.
</p>
<p>
In this scenario, if Node 3 somehow fails or stops pulling data from Kafka, partitions 6 and 7 may then be reassigned to the other two nodes.
For most use cases, this is desirable. It provides fault tolerance and allows the remaining nodes to pick up the slack. However, there are cases
where this is undesirable.
</p>
<p>
One such case is when using NiFi to consume Change Data Capture (CDC) data from Kafka. Consider again the above scenario. Consider that Node 3
has pulled 1,000 messages from Kafka but has not yet delivered them to their final destination. NiFi is then stopped and restarted, and that takes
15 minutes to complete. In the meantime, Partitions 6 and 7 have been reassigned to the other nodes. Those nodes then proceeded to pull data from
Kafka and deliver it to the desired destination. After 15 minutes, Node 3 rejoins the cluster and then continues to deliver its 1,000 messages that
it has already pulled from Kafka to the destination system. Now, those records have been delivered out of order.
</p>
<p>
The solution for this, then, is to assign partitions statically instead of dynamically. In this way, we can assign Partitions 6 and 7 to Node 3 specifically.
Then, if Node 3 is restarted, the other nodes will not pull data from Partitions 6 and 7. The data will remain queued in Kafka until Node 3 is restarted. By
using this approach, we can ensure that the data that already was pulled can be processed (assuming First In First Out Prioritizers are used) before newer messages
are handled.
</p>
<p>
In order to provide a static mapping of node to Kafka partition(s), one or more user-defined properties must be added using the naming scheme
<code>partitions.&lt;hostname&gt;</code> with the value being a comma-separated list of Kafka partitions to use. For example,
<code>partitions.nifi-01=0, 3, 6, 9</code>, <code>partitions.nifi-02=1, 4, 7, 10</code>, and <code>partitions.nifi-03=2, 5, 8, 11</code>.
The hostname that is used can be the fully qualified hostname, the "simple" hostname, or the IP address. There must be an entry for each node in
the cluster, or the Processor will become invalid. If it is desirable for a node to not have any partitions assigned to it, a Property may be
added for the hostname with an empty string as the value.
</p>
<p>
NiFi cannot readily validate that all Partitions have been assigned before the Processor is scheduled to run. However, it can validate that no
partitions have been skipped. As such, if partitions 0, 1, and 3 are assigned but not partition 2, the Processor will not be valid. However,
if partitions 0, 1, and 2 are assigned, the Processor will become valid, even if there are 4 partitions on the Topic. When the Processor is
started, the Processor will immediately begin to fail, logging errors and pulling no data, until the Processor is updated to account
for all partitions. Once running, if the number of partitions is changed, the Processor will continue to run but not pull data from the newly
added partitions. Once stopped, it will begin to error until all partitions have been assigned. Additionally, if partitions that are assigned
do not exist (e.g., partitions 0, 1, 2, 3, 4, 5, 6, and 7 are assigned, but the Topic has only 4 partitions), then the Processor will begin
to log errors on startup and will not pull data.
</p>
<p>
In order to use a static mapping of Kafka partitions, the "Topic Name Format" must be set to "names" rather than "pattern." Additionally, all
Topics that are to be consumed must have the same number of partitions. If multiple Topics are to be consumed and have a different number of
partitions, multiple Processors must be used so that each Processor consumes only from Topics with the same number of partitions.
</p>
<h2>Security Configuration</h2>
<p>
The Security Protocol property allows the user to specify the protocol for communicating
with the Kafka broker. The following sections describe each of the protocols in further detail.
</p>
<h3>PLAINTEXT</h3>
<p>
This option provides an unsecured connection to the broker, with no client authentication and no encryption.
In order to use this option the broker must be configured with a listener of the form:
<pre>
PLAINTEXT://host.name:port
</pre>
</p>
<h3>SSL</h3>
<p>
This option provides an encrypted connection to the broker, with optional client authentication. In order
to use this option the broker must be configured with a listener of the form:
<pre>
SSL://host.name:port
</pre>
In addition, the processor must have an SSL Context Service selected.
</p>
<p>
If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
not be required to present a certificate. In this case, the SSL Context Service selected may specify only
a truststore containing the public key of the certificate authority used to sign the broker's key.
</p>
<p>
If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
a truststore as described above.
</p>
<h3>SASL_PLAINTEXT</h3>
<p>
This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_PLAINTEXT://host.name:port
</pre>
In addition, the Kerberos Service Name must be specified in the processor.
</p>
<h4>SASL_PLAINTEXT - GSSAPI</h4>
<p>
If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate.
</p>
<p>
An example of the JAAS config file would be the following:
<pre>
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
};
</pre>
<b>NOTE:</b> The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
</p>
<p>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
</pre>
</ol>
</p>
<p>
Alternatively, the JAAS
configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
directly in the processor properties. This will dynamically create a JAAS configuration like above, and
will take precedence over the java.security.auth.login.config system property.
</p>
<h4>SASL_PLAINTEXT - PLAIN</h4>
<p>
If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
</p>
<p>
<b>NOTE:</b> It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
the username and password unencrypted.
</p>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the PLAIN SASL mechanism. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<p>
<b>NOTE:</b> Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
it visible to components in other NARs that may access the providers. There is currently a known issue
where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
</p>
<h4>SASL_PLAINTEXT - SCRAM</h4>
<p>
If the SASL mechanism is SCRAM, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's ScramLoginModule. Ensure that a user-defined property named 'sasl.mechanism' is added and set to 'SCRAM-SHA-256' or 'SCRAM-SHA-512', according to the Kafka broker configuration. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the SCRAM-SHA-256 or SCRAM-SHA-512 SASL mechanisms. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<h3>SASL_SSL</h3>
<p>
This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_SSL://host.name:port
</pre>
</p>
<p>
See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
depending on the SASL mechanism (GSSAPI or PLAIN).
</p>
<p>
See the SSL section for a description of how to configure the SSL Context Service based on the
ssl.client.auth property.
</p>
</body>
</html>

View File

@ -1,193 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>PublishKafkaRecord</title>
<link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css" />
</head>
<body>
<h2>Description</h2>
<p>
This Processor puts the contents of a FlowFile to a Topic in
<a href="http://kafka.apache.org/">Apache Kafka</a> using KafkaProducer API available
with Kafka 2.0 API. The contents of the incoming FlowFile will be read using the
configured Record Reader. Each record will then be serialized using the configured
Record Writer, and this serialized form will be the content of a Kafka message.
This message is optionally assigned a key by using the &lt;Kafka Key&gt; Property.
</p>
<h2>Security Configuration</h2>
<p>
The Security Protocol property allows the user to specify the protocol for communicating
with the Kafka broker. The following sections describe each of the protocols in further detail.
</p>
<h3>PLAINTEXT</h3>
<p>
This option provides an unsecured connection to the broker, with no client authentication and no encryption.
In order to use this option the broker must be configured with a listener of the form:
<pre>
PLAINTEXT://host.name:port
</pre>
</p>
<h3>SSL</h3>
<p>
This option provides an encrypted connection to the broker, with optional client authentication. In order
to use this option the broker must be configured with a listener of the form:
<pre>
SSL://host.name:port
</pre>
In addition, the processor must have an SSL Context Service selected.
</p>
<p>
If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
not be required to present a certificate. In this case, the SSL Context Service selected may specify only
a truststore containing the public key of the certificate authority used to sign the broker's key.
</p>
<p>
If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
a truststore as described above.
</p>
<h3>SASL_PLAINTEXT</h3>
<p>
This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_PLAINTEXT://host.name:port
</pre>
In addition, the Kerberos Service Name must be specified in the processor.
</p>
<h4>SASL_PLAINTEXT - GSSAPI</h4>
<p>
If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
NiFi's bootstrap.conf, such as:
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
</p>
<p>
An example of the JAAS config file would be the following:
<pre>
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
};
</pre>
<b>NOTE:</b> The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
</p>
<p>
Alternatively, the JAAS
configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
directly in the processor properties. This will dynamically create a JAAS configuration like above, and
will take precedence over the java.security.auth.login.config system property.
</p>
<h4>SASL_PLAINTEXT - PLAIN</h4>
<p>
If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
</p>
<p>
<b>NOTE:</b> It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
the username and password unencrypted.
</p>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the PLAIN SASL mechanism. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<p>
<b>NOTE:</b> Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
it visible to components in other NARs that may access the providers. There is currently a known issue
where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
</p>
<h4>SASL_PLAINTEXT - SCRAM</h4>
<p>
If the SASL mechanism is SCRAM, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's ScramLoginModule. Ensure that a user-defined property named 'sasl.mechanism' is added and set to 'SCRAM-SHA-256' or 'SCRAM-SHA-512', according to the Kafka broker configuration. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
<b>NOTE:</b> The Kerberos Service Name is not required for the SCRAM-SHA-256 or SCRAM-SHA-512 SASL mechanisms. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<h3>SASL_SSL</h3>
<p>
This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_SSL://host.name:port
</pre>
</p>
<p>
See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
depending on the SASL mechanism (GSSAPI or PLAIN).
</p>
<p>
See the SSL section for a description of how to configure the SSL Context Service based on the
ssl.client.auth property.
</p>
</body>
</html>

View File

@ -1,205 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>PublishKafka</title>
<link rel="stylesheet" href="../../../../../css/component-usage.css" type="text/css" />
</head>
<body>
<h2>Description</h2>
<p>
This Processor puts the contents of a FlowFile to a Topic in
<a href="http://kafka.apache.org/">Apache Kafka</a> using KafkaProducer API available
with Kafka 2.0 API. The content of a FlowFile becomes the contents of a Kafka message.
This message is optionally assigned a key by using the &lt;Kafka Key&gt; Property.
</p>
<p>
The Processor allows the user to configure an optional Message Demarcator that
can be used to send many messages per FlowFile. For example, a <i>\n</i> could be used
to indicate that the contents of the FlowFile should be used to send one message
per line of text. It also supports multi-char demarcators (e.g., 'my custom demarcator').
If the property is not set, the entire contents of the FlowFile
will be sent as a single message. When using the demarcator, if some messages are
successfully sent but other messages fail to send, the resulting FlowFile will be
considered a failed FlowFile and will have additional attributes to that effect.
One such attribute is 'failed.last.idx', which indicates the index of the last message
that was successfully ACKed by Kafka (if no demarcator is used, the value of this index will be -1).
This allows PublishKafka to re-send only the un-ACKed messages on the next retry.
</p>
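<p>
As an illustrative example (the message contents and counts are hypothetical), suppose the demarcator is \n and a FlowFile
contains three messages. If Kafka ACKs the first two but the third send fails, the resulting failed FlowFile would carry
<code>failed.last.idx=1</code>, so only the third message (index 2) is re-sent on the next retry.
</p>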
<h2>Security Configuration</h2>
<p>
The Security Protocol property allows the user to specify the protocol for communicating
with the Kafka broker. The following sections describe each of the protocols in further detail.
</p>
<h3>PLAINTEXT</h3>
<p>
This option provides an unsecured connection to the broker, with no client authentication and no encryption.
In order to use this option the broker must be configured with a listener of the form:
<pre>
PLAINTEXT://host.name:port
</pre>
</p>
<h3>SSL</h3>
<p>
This option provides an encrypted connection to the broker, with optional client authentication. In order
to use this option the broker must be configured with a listener of the form:
<pre>
SSL://host.name:port
</pre>
In addition, the processor must have an SSL Context Service selected.
</p>
<p>
If the broker specifies ssl.client.auth=none, or does not specify ssl.client.auth, then the client will
not be required to present a certificate. In this case, the SSL Context Service selected may specify only
a truststore containing the public key of the certificate authority used to sign the broker's key.
</p>
<p>
If the broker specifies ssl.client.auth=required then the client will be required to present a certificate.
In this case, the SSL Context Service must also specify a keystore containing a client key, in addition to
a truststore as described above.
</p>
<h3>SASL_PLAINTEXT</h3>
<p>
This option uses SASL with a PLAINTEXT transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_PLAINTEXT://host.name:port
</pre>
In addition, the Kerberos Service Name must be specified in the processor.
</p>
<h4>SASL_PLAINTEXT - GSSAPI</h4>
<p>
If the SASL mechanism is GSSAPI, then the client must provide a JAAS configuration to authenticate. The
JAAS configuration can be provided by specifying the java.security.auth.login.config system property in
NiFi's bootstrap.conf, such as:
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
</p>
<p>
An example of the JAAS config file would be the following:
<pre>
KafkaClient {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
storeKey=true
keyTab="/path/to/nifi.keytab"
serviceName="kafka"
principal="nifi@YOURREALM.COM";
};
</pre>
<b>NOTE:</b> The serviceName in the JAAS file must match the Kerberos Service Name in the processor.
</p>
<p>
Alternatively, the JAAS
configuration when using GSSAPI can be provided by specifying the Kerberos Principal and Kerberos Keytab
directly in the processor properties. This will dynamically create a JAAS configuration like above, and
will take precedence over the java.security.auth.login.config system property.
</p>
<h4>SASL_PLAINTEXT - PLAIN</h4>
<p>
If the SASL mechanism is PLAIN, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's PlainLoginModule. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.plain.PlainLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
</p>
<p>
<b>NOTE:</b> It is not recommended to use a SASL mechanism of PLAIN with SASL_PLAINTEXT, as it would transmit
the username and password unencrypted.
</p>
<p>
<b>NOTE:</b> The Kerberos Service Name is not required for the PLAIN SASL mechanism. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<p>
<b>NOTE:</b> Using the PlainLoginModule will cause it to be registered in the JVM's static list of Providers, making
it visible to components in other NARs that may access the providers. There is currently a known issue
where Kafka processors using the PlainLoginModule will cause HDFS processors with Kerberos to no longer work.
</p>
<h4>SASL_PLAINTEXT - SCRAM</h4>
<p>
If the SASL mechanism is SCRAM, then the client must provide a JAAS configuration to authenticate, but
the JAAS configuration must use Kafka's ScramLoginModule. Ensure that a user-defined property named 'sasl.mechanism' is added and set to 'SCRAM-SHA-256' or 'SCRAM-SHA-512', according to the Kafka broker configuration. An example of the JAAS config file would
be the following:
<pre>
KafkaClient {
org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
};
</pre>
The JAAS configuration can be provided in either of the following ways:
<ol type="1">
<li>specify the java.security.auth.login.config system property in
NiFi's bootstrap.conf. This limits you to using only one user credential across the cluster.</li>
<pre>
java.arg.16=-Djava.security.auth.login.config=/path/to/kafka_client_jaas.conf
</pre>
<li>add a user-defined property named 'sasl.jaas.config' to the processor configuration. This method allows multiple consumers to use different user credentials and provides the flexibility to consume from multiple Kafka clusters.</li>
<pre>
sasl.jaas.config : org.apache.kafka.common.security.scram.ScramLoginModule required
username="nifi"
password="nifi-password";
</pre>
<b>NOTE:</b> The dynamic properties of this processor are not secured; as a result, the password entered when using sasl.jaas.config will be stored in plain text in the flow.xml.gz file and will be saved to NiFi Registry if versioned flows are used.
</ol>
<b>NOTE:</b> The Kerberos Service Name is not required for the SCRAM-SHA-256 or SCRAM-SHA-512 SASL mechanisms. However, the processor will warn that this property must be filled with a non-empty string, so any placeholder value, such as "null", may be used.
</p>
<h3>SASL_SSL</h3>
<p>
This option uses SASL with an SSL/TLS transport layer to authenticate to the broker. In order to use this
option the broker must be configured with a listener of the form:
<pre>
SASL_SSL://host.name:port
</pre>
</p>
<p>
See the SASL_PLAINTEXT section for a description of how to provide the proper JAAS configuration
depending on the SASL mechanism (GSSAPI or PLAIN).
</p>
<p>
See the SSL section for a description of how to configure the SSL Context Service based on the
ssl.client.auth property.
</p>
</body>
</html>

View File

@ -1,319 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicPartition;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processors.kafka.pubsub.ConsumerPool.PoolStats;
import org.apache.nifi.provenance.ProvenanceReporter;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Pattern;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
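/**
 * Unit tests for ConsumerPool covering lease creation and reuse, pool statistics, demarcated consumption,
 * static partition assignment, and propagation of KafkaException from the wrapped consumer.
 */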
public class ConsumerPoolTest {
private Consumer<byte[], byte[]> consumer = null;
private ProcessSession mockSession = null;
private ProcessContext mockContext = Mockito.mock(ProcessContext.class);
private ProvenanceReporter mockReporter = null;
private ConsumerPool testPool = null;
private ConsumerPool testDemarcatedPool = null;
private ComponentLog logger = null;
@BeforeEach
@SuppressWarnings("unchecked")
public void setup() {
consumer = mock(Consumer.class);
logger = mock(ComponentLog.class);
mockSession = mock(ProcessSession.class);
mockReporter = mock(ProvenanceReporter.class);
when(mockSession.getProvenanceReporter()).thenReturn(mockReporter);
testPool = new ConsumerPool(
1,
null,
false,
Collections.emptyMap(),
Collections.singletonList("nifi"),
100L,
"utf-8",
"ssl",
"localhost",
logger,
true,
StandardCharsets.UTF_8,
null,
null) {
@Override
protected Consumer<byte[], byte[]> createKafkaConsumer() {
return consumer;
}
};
testDemarcatedPool = new ConsumerPool(
1,
"--demarcator--".getBytes(StandardCharsets.UTF_8),
false,
Collections.emptyMap(),
Collections.singletonList("nifi"),
100L,
"utf-8",
"ssl",
"localhost",
logger,
true,
StandardCharsets.UTF_8,
Pattern.compile(".*"),
null) {
@Override
protected Consumer<byte[], byte[]> createKafkaConsumer() {
return consumer;
}
};
}
@Test
public void validatePoolSimpleCreateClose() {
when(consumer.poll(any(Duration.class))).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
}
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
}
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
}
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
}
testPool.close();
verify(mockSession, times(0)).create();
verify(mockSession, times(0)).commit();
final PoolStats stats = testPool.getPoolStats();
assertEquals(1, stats.consumerCreatedCount);
assertEquals(1, stats.consumerClosedCount);
assertEquals(4, stats.leasesObtainedCount);
}
@Test
@SuppressWarnings("unchecked")
public void validatePoolSimpleCreatePollClose() {
final byte[][] firstPassValues = new byte[][]{
"Hello-1".getBytes(StandardCharsets.UTF_8),
"Hello-2".getBytes(StandardCharsets.UTF_8),
"Hello-3".getBytes(StandardCharsets.UTF_8)
};
final ConsumerRecords<byte[], byte[]> firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues);
when(consumer.poll(any(Duration.class))).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
lease.commit();
}
testPool.close();
verify(mockSession, times(3)).create();
verify(mockSession, times(1)).commitAsync(Mockito.any(Runnable.class));
final PoolStats stats = testPool.getPoolStats();
assertEquals(1, stats.consumerCreatedCount);
assertEquals(1, stats.consumerClosedCount);
assertEquals(1, stats.leasesObtainedCount);
}
@Test
public void testConsumerCreatedOnDemand() {
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
final List<ConsumerLease> created = new ArrayList<>();
try {
for (int i = 0; i < 3; i++) {
final ConsumerLease newLease = testPool.obtainConsumer(mockSession, mockContext);
created.add(newLease);
assertNotSame(lease, newLease);
}
} finally {
created.forEach(ConsumerLease::close);
}
}
}
@Test
public void testConsumerNotCreatedOnDemandWhenUsingStaticAssignment() {
final ConsumerPool staticAssignmentPool = new ConsumerPool(
1,
null,
false,
Collections.emptyMap(),
Collections.singletonList("nifi"),
100L,
"utf-8",
"ssl",
"localhost",
logger,
true,
StandardCharsets.UTF_8,
null,
new int[] {1, 2, 3}) {
@Override
protected Consumer<byte[], byte[]> createKafkaConsumer() {
return consumer;
}
};
try (final ConsumerLease lease = staticAssignmentPool.obtainConsumer(mockSession, mockContext)) {
ConsumerLease partition2Lease = null;
ConsumerLease partition3Lease = null;
try {
partition2Lease = staticAssignmentPool.obtainConsumer(mockSession, mockContext);
assertNotSame(lease, partition2Lease);
assertEquals(1, partition2Lease.getAssignedPartitions().size());
assertEquals(2, partition2Lease.getAssignedPartitions().get(0).partition());
partition3Lease = staticAssignmentPool.obtainConsumer(mockSession, mockContext);
assertNotSame(lease, partition3Lease);
assertNotSame(partition2Lease, partition3Lease);
assertEquals(1, partition3Lease.getAssignedPartitions().size());
assertEquals(3, partition3Lease.getAssignedPartitions().get(0).partition());
final ConsumerLease nullLease = staticAssignmentPool.obtainConsumer(mockSession, mockContext);
assertNull(nullLease);
// Close the lease for Partition 2. We should now be able to get another Lease for Partition 2.
partition2Lease.close();
partition2Lease = staticAssignmentPool.obtainConsumer(mockSession, mockContext);
assertNotNull(partition2Lease);
assertEquals(1, partition2Lease.getAssignedPartitions().size());
assertEquals(2, partition2Lease.getAssignedPartitions().get(0).partition());
assertNull(staticAssignmentPool.obtainConsumer(mockSession, mockContext));
} finally {
closeLeases(partition2Lease, partition3Lease);
}
}
}
private void closeLeases(final ConsumerLease... leases) {
for (final ConsumerLease lease : leases) {
if (lease != null) {
lease.close();
}
}
}
@Test
public void validatePoolSimpleBatchCreateClose() {
when(consumer.poll(any(Duration.class))).thenReturn(createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
for (int i = 0; i < 100; i++) {
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
for (int j = 0; j < 100; j++) {
lease.poll();
}
}
}
testPool.close();
verify(mockSession, times(0)).create();
verify(mockSession, times(0)).commit();
final PoolStats stats = testPool.getPoolStats();
assertEquals(1, stats.consumerCreatedCount);
assertEquals(1, stats.consumerClosedCount);
assertEquals(100, stats.leasesObtainedCount);
}
@Test
@SuppressWarnings("unchecked")
public void validatePoolBatchCreatePollClose() {
final byte[][] firstPassValues = new byte[][]{
"Hello-1".getBytes(StandardCharsets.UTF_8),
"Hello-2".getBytes(StandardCharsets.UTF_8),
"Hello-3".getBytes(StandardCharsets.UTF_8)
};
final ConsumerRecords<byte[], byte[]> firstRecs = createConsumerRecords("foo", 1, 1L, firstPassValues);
when(consumer.poll(any(Duration.class))).thenReturn(firstRecs, createConsumerRecords("nifi", 0, 0L, new byte[][]{}));
try (final ConsumerLease lease = testDemarcatedPool.obtainConsumer(mockSession, mockContext)) {
lease.poll();
lease.commit();
}
testDemarcatedPool.close();
verify(mockSession, times(1)).create();
verify(mockSession, times(1)).commitAsync(Mockito.any(Runnable.class));
final PoolStats stats = testDemarcatedPool.getPoolStats();
assertEquals(1, stats.consumerCreatedCount);
assertEquals(1, stats.consumerClosedCount);
assertEquals(1, stats.leasesObtainedCount);
}
@Test
public void validatePoolConsumerFails() {
when(consumer.poll(any(Duration.class))).thenThrow(new KafkaException("oops"));
try (final ConsumerLease lease = testPool.obtainConsumer(mockSession, mockContext)) {
assertThrows(KafkaException.class, () -> lease.poll());
}
testPool.close();
verify(mockSession, times(0)).create();
verify(mockSession, times(0)).commit();
final PoolStats stats = testPool.getPoolStats();
assertEquals(1, stats.consumerCreatedCount);
assertEquals(1, stats.consumerClosedCount);
assertEquals(1, stats.leasesObtainedCount);
}
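/**
 * Builds a ConsumerRecords instance for a single topic/partition, wrapping each raw value as a ConsumerRecord
 * with a random UUID key and sequential offsets starting at the given offset.
 */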
@SuppressWarnings({"rawtypes", "unchecked"})
static ConsumerRecords<byte[], byte[]> createConsumerRecords(final String topic, final int partition, final long startingOffset, final byte[][] rawRecords) {
final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> map = new HashMap<>();
final TopicPartition tPart = new TopicPartition(topic, partition);
final List<ConsumerRecord<byte[], byte[]>> records = new ArrayList<>();
long offset = startingOffset;
for (final byte[] rawRecord : rawRecords) {
final ConsumerRecord<byte[], byte[]> rec = new ConsumerRecord(topic, partition, offset++, UUID.randomUUID().toString().getBytes(), rawRecord);
records.add(rec);
}
map.put(tPart, records);
return new ConsumerRecords(map);
}
}

View File

@ -1,135 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;
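/**
 * Tests for ConsumeKafka_2_0 that stub the ConsumerPool and ConsumerLease in order to verify how the processor
 * polls, commits, and closes a lease for name-based and pattern-based topic subscriptions, including the case
 * where the commit fails.
 */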
public class ITConsumeKafka_2_0 {
ConsumerLease mockLease = null;
ConsumerPool mockConsumerPool = null;
@BeforeEach
public void setup() {
mockLease = mock(ConsumerLease.class);
mockConsumerPool = mock(ConsumerPool.class);
}
@Test
public void validateGetAllMessages() {
String groupName = "validateGetAllMessages";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
when(mockLease.commit()).thenReturn(Boolean.TRUE);
ConsumeKafka_2_0 proc = new ConsumeKafka_2_0() {
@Override
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
return mockConsumerPool;
}
};
final TestRunner runner = TestRunners.newTestRunner(proc);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "foo,bar");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(3)).continuePolling();
verify(mockLease, times(2)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
@Test
public void validateGetAllMessagesPattern() {
String groupName = "validateGetAllMessagesPattern";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
when(mockLease.commit()).thenReturn(Boolean.TRUE);
ConsumeKafka_2_0 proc = new ConsumeKafka_2_0() {
@Override
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
return mockConsumerPool;
}
};
final TestRunner runner = TestRunners.newTestRunner(proc);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "(fo.*)|(ba)");
runner.setProperty(ConsumeKafka_2_0.TOPIC_TYPE, "pattern");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(3)).continuePolling();
verify(mockLease, times(2)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
@Test
public void validateGetErrorMessages() {
String groupName = "validateGetErrorMessages";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(true, false);
when(mockLease.commit()).thenReturn(Boolean.FALSE);
ConsumeKafka_2_0 proc = new ConsumeKafka_2_0() {
@Override
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
return mockConsumerPool;
}
};
final TestRunner runner = TestRunners.newTestRunner(proc);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "0.0.0.0:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "foo,bar");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(2)).continuePolling();
verify(mockLease, times(1)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
}

View File

@ -1,279 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.nifi.kafka.shared.property.SaslMechanism;
import org.apache.nifi.kafka.shared.property.SecurityProtocol;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.MockRecordWriter;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;
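/**
 * Tests for ConsumeKafkaRecord_2_0 covering custom Kafka client property validation, required-property validation
 * (such as Group ID), and lease interactions against a mocked ConsumerPool.
 */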
public class TestConsumeKafkaRecord_2_0 {
private ConsumerLease mockLease = null;
private ConsumerPool mockConsumerPool = null;
private TestRunner runner;
@BeforeEach
public void setup() throws InitializationException {
mockLease = mock(ConsumerLease.class);
mockConsumerPool = mock(ConsumerPool.class);
ConsumeKafkaRecord_2_0 proc = new ConsumeKafkaRecord_2_0() {
@Override
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
return mockConsumerPool;
}
};
runner = TestRunners.newTestRunner(proc);
runner.setProperty(ConsumeKafkaRecord_2_0.BOOTSTRAP_SERVERS, "okeydokey:1234");
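// Register mock record reader and writer services so the record-oriented properties validate.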
final String readerId = "record-reader";
final MockRecordParser readerService = new MockRecordParser();
readerService.addSchemaField("name", RecordFieldType.STRING);
readerService.addSchemaField("age", RecordFieldType.INT);
runner.addControllerService(readerId, readerService);
runner.enableControllerService(readerService);
final String writerId = "record-writer";
final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
runner.addControllerService(writerId, writerService);
runner.enableControllerService(writerService);
runner.setProperty(ConsumeKafkaRecord_2_0.RECORD_READER, readerId);
runner.setProperty(ConsumeKafkaRecord_2_0.RECORD_WRITER, writerId);
}
@Test
public void validateCustomValidatorSettings() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
runner.assertValid();
runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
runner.assertValid();
}
@Test
public void validatePropertiesValidation() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.removeProperty(ConsumeKafkaRecord_2_0.GROUP_ID);
AssertionError e = assertThrows(AssertionError.class, () -> runner.assertValid());
assertTrue(e.getMessage().contains("invalid because Group ID is required"));
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "");
e = assertThrows(AssertionError.class, () -> runner.assertValid());
assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, " ");
e = assertThrows(AssertionError.class, () -> runner.assertValid());
assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
}
@Test
public void validateGetAllMessages() {
String groupName = "validateGetAllMessages";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
when(mockLease.commit()).thenReturn(Boolean.TRUE);
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo,bar");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(3)).continuePolling();
verify(mockLease, times(2)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
@Test
public void validateGetAllMessagesPattern() {
String groupName = "validateGetAllMessagesPattern";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
when(mockLease.commit()).thenReturn(Boolean.TRUE);
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "(fo.*)|(ba)");
runner.setProperty(ConsumeKafkaRecord_2_0.TOPIC_TYPE, "pattern");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(3)).continuePolling();
verify(mockLease, times(2)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
@Test
public void validateGetErrorMessages() {
String groupName = "validateGetErrorMessages";
when(mockConsumerPool.obtainConsumer(any(), any())).thenReturn(mockLease);
when(mockLease.continuePolling()).thenReturn(true, false);
when(mockLease.commit()).thenReturn(Boolean.FALSE);
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo,bar");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, groupName);
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.run(1, false);
verify(mockConsumerPool, times(1)).obtainConsumer(any(), any());
verify(mockLease, times(2)).continuePolling();
verify(mockLease, times(1)).poll();
verify(mockLease, times(1)).commit();
verify(mockLease, times(1)).close();
verifyNoMoreInteractions(mockConsumerPool);
verifyNoMoreInteractions(mockLease);
}
@Test
public void testJaasConfigurationWithDefaultMechanism() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafkaRecord_2_0.SECURITY_PROTOCOL, SecurityProtocol.SASL_PLAINTEXT.name());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.KERBEROS_SERVICE_NAME, "kafka");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.KERBEROS_PRINCIPAL, "nifi@APACHE.COM");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.KERBEROS_KEYTAB, "not.A.File");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.KERBEROS_KEYTAB, "src/test/resources/server.properties");
runner.assertValid();
}
@Test
public void testJaasConfigurationWithPlainMechanism() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafkaRecord_2_0.SECURITY_PROTOCOL, SecurityProtocol.SASL_PLAINTEXT.name());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_MECHANISM, SaslMechanism.PLAIN.getValue());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME, "user1");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_PASSWORD, "password");
runner.assertValid();
runner.removeProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME);
runner.assertNotValid();
}
@Test
public void testJaasConfigurationWithScram256Mechanism() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafkaRecord_2_0.SECURITY_PROTOCOL, SecurityProtocol.SASL_PLAINTEXT.name());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_MECHANISM, SaslMechanism.SCRAM_SHA_256.getValue());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME, "user1");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_PASSWORD, "password");
runner.assertValid();
runner.removeProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME);
runner.assertNotValid();
}
@Test
public void testJaasConfigurationWithScram512Mechanism() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafkaRecord_2_0.SECURITY_PROTOCOL, SecurityProtocol.SASL_PLAINTEXT.name());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_MECHANISM, SaslMechanism.SCRAM_SHA_512.getValue());
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME, "user1");
runner.assertNotValid();
runner.setProperty(ConsumeKafkaRecord_2_0.SASL_PASSWORD, "password");
runner.assertValid();
runner.removeProperty(ConsumeKafkaRecord_2_0.SASL_USERNAME);
runner.assertNotValid();
}
@Test
public void testNonSaslSecurityProtocol() {
runner.setProperty(ConsumeKafkaRecord_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafkaRecord_2_0.AUTO_OFFSET_RESET, ConsumeKafkaRecord_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafkaRecord_2_0.SECURITY_PROTOCOL, SecurityProtocol.PLAINTEXT.name());
runner.assertValid();
}
}

View File

@ -1,118 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.nifi.kafka.shared.property.SaslMechanism;
import org.apache.nifi.kafka.shared.property.SecurityProtocol;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
public class TestConsumeKafka_2_0 {
ConsumerLease mockLease = null;
ConsumerPool mockConsumerPool = null;
@BeforeEach
public void setup() {
mockLease = mock(ConsumerLease.class);
mockConsumerPool = mock(ConsumerPool.class);
}
@Test
public void validateCustomValidatorSettings() {
ConsumeKafka_2_0 consumeKafka = new ConsumeKafka_2_0();
TestRunner runner = TestRunners.newTestRunner(consumeKafka);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "okeydokey:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
runner.assertValid();
runner.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
runner.assertValid();
runner.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
runner.assertValid();
}
@Test
public void validatePropertiesValidation() {
ConsumeKafka_2_0 consumeKafka = new ConsumeKafka_2_0();
TestRunner runner = TestRunners.newTestRunner(consumeKafka);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "okeydokey:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.removeProperty(ConsumeKafka_2_0.GROUP_ID);
AssertionError e = assertThrows(AssertionError.class, runner::assertValid);
assertTrue(e.getMessage().contains("invalid because Group ID is required"));
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, "");
e = assertThrows(AssertionError.class, runner::assertValid);
assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, " ");
e = assertThrows(AssertionError.class, runner::assertValid);
assertTrue(e.getMessage().contains("must contain at least one character that is not white space"));
}
@Test
public void testJaasConfiguration() {
ConsumeKafka_2_0 consumeKafka = new ConsumeKafka_2_0();
TestRunner runner = TestRunners.newTestRunner(consumeKafka);
runner.setProperty(ConsumeKafka_2_0.BOOTSTRAP_SERVERS, "okeydokey:1234");
runner.setProperty(ConsumeKafka_2_0.TOPICS, "foo");
runner.setProperty(ConsumeKafka_2_0.GROUP_ID, "foo");
runner.setProperty(ConsumeKafka_2_0.AUTO_OFFSET_RESET, ConsumeKafka_2_0.OFFSET_EARLIEST);
runner.setProperty(ConsumeKafka_2_0.SECURITY_PROTOCOL, SecurityProtocol.SASL_PLAINTEXT.name());
runner.setProperty(ConsumeKafka_2_0.SASL_MECHANISM, SaslMechanism.GSSAPI.getValue());
runner.assertNotValid();
runner.setProperty(ConsumeKafka_2_0.KERBEROS_SERVICE_NAME, "kafka");
runner.assertNotValid();
runner.setProperty(ConsumeKafka_2_0.KERBEROS_PRINCIPAL, "nifi@APACHE.COM");
runner.assertNotValid();
runner.setProperty(ConsumeKafka_2_0.KERBEROS_KEYTAB, "not.A.File");
runner.assertNotValid();
runner.setProperty(ConsumeKafka_2_0.KERBEROS_KEYTAB, "src/test/resources/server.properties");
runner.assertValid();
runner.setVariable("keytab", "src/test/resources/server.properties");
runner.setVariable("principal", "nifi@APACHE.COM");
runner.setVariable("service", "kafka");
runner.setProperty(ConsumeKafka_2_0.KERBEROS_PRINCIPAL, "${principal}");
runner.setProperty(ConsumeKafka_2_0.KERBEROS_KEYTAB, "${keytab}");
runner.setProperty(ConsumeKafka_2_0.KERBEROS_SERVICE_NAME, "${service}");
runner.assertValid();
}
}

View File

@ -1,119 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.mock.MockComponentLogger;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestConsumerPartitionsUtil {
private final ComponentLog logger = new MockComponentLogger();
private String hostname;
@BeforeEach
public void setup() throws UnknownHostException {
hostname = InetAddress.getLocalHost().getHostName();
}
@Test
public void testNoPartitionAssignments() throws UnknownHostException {
final Map<String, String> properties = Collections.singletonMap("key", "value");
final int[] partitions = ConsumerPartitionsUtil.getPartitionsForHost(properties, logger);
Assertions.assertNull(partitions);
}
@Test
public void testAllPartitionsAssignedToOneHost() throws UnknownHostException {
final Map<String, String> properties = new HashMap<>();
properties.put("key", "value");
properties.put("partitions." + hostname, "0, 1, 2, 3");
final int[] partitions = ConsumerPartitionsUtil.getPartitionsForHost(properties, logger);
assertNotNull(partitions);
assertArrayEquals(new int[] {0, 1, 2, 3}, partitions);
}
@Test
public void testSomePartitionsSkipped() {
final Map<String, String> properties = new HashMap<>();
properties.put("key", "value");
properties.put("partitions." + hostname, "0, 1, 2, 3, 5");
final ValidationResult invalidResult = ConsumerPartitionsUtil.validateConsumePartitions(properties);
assertNotNull(invalidResult);
assertFalse(invalidResult.isValid());
properties.put("partitions." + hostname, "0, 1,2,3,4, 5");
final ValidationResult validResult = ConsumerPartitionsUtil.validateConsumePartitions(properties);
assertNotNull(validResult);
assertTrue(validResult.isValid());
}
@Test
public void testCurrentNodeNotSpecified() {
final Map<String, String> properties = new HashMap<>();
properties.put("key", "value");
properties.put("partitions.other-host", "0, 1, 2, 3");
final ValidationResult invalidResult = ConsumerPartitionsUtil.validateConsumePartitions(properties);
assertNotNull(invalidResult);
assertFalse(invalidResult.isValid());
}
@Test
public void testPartitionListedTwice() {
final Map<String, String> properties = new HashMap<>();
properties.put("key", "value");
properties.put("partitions." + hostname, "2");
properties.put("partitions.other-host", "0, 1, 2, 3");
final ValidationResult invalidResult = ConsumerPartitionsUtil.validateConsumePartitions(properties);
assertNotNull(invalidResult);
assertFalse(invalidResult.isValid());
}
@Test
public void testNodeWithNoAssignment() throws UnknownHostException {
final Map<String, String> properties = new HashMap<>();
properties.put("key", "value");
properties.put("partitions." + hostname, "");
properties.put("partitions.other-host", "0, 1, 2, 3");
final ValidationResult invalidResult = ConsumerPartitionsUtil.validateConsumePartitions(properties);
assertNotNull(invalidResult);
assertTrue(invalidResult.isValid());
final int[] partitions = ConsumerPartitionsUtil.getPartitionsForHost(properties, logger);
assertNotNull(partitions);
assertEquals(0, partitions.length);
}
}

View File

@ -1,87 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.nifi.util.MockComponentLog;
import org.apache.nifi.util.MockFlowFile;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class TestInFlightMessageTracker {
@Test
@Timeout(value = 5000, unit = TimeUnit.MILLISECONDS)
public void testAwaitCompletionWhenComplete() throws InterruptedException, TimeoutException {
final MockFlowFile flowFile = new MockFlowFile(1L);
final InFlightMessageTracker tracker = new InFlightMessageTracker(new MockComponentLog("1", "unit-test"));
tracker.incrementSentCount(flowFile);
verifyNotComplete(tracker);
tracker.incrementSentCount(flowFile);
verifyNotComplete(tracker);
tracker.incrementAcknowledgedCount(flowFile);
verifyNotComplete(tracker);
tracker.incrementAcknowledgedCount(flowFile);
tracker.awaitCompletion(1L);
}
@Test
@Timeout(value = 5000, unit = TimeUnit.MILLISECONDS)
public void testAwaitCompletionWhileWaiting() throws InterruptedException, ExecutionException {
final MockFlowFile flowFile = new MockFlowFile(1L);
final InFlightMessageTracker tracker = new InFlightMessageTracker(new MockComponentLog("1", "unit-test"));
tracker.incrementSentCount(flowFile);
verifyNotComplete(tracker);
tracker.incrementSentCount(flowFile);
verifyNotComplete(tracker);
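// Block on awaitCompletion() from a background thread, then acknowledge both sends to release it.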
final ExecutorService exec = Executors.newFixedThreadPool(1);
final Future<?> future = exec.submit(() -> {
try {
tracker.awaitCompletion(10000L);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
tracker.incrementAcknowledgedCount(flowFile);
tracker.incrementAcknowledgedCount(flowFile);
future.get();
}
private void verifyNotComplete(final InFlightMessageTracker tracker) {
assertThrows(TimeoutException.class, () -> tracker.awaitCompletion(10L));
}
}

View File

@ -1,441 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.kafka.shared.property.FailureStrategy;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.MockRecordWriter;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.AdditionalMatchers;
import org.mockito.Mockito;
import org.mockito.stubbing.Answer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.isNull;
import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class TestPublishKafkaRecord_2_0 {
private static final String TOPIC_NAME = "unit-test";
private PublisherPool mockPool;
private PublisherLease mockLease;
private TestRunner runner;
@BeforeEach
public void setup() throws InitializationException, IOException {
mockPool = mock(PublisherPool.class);
mockLease = mock(PublisherLease.class);
Mockito.doCallRealMethod().when(mockLease).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
any(RecordSchema.class), any(String.class), any(String.class), nullable(Function.class));
when(mockPool.obtainPublisher()).thenReturn(mockLease);
runner = TestRunners.newTestRunner(new PublishKafkaRecord_2_0() {
@Override
protected PublisherPool createPublisherPool(final ProcessContext context) {
return mockPool;
}
});
runner.setProperty(PublishKafkaRecord_2_0.TOPIC, TOPIC_NAME);
final String readerId = "record-reader";
final MockRecordParser readerService = new MockRecordParser();
readerService.addSchemaField("name", RecordFieldType.STRING);
readerService.addSchemaField("age", RecordFieldType.INT);
runner.addControllerService(readerId, readerService);
runner.enableControllerService(readerService);
final String writerId = "record-writer";
final RecordSetWriterFactory writerService = new MockRecordWriter("name, age");
runner.addControllerService(writerId, writerService);
runner.enableControllerService(writerService);
runner.setProperty(PublishKafkaRecord_2_0.RECORD_READER, readerId);
runner.setProperty(PublishKafkaRecord_2_0.RECORD_WRITER, writerId);
runner.setProperty(PublishKafka_2_0.DELIVERY_GUARANTEE, PublishKafka_2_0.DELIVERY_REPLICATED);
}
@Test
public void testSingleSuccess() throws IOException {
final MockFlowFile flowFile = runner.enqueue("John Doe, 48");
when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_SUCCESS, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testMultipleSuccess() throws IOException {
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_SUCCESS, 3);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testSingleFailure() throws IOException {
final MockFlowFile flowFile = runner.enqueue("John Doe, 48");
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
}
@Test
public void testSingleFailureWithRollback() throws IOException {
runner.setProperty(PublishKafkaRecord_2_0.FAILURE_STRATEGY, FailureStrategy.ROLLBACK.getValue());
final MockFlowFile flowFile = runner.enqueue("John Doe, 48");
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 0);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).close();
assertEquals(1, runner.getQueueSize().getObjectCount());
}
@Test
public void testFailureWhenCreatingTransaction() {
runner.enqueue("John Doe, 48");
doAnswer((Answer<Object>) invocationOnMock -> {
throw new ProducerFencedException("Intentional ProducedFencedException for unit test");
}).when(mockLease).beginTransaction();
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 1);
verify(mockLease, times(1)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testFailureWhenCreatingTransactionWithRollback() {
runner.setProperty(PublishKafkaRecord_2_0.FAILURE_STRATEGY, FailureStrategy.ROLLBACK.getValue());
runner.enqueue("John Doe, 48");
doAnswer((Answer<Object>) invocationOnMock -> {
throw new ProducerFencedException("Intentional ProducedFencedException for unit test");
}).when(mockLease).beginTransaction();
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 0);
verify(mockLease, times(1)).poison();
verify(mockLease, times(1)).close();
assertEquals(1, runner.getQueueSize().getObjectCount());
}
@Test
public void testMultipleFailures() throws IOException {
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 3);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
}
@Test
public void testMultipleFailuresWithRollback() throws IOException {
runner.setProperty(PublishKafkaRecord_2_0.FAILURE_STRATEGY, FailureStrategy.ROLLBACK.getValue());
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_FAILURE, 0);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
assertEquals(3, runner.getQueueSize().getObjectCount());
}
@Test
public void testMultipleMessagesPerFlowFile() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue("John Doe, 48\nJane Doe, 47"));
flowFiles.add(runner.enqueue("John Doe, 48\nJane Doe, 29"));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 10);
msgCounts.put(flowFiles.get(1), 20);
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
when(mockLease.complete()).thenReturn(result);
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_SUCCESS, 2);
verify(mockLease, times(2)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(0)).publish(
any(FlowFile.class), any(Map.class), eq(null), any(byte[].class), eq(TOPIC_NAME), any(InFlightMessageTracker.class), any(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
runner.assertAllFlowFilesContainAttribute("msg.count");
assertEquals(1, runner.getFlowFilesForRelationship(PublishKafkaRecord_2_0.REL_SUCCESS).stream()
.filter(ff -> ff.getAttribute("msg.count").equals("10"))
.count());
assertEquals(1, runner.getFlowFilesForRelationship(PublishKafkaRecord_2_0.REL_SUCCESS).stream()
.filter(ff -> ff.getAttribute("msg.count").equals("20"))
.count());
}
@Test
public void testNoRecordsInFlowFile() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue(new byte[0]));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 0);
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
when(mockLease.complete()).thenReturn(result);
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_SUCCESS, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
final MockFlowFile mff = runner.getFlowFilesForRelationship(PublishKafkaRecord_2_0.REL_SUCCESS).get(0);
mff.assertAttributeEquals("msg.count", "0");
}
@Test
public void testRecordPathPartition() throws IOException {
runner.setProperty(PublishKafkaRecord_2_0.PARTITION_CLASS, PublishKafkaRecord_2_0.RECORD_PATH_PARTITIONING);
runner.setProperty(PublishKafkaRecord_2_0.PARTITION, "/age");
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue("John Doe, 48\nJane Doe, 48\nJim Doe, 13"));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 0);
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
mockLease = mock(PublisherLease.class);
when(mockLease.complete()).thenReturn(result);
when(mockPool.obtainPublisher()).thenReturn(mockLease);
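// Capture the partition chosen for each record, grouped by the /age field used for record-path partitioning.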
final Map<Integer, List<Integer>> partitionsByAge = new HashMap<>();
doAnswer((Answer<Object>) invocationOnMock -> {
final Function<Record, Integer> partitioner = invocationOnMock.getArgument(6, Function.class);
final RecordSet recordSet = invocationOnMock.getArgument(1, RecordSet.class);
Record record;
while ((record = recordSet.next()) != null) {
final int partition = partitioner.apply(record);
final Integer age = record.getAsInt("age");
partitionsByAge.computeIfAbsent(age, k -> new ArrayList<>()).add(partition);
}
return null;
}).when(mockLease).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
nullable(RecordSchema.class), nullable(String.class), any(String.class), nullable(Function.class));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafkaRecord_2_0.REL_SUCCESS, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
nullable(RecordSchema.class), nullable(String.class), any(String.class), nullable(Function.class));
assertEquals(2, partitionsByAge.size()); // 2 ages
final List<Integer> partitionsForAge13 = partitionsByAge.get(13);
assertEquals(1, partitionsForAge13.size());
final List<Integer> partitionsForAge48 = partitionsByAge.get(48);
assertEquals(2, partitionsForAge48.size());
assertEquals(partitionsForAge48.get(0), partitionsForAge48.get(1));
}
@Test
public void testSomeSuccessSomeFailure() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
flowFiles.add(runner.enqueue("John Doe, 48"));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 10);
msgCounts.put(flowFiles.get(1), 20);
final Map<FlowFile, Exception> failureMap = new HashMap<>();
failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception"));
failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception"));
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap);
when(mockLease.complete()).thenReturn(result);
runner.run();
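// A partially failed publish result routes the entire batch of four FlowFiles to failure.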
runner.assertTransferCount(PublishKafkaRecord_2_0.REL_SUCCESS, 0);
runner.assertTransferCount(PublishKafkaRecord_2_0.REL_FAILURE, 4);
verify(mockLease, times(4)).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class),
AdditionalMatchers.or(any(RecordSchema.class), isNull()), eq(null), eq(TOPIC_NAME), nullable(Function.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
assertTrue(runner.getFlowFilesForRelationship(PublishKafkaRecord_2_0.REL_FAILURE).stream()
.noneMatch(ff -> ff.getAttribute("msg.count") != null));
}
private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) {
return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount);
}
private PublishResult createAllSuccessPublishResult(final Set<FlowFile> successfulFlowFiles, final int msgCountPerFlowFile) {
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
for (final FlowFile ff : successfulFlowFiles) {
msgCounts.put(ff, msgCountPerFlowFile);
}
return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap());
}
private PublishResult createFailurePublishResult(final FlowFile failure) {
return createFailurePublishResult(Collections.singleton(failure));
}
private PublishResult createFailurePublishResult(final Set<FlowFile> failures) {
final Map<FlowFile, Exception> failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception")));
return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap);
}
private PublishResult createPublishResult(final Map<FlowFile, Integer> msgCounts, final Set<FlowFile> successFlowFiles, final Map<FlowFile, Exception> failures) {
// sanity check.
for (final FlowFile success : successFlowFiles) {
if (failures.containsKey(success)) {
throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success);
}
}
return new PublishResult() {
@Override
public int getSuccessfulMessageCount(FlowFile flowFile) {
Integer count = msgCounts.get(flowFile);
return count == null ? 0 : count;
}
@Override
public Exception getReasonForFailure(FlowFile flowFile) {
return failures.get(flowFile);
}
@Override
public boolean isFailure() {
return !failures.isEmpty();
}
};
}
}

View File

@ -1,306 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.kafka.shared.property.FailureStrategy;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.stubbing.Answer;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
public class TestPublishKafka_2_0 {
private static final String TOPIC_NAME = "unit-test";
private PublisherPool mockPool;
private PublisherLease mockLease;
private TestRunner runner;
@BeforeEach
public void setup() {
mockPool = mock(PublisherPool.class);
mockLease = mock(PublisherLease.class);
when(mockPool.obtainPublisher()).thenReturn(mockLease);
runner = TestRunners.newTestRunner(new PublishKafka_2_0() {
@Override
protected PublisherPool createPublisherPool(final ProcessContext context) {
return mockPool;
}
});
runner.setProperty(PublishKafka_2_0.TOPIC, TOPIC_NAME);
runner.setProperty(PublishKafka_2_0.DELIVERY_GUARANTEE, PublishKafka_2_0.DELIVERY_REPLICATED);
}
@Test
public void testSingleSuccess() throws IOException {
final MockFlowFile flowFile = runner.enqueue("hello world");
when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFile, 1));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_SUCCESS, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testMultipleSuccess() throws IOException {
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
when(mockLease.complete()).thenReturn(createAllSuccessPublishResult(flowFiles, 1));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_SUCCESS, 3);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testPublisherPoisonedIfFencedDuringTransactionCreation() {
runner.enqueue("hello world");
runner.enqueue("Hello World");
doAnswer((Answer<Object>) invocationOnMock -> {
throw new ProducerFencedException("Intentional ProducedFencedException for unit test");
}).when(mockLease).beginTransaction();
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_FAILURE, 2);
verify(mockLease, times(1)).poison();
verify(mockLease, times(1)).close();
}
@Test
public void testSingleFailure() throws IOException {
final MockFlowFile flowFile = runner.enqueue("hello world");
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_FAILURE, 1);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
}
@Test
public void testSingleFailureWithRollback() throws IOException {
runner.setProperty(PublishKafka_2_0.FAILURE_STRATEGY, FailureStrategy.ROLLBACK.getValue());
final MockFlowFile flowFile = runner.enqueue("hello world");
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFile));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_FAILURE, 0);
verify(mockLease, times(1)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).close();
assertEquals(1, runner.getQueueSize().getObjectCount());
}
@Test
public void testMultipleFailures() throws IOException {
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_FAILURE, 3);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
}
@Test
public void testMultipleFailuresWithRollback() throws IOException {
runner.setProperty(PublishKafka_2_0.FAILURE_STRATEGY, FailureStrategy.ROLLBACK.getValue());
final Set<FlowFile> flowFiles = new HashSet<>();
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
when(mockLease.complete()).thenReturn(createFailurePublishResult(flowFiles));
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_FAILURE, 0);
verify(mockLease, times(3)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).close();
assertEquals(3, runner.getQueueSize().getObjectCount());
}
@Test
public void testMultipleMessagesPerFlowFile() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 10);
msgCounts.put(flowFiles.get(1), 20);
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles), Collections.emptyMap());
when(mockLease.complete()).thenReturn(result);
runner.run();
runner.assertAllFlowFilesTransferred(PublishKafka_2_0.REL_SUCCESS, 2);
verify(mockLease, times(2)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(0)).poison();
verify(mockLease, times(1)).close();
runner.assertAllFlowFilesContainAttribute("msg.count");
assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_2_0.REL_SUCCESS).stream()
.filter(ff -> ff.getAttribute("msg.count").equals("10"))
.count());
assertEquals(1, runner.getFlowFilesForRelationship(PublishKafka_2_0.REL_SUCCESS).stream()
.filter(ff -> ff.getAttribute("msg.count").equals("20"))
.count());
}
@Test
public void testSomeSuccessSomeFailure() throws IOException {
final List<FlowFile> flowFiles = new ArrayList<>();
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
flowFiles.add(runner.enqueue("hello world"));
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
msgCounts.put(flowFiles.get(0), 10);
msgCounts.put(flowFiles.get(1), 20);
final Map<FlowFile, Exception> failureMap = new HashMap<>();
failureMap.put(flowFiles.get(2), new RuntimeException("Intentional Unit Test Exception"));
failureMap.put(flowFiles.get(3), new RuntimeException("Intentional Unit Test Exception"));
final PublishResult result = createPublishResult(msgCounts, new HashSet<>(flowFiles.subList(0, 2)), failureMap);
when(mockLease.complete()).thenReturn(result);
runner.run();
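// Any failure in the publish result sends all four FlowFiles to the failure relationship.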
runner.assertTransferCount(PublishKafka_2_0.REL_SUCCESS, 0);
runner.assertTransferCount(PublishKafka_2_0.REL_FAILURE, 4);
verify(mockLease, times(4)).publish(any(FlowFile.class), any(InputStream.class), eq(null), eq(null), eq(TOPIC_NAME), nullable(Integer.class));
verify(mockLease, times(1)).complete();
verify(mockLease, times(1)).close();
assertTrue(runner.getFlowFilesForRelationship(PublishKafka_2_0.REL_FAILURE).stream()
.noneMatch(ff -> ff.getAttribute("msg.count") != null));
}
private PublishResult createAllSuccessPublishResult(final FlowFile successfulFlowFile, final int msgCount) {
return createAllSuccessPublishResult(Collections.singleton(successfulFlowFile), msgCount);
}
private PublishResult createAllSuccessPublishResult(final Set<FlowFile> successfulFlowFiles, final int msgCountPerFlowFile) {
final Map<FlowFile, Integer> msgCounts = new HashMap<>();
for (final FlowFile ff : successfulFlowFiles) {
msgCounts.put(ff, msgCountPerFlowFile);
}
return createPublishResult(msgCounts, successfulFlowFiles, Collections.emptyMap());
}
private PublishResult createFailurePublishResult(final FlowFile failure) {
return createFailurePublishResult(Collections.singleton(failure));
}
private PublishResult createFailurePublishResult(final Set<FlowFile> failures) {
final Map<FlowFile, Exception> failureMap = failures.stream().collect(Collectors.toMap(ff -> ff, ff -> new RuntimeException("Intentional Unit Test Exception")));
return createPublishResult(Collections.emptyMap(), Collections.emptySet(), failureMap);
}
private PublishResult createPublishResult(final Map<FlowFile, Integer> msgCounts, final Set<FlowFile> successFlowFiles, final Map<FlowFile, Exception> failures) {
// sanity check.
for (final FlowFile success : successFlowFiles) {
if (failures.containsKey(success)) {
throw new IllegalArgumentException("Found same FlowFile in both 'success' and 'failures' collections: " + success);
}
}
return new PublishResult() {
@Override
public boolean isFailure() {
return !failures.isEmpty();
}
@Override
public int getSuccessfulMessageCount(FlowFile flowFile) {
Integer count = msgCounts.get(flowFile);
return count == null ? 0 : count;
}
@Override
public Exception getReasonForFailure(FlowFile flowFile) {
return failures.get(flowFile);
}
};
}
}

View File

@ -1,275 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.apache.nifi.util.MockFlowFile;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import org.mockito.stubbing.Answer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
public class TestPublisherLease {
private ComponentLog logger;
private Producer<byte[], byte[]> producer;
@BeforeEach
@SuppressWarnings("unchecked")
public void setup() {
logger = Mockito.mock(ComponentLog.class);
producer = Mockito.mock(Producer.class);
}
@Test
public void testPoisonOnException() {
final PoisonCountingLease lease = new PoisonCountingLease();
final FlowFile flowFile = Mockito.spy(new MockFlowFile(1L));
// Need a size greater than zero so that the lease reads the InputStream.
Mockito.when(flowFile.getSize()).thenReturn(1L);
final String topic = "unit-test";
final byte[] messageKey = null;
final byte[] demarcatorBytes = null;
final InputStream failureInputStream = new InputStream() {
@Override
public int read() throws IOException {
throw new IOException("Intentional Unit Test Exception");
}
};
assertThrows(IOException.class, () -> lease.publish(flowFile, failureInputStream, messageKey, demarcatorBytes, topic, null));
assertEquals(1, lease.getPoisonCount());
final PublishResult result = lease.complete();
assertTrue(result.isFailure());
}
@Test
public void testPoisonOnExceptionCreatingTransaction() {
final PoisonCountingLease lease = new PoisonCountingLease();
final FlowFile flowFile = Mockito.spy(new MockFlowFile(1L));
// Need a size greater than zero so that the lease reads the InputStream.
Mockito.when(flowFile.getSize()).thenReturn(1L);
doAnswer((Answer<Object>) invocationOnMock -> {
throw new ProducerFencedException("Intenitional exception thrown from unit test");
}).when(producer).beginTransaction();
assertThrows(ProducerFencedException.class, () -> lease.beginTransaction());
assertEquals(1, lease.getPoisonCount());
}
@Test
@SuppressWarnings("unchecked")
public void testPoisonOnFailure() throws IOException {
final PoisonCountingLease lease = new PoisonCountingLease();
final FlowFile flowFile = new MockFlowFile(1L);
final String topic = "unit-test";
final byte[] messageKey = null;
final byte[] demarcatorBytes = null;
doAnswer((Answer<Object>) invocation -> {
final Callback callback = invocation.getArgument(1);
callback.onCompletion(null, new RuntimeException("Unit Test Intentional Exception"));
return null;
}).when(producer).send(any(ProducerRecord.class), any(Callback.class));
lease.publish(flowFile, new ByteArrayInputStream(new byte[1]), messageKey, demarcatorBytes, topic, null);
assertEquals(1, lease.getPoisonCount());
final PublishResult result = lease.complete();
assertTrue(result.isFailure());
}
@Test
@SuppressWarnings("unchecked")
public void testAllDelimitedMessagesSent() throws IOException {
final PoisonCountingLease lease = new PoisonCountingLease();
final AtomicInteger correctMessages = new AtomicInteger(0);
final AtomicInteger incorrectMessages = new AtomicInteger(0);
doAnswer((Answer<Object>) invocation -> {
final ProducerRecord<byte[], byte[]> record = invocation.getArgument(0);
final byte[] value = record.value();
final String valueString = new String(value, StandardCharsets.UTF_8);
if ("1234567890".equals(valueString)) {
correctMessages.incrementAndGet();
} else {
incorrectMessages.incrementAndGet();
}
return null;
}).when(producer).send(any(ProducerRecord.class), any(Callback.class));
final FlowFile flowFile = new MockFlowFile(1L);
final String topic = "unit-test";
final byte[] messageKey = null;
final byte[] demarcatorBytes = "\n".getBytes(StandardCharsets.UTF_8);
final byte[] flowFileContent = "1234567890\n1234567890\n1234567890\n\n\n\n1234567890\n\n\n1234567890\n\n\n\n".getBytes(StandardCharsets.UTF_8);
lease.publish(flowFile, new ByteArrayInputStream(flowFileContent), messageKey, demarcatorBytes, topic, null);
final byte[] flowFileContent2 = new byte[0];
lease.publish(new MockFlowFile(2L), new ByteArrayInputStream(flowFileContent2), messageKey, demarcatorBytes, topic, null);
final byte[] flowFileContent3 = "1234567890\n1234567890".getBytes(StandardCharsets.UTF_8); // no trailing new line
lease.publish(new MockFlowFile(3L), new ByteArrayInputStream(flowFileContent3), messageKey, demarcatorBytes, topic, null);
final byte[] flowFileContent4 = "\n\n\n".getBytes(StandardCharsets.UTF_8);
lease.publish(new MockFlowFile(4L), new ByteArrayInputStream(flowFileContent4), messageKey, demarcatorBytes, topic, null);
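// Five non-empty tokens from the first FlowFile plus two from the third FlowFile give seven expected messages; empty demarcated segments are skipped.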
assertEquals(0, lease.getPoisonCount());
verify(producer, times(0)).flush();
final PublishResult result = lease.complete();
assertTrue(result.isFailure());
assertEquals(7, correctMessages.get());
assertEquals(0, incorrectMessages.get());
verify(producer, times(1)).flush();
}
@Test
@SuppressWarnings("unchecked")
public void testZeroByteMessageSent() throws IOException {
final PoisonCountingLease lease = new PoisonCountingLease();
final AtomicInteger correctMessages = new AtomicInteger(0);
final AtomicInteger incorrectMessages = new AtomicInteger(0);
doAnswer((Answer<Object>) invocation -> {
final ProducerRecord<byte[], byte[]> record = invocation.getArgument(0);
final byte[] value = record.value();
final String valueString = new String(value, StandardCharsets.UTF_8);
if ("".equals(valueString)) {
correctMessages.incrementAndGet();
} else {
incorrectMessages.incrementAndGet();
}
return null;
}).when(producer).send(any(ProducerRecord.class), any(Callback.class));
final FlowFile flowFile = new MockFlowFile(1L);
final String topic = "unit-test";
final byte[] messageKey = null;
final byte[] demarcatorBytes = null;
final byte[] flowFileContent = new byte[0];
lease.publish(flowFile, new ByteArrayInputStream(flowFileContent), messageKey, demarcatorBytes, topic, null);
assertEquals(0, lease.getPoisonCount());
verify(producer, times(0)).flush();
lease.complete();
assertEquals(1, correctMessages.get());
assertEquals(0, incorrectMessages.get());
verify(producer, times(1)).flush();
}
@Test
public void testRecordsSentToRecordWriterAndThenToProducer() throws IOException, SchemaNotFoundException, MalformedRecordException {
final PoisonCountingLease lease = new PoisonCountingLease();
final FlowFile flowFile = new MockFlowFile(1L);
final byte[] exampleInput = "101, John Doe, 48\n102, Jane Doe, 47".getBytes(StandardCharsets.UTF_8);
final MockRecordParser readerService = new MockRecordParser();
readerService.addSchemaField("person_id", RecordFieldType.LONG);
readerService.addSchemaField("name", RecordFieldType.STRING);
readerService.addSchemaField("age", RecordFieldType.INT);
final RecordReader reader = readerService.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(exampleInput), -1, logger);
final RecordSet recordSet = reader.createRecordSet();
final RecordSchema schema = reader.getSchema();
final String topic = "unit-test";
final String keyField = "person_id";
final RecordSetWriterFactory writerFactory = Mockito.mock(RecordSetWriterFactory.class);
final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class);
Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap()));
Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any(), eq(flowFile))).thenReturn(writer);
lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic, null);
verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any(), eq(flowFile));
verify(writer, times(2)).write(any(Record.class));
verify(producer, times(2)).send(any(), any());
assertEquals(0, lease.getPoisonCount());
}
private class PoisonCountingLease extends PublisherLease {
private final AtomicInteger poisonCount = new AtomicInteger(0);
public PoisonCountingLease() {
super(producer, 1024 * 1024, 1000L, logger, true, null, StandardCharsets.UTF_8);
}
@Override
public void poison() {
poisonCount.incrementAndGet();
super.poison();
}
public int getPoisonCount() {
return poisonCount.get();
}
}
}
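The assertions above pin down the lease lifecycle: publish, complete (which performs the single flush), and poison on error. As an illustration only, the caller-side flow they imply looks roughly like the sketch below; pool, flowFile, content, messageKey, demarcatorBytes and topic are placeholder names, not fields of this test class.
final PublisherLease lease = pool.obtainPublisher();
try {
    lease.publish(flowFile, content, messageKey, demarcatorBytes, topic, null);
    final PublishResult result = lease.complete();   // performs the single flush asserted above
    // route the FlowFile to success or failure based on result.isFailure()
} catch (final Exception e) {
    lease.poison();                                  // poisoned leases are discarded rather than returned to the pool
    throw e;
} finally {
    lease.close();
}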

View File

@ -1,68 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.nifi.logging.ComponentLog;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class TestPublisherPool {
@Test
public void testLeaseCloseReturnsToPool() {
final Map<String, Object> kafkaProperties = new HashMap<>();
kafkaProperties.put("bootstrap.servers", "localhost:1111");
kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName());
kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName());
final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L, false, null, null, StandardCharsets.UTF_8);
assertEquals(0, pool.available());
final PublisherLease lease = pool.obtainPublisher();
assertEquals(0, pool.available());
lease.close();
assertEquals(1, pool.available());
}
@Test
public void testPoisonedLeaseNotReturnedToPool() {
final Map<String, Object> kafkaProperties = new HashMap<>();
kafkaProperties.put("bootstrap.servers", "localhost:1111");
kafkaProperties.put("key.serializer", ByteArraySerializer.class.getName());
kafkaProperties.put("value.serializer", ByteArraySerializer.class.getName());
final PublisherPool pool = new PublisherPool(kafkaProperties, Mockito.mock(ComponentLog.class), 1024 * 1024, 1000L, false, null, null, StandardCharsets.UTF_8);
assertEquals(0, pool.available());
final PublisherLease lease = pool.obtainPublisher();
assertEquals(0, pool.available());
lease.poison();
lease.close();
assertEquals(0, pool.available());
}
}

View File

@ -1,105 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.kafka.pubsub.util;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.SchemaValidationException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class MockRecordParser extends AbstractControllerService implements RecordReaderFactory {
private final List<Object[]> records = new ArrayList<>();
private final List<RecordField> fields = new ArrayList<>();
private final int failAfterN;
public MockRecordParser() {
this(-1);
}
public MockRecordParser(final int failAfterN) {
this.failAfterN = failAfterN;
}
public void addSchemaField(final String fieldName, final RecordFieldType type) {
fields.add(new RecordField(fieldName, type.getDataType()));
}
public void addRecord(Object... values) {
records.add(values);
}
@Override
public RecordReader createRecordReader(Map<String, String> variables, InputStream in, long inputLength, ComponentLog logger) throws IOException, SchemaNotFoundException {
final BufferedReader reader = new BufferedReader(new InputStreamReader(in));
return new RecordReader() {
private int recordCount = 0;
@Override
public void close() throws IOException {
}
@Override
public Record nextRecord(boolean coerceTypes, boolean dropUnknown) throws IOException, MalformedRecordException, SchemaValidationException {
if (failAfterN > -1 && recordCount >= failAfterN) {
throw new MalformedRecordException("Intentional Unit Test Exception because " + recordCount + " records have been read");
}
final String line = reader.readLine();
if (line == null) {
return null;
}
recordCount++;
final String[] values = line.split(",");
final Map<String, Object> valueMap = new HashMap<>();
int i = 0;
for (final RecordField field : fields) {
final String fieldName = field.getFieldName();
valueMap.put(fieldName, values[i++].trim());
}
return new MapRecord(new SimpleRecordSchema(fields), valueMap);
}
@Override
public RecordSchema getSchema() {
return new SimpleRecordSchema(fields);
}
};
}
}
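A brief usage sketch of this factory, assuming the standard java.io/java.nio/java.util imports plus a ComponentLog named logger (variable names here are illustrative): the factory is seeded with a schema, and the returned reader turns each comma-separated line into a MapRecord of trimmed String values keyed by the registered field names.
final MockRecordParser readerFactory = new MockRecordParser();
readerFactory.addSchemaField("id", RecordFieldType.LONG);
readerFactory.addSchemaField("name", RecordFieldType.STRING);

final byte[] input = "1, Alice\n2, Bob".getBytes(StandardCharsets.UTF_8);
final RecordReader reader = readerFactory.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(input), input.length, logger);

Record record;
while ((record = reader.nextRecord(true, true)) != null) {
    // values arrive as trimmed Strings; the mock does not coerce types
}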

View File

@ -1,211 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.sink.kafka;
import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.nifi.attribute.expression.language.StandardPropertyValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.state.StateManager;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.ControllerServiceInitializationContext;
import org.apache.nifi.kafka.shared.property.SecurityProtocol;
import org.apache.nifi.kerberos.KerberosCredentialsService;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.DataUnit;
import org.apache.nifi.record.sink.RecordSinkService;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.ListRecordSet;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.MockRecordWriter;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.apache.nifi.ssl.SSLContextService;
import org.apache.nifi.state.MockStateManager;
import org.apache.nifi.util.MockControllerServiceInitializationContext;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentMatcher;
import org.mockito.Mockito;
import org.mockito.stubbing.Answer;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class TestKafkaRecordSink_2_0 {
private static final String TOPIC_NAME = "unit-test";
@Test
public void testRecordFormat() throws IOException, InitializationException {
MockKafkaRecordSink_2_0 task = initTask();
List<RecordField> recordFields = Arrays.asList(
new RecordField("field1", RecordFieldType.INT.getDataType()),
new RecordField("field2", RecordFieldType.STRING.getDataType())
);
RecordSchema recordSchema = new SimpleRecordSchema(recordFields);
Map<String, Object> row1 = new HashMap<>();
row1.put("field1", 15);
row1.put("field2", "Hello");
Map<String, Object> row2 = new HashMap<>();
row2.put("field1", 6);
row2.put("field2", "World!");
RecordSet recordSet = new ListRecordSet(recordSchema, Arrays.asList(
new MapRecord(recordSchema, row1),
new MapRecord(recordSchema, row2)
));
task.sendData(recordSet, new HashMap<>(), true);
assertEquals(1, task.dataSent.size());
String[] lines = new String(task.dataSent.get(0), StandardCharsets.UTF_8).split("\n");
assertNotNull(lines);
assertEquals(2, lines.length);
String[] data = lines[0].split(",");
assertEquals("15", data[0]); // In the MockRecordWriter all values are strings
assertEquals("Hello", data[1]);
data = lines[1].split(",");
assertEquals("6", data[0]);
assertEquals("World!", data[1]);
}
private MockKafkaRecordSink_2_0 initTask() throws InitializationException {
final ComponentLog logger = mock(ComponentLog.class);
final MockKafkaRecordSink_2_0 task = new MockKafkaRecordSink_2_0();
ConfigurationContext context = mock(ConfigurationContext.class);
final StateManager stateManager = new MockStateManager(task);
final PropertyValue topicValue = Mockito.mock(StandardPropertyValue.class);
when(topicValue.evaluateAttributeExpressions()).thenReturn(topicValue);
when(topicValue.getValue()).thenReturn(TOPIC_NAME);
when(context.getProperty(KafkaRecordSink_2_0.TOPIC)).thenReturn(topicValue);
final PropertyValue deliveryValue = Mockito.mock(StandardPropertyValue.class);
when(deliveryValue.getValue()).thenReturn(KafkaRecordSink_2_0.DELIVERY_REPLICATED.getValue());
when(context.getProperty(KafkaRecordSink_2_0.DELIVERY_GUARANTEE)).thenReturn(deliveryValue);
final PropertyValue maxSizeValue = Mockito.mock(StandardPropertyValue.class);
when(maxSizeValue.asDataSize(DataUnit.B)).thenReturn(1024.0);
when(context.getProperty(KafkaRecordSink_2_0.MAX_REQUEST_SIZE)).thenReturn(maxSizeValue);
final PropertyValue maxAckWaitValue = Mockito.mock(StandardPropertyValue.class);
when(maxAckWaitValue.asTimePeriod(TimeUnit.MILLISECONDS)).thenReturn(5000L);
when(context.getProperty(KafkaRecordSink_2_0.ACK_WAIT_TIME)).thenReturn(maxAckWaitValue);
final PropertyValue charEncodingValue = Mockito.mock(StandardPropertyValue.class);
when(charEncodingValue.evaluateAttributeExpressions()).thenReturn(charEncodingValue);
when(charEncodingValue.getValue()).thenReturn("UTF-8");
when(context.getProperty(KafkaRecordSink_2_0.MESSAGE_HEADER_ENCODING)).thenReturn(charEncodingValue);
final PropertyValue securityValue = Mockito.mock(StandardPropertyValue.class);
when(securityValue.getValue()).thenReturn(SecurityProtocol.PLAINTEXT.name());
when(context.getProperty(KafkaRecordSink_2_0.SECURITY_PROTOCOL)).thenReturn(securityValue);
final PropertyValue jaasValue = Mockito.mock(StandardPropertyValue.class);
when(jaasValue.evaluateAttributeExpressions()).thenReturn(jaasValue);
when(jaasValue.getValue()).thenReturn(null);
when(context.getProperty(KafkaRecordSink_2_0.KERBEROS_SERVICE_NAME)).thenReturn(jaasValue);
Map<PropertyDescriptor, String> propertyMap = new HashMap<>();
propertyMap.put(KafkaRecordSink_2_0.TOPIC, KafkaRecordSink_2_0.TOPIC.getName());
propertyMap.put(KafkaRecordSink_2_0.DELIVERY_GUARANTEE, KafkaRecordSink_2_0.DELIVERY_GUARANTEE.getName());
propertyMap.put(KafkaRecordSink_2_0.MAX_REQUEST_SIZE, KafkaRecordSink_2_0.MAX_REQUEST_SIZE.getName());
propertyMap.put(KafkaRecordSink_2_0.ACK_WAIT_TIME, KafkaRecordSink_2_0.ACK_WAIT_TIME.getName());
propertyMap.put(KafkaRecordSink_2_0.MESSAGE_HEADER_ENCODING, KafkaRecordSink_2_0.MESSAGE_HEADER_ENCODING.getName());
when(context.getProperties()).thenReturn(propertyMap);
final PropertyValue pValue = Mockito.mock(StandardPropertyValue.class);
// No header, don't quote values
MockRecordWriter writer = new MockRecordWriter(null, false);
when(context.getProperty(RecordSinkService.RECORD_WRITER_FACTORY)).thenReturn(pValue);
when(pValue.asControllerService(RecordSetWriterFactory.class)).thenReturn(writer);
when(context.getProperty(KafkaRecordSink_2_0.SSL_CONTEXT_SERVICE)).thenReturn(pValue);
when(pValue.asControllerService(SSLContextService.class)).thenReturn(null);
when(context.getProperty(KafkaRecordSink_2_0.KERBEROS_CREDENTIALS_SERVICE)).thenReturn(pValue);
when(pValue.asControllerService(KerberosCredentialsService.class)).thenReturn(null);
final ControllerServiceInitializationContext initContext = new MockControllerServiceInitializationContext(task, UUID.randomUUID().toString(), logger, stateManager);
task.initialize(initContext);
task.onEnabled(context);
return task;
}
private static class MockKafkaRecordSink_2_0 extends KafkaRecordSink_2_0 {
final List<byte[]> dataSent = new ArrayList<>();
@SuppressWarnings("unchecked")
@Override
protected Producer<byte[], byte[]> createProducer(Map<String, Object> kafkaProperties) {
final Producer<byte[], byte[]> mockProducer = (Producer<byte[], byte[]>) mock(Producer.class);
when(mockProducer.send(Mockito.argThat(new ByteProducerRecordMatcher()), any(Callback.class))).then(
(Answer<Future<RecordMetadata>>) invocationOnMock -> {
ProducerRecord<byte[], byte[]> producerRecord = invocationOnMock.getArgument(0);
final byte[] data = producerRecord.value();
dataSent.add(data);
Callback callback = invocationOnMock.getArgument(1);
RecordMetadata recordMetadata = new RecordMetadata(
new TopicPartition(producerRecord.topic(), producerRecord.partition() != null ? producerRecord.partition() : 0),
0,
data.length,
producerRecord.timestamp() != null ? producerRecord.timestamp() : System.currentTimeMillis(),
0L,
producerRecord.key() != null ? producerRecord.key().length : 0,
data.length);
callback.onCompletion(recordMetadata, null);
return new FutureTask<>(() -> { }, recordMetadata);
});
return mockProducer;
}
}
private static class ByteProducerRecordMatcher implements ArgumentMatcher<ProducerRecord<byte[], byte[]>> {
@Override
public boolean matches(ProducerRecord<byte[], byte[]> producer) {
return true;
}
}
}

View File

@ -1,121 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.server.KafkaConfig for additional details and defaults
############################# Server Basics #############################
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0
############################# Socket Server Settings #############################
# The port the socket server listens on
#port=9092
# Hostname the broker will bind to. If not set, the server will bind to all interfaces
#host.name=localhost
# Hostname the broker will advertise to producers and consumers. If not set, it uses the
# value for "host.name" if configured. Otherwise, it will use the value returned from
# java.net.InetAddress.getCanonicalHostName().
#advertised.host.name=<hostname routable by clients>
# The port to publish to ZooKeeper for clients to use. If this is not set,
# it will publish the same port that the broker binds to.
#advertised.port=<port accessible by clients>
# The number of threads handling network requests
num.network.threads=3
# The number of threads doing disk I/O
num.io.threads=8
# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400
# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400
# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600
############################# Log Basics #############################
# A comma separated list of directories under which to store log files
log.dirs=target/kafka-tmp/kafka-logs
# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=1
# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# This value is recommended to be increased for installations with data dirs located in a RAID array.
num.recovery.threads.per.data.dir=1
############################# Log Flush Policy #############################
# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
# 1. Durability: Unflushed data may be lost if you are not using replication.
# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.
# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000
# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000
############################# Log Retention Policy #############################
# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria is met. Deletion always happens
# from the end of the log.
# The minimum age of a log file to be eligible for deletion
log.retention.hours=168
# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes.
#log.retention.bytes=1073741824
# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824
# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=300000
# By default the log cleaner is disabled and the log retention policy defaults to deleting segments after their retention expires.
# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
log.cleaner.enable=false
############################# Zookeeper #############################
# Zookeeper connection string (see zookeeper docs for details).
# This is a comma-separated list of host:port pairs, each corresponding to a ZooKeeper
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the URLs to specify the
# root directory for all kafka znodes.
zookeeper.connect=localhost:2181
# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=6000
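This file backs the embedded test broker. A minimal, hypothetical sketch of loading it and redirecting the data directory per test run is shown below; the resource path is an assumption, and the snippet presumes the usual java.io, java.nio.file and java.util imports inside a test method that declares IOException.
final Properties brokerProps = new Properties();
try (final InputStream in = Files.newInputStream(Paths.get("src/test/resources/server.properties"))) {
    brokerProps.load(in);
}
// keep broker data out of the working tree by pointing log.dirs at a per-run temporary directory
final Path dataDir = Files.createTempDirectory("kafka-embedded");
brokerProps.setProperty("log.dirs", dataDir.resolve("kafka-logs").toString());
brokerProps.setProperty("zookeeper.connect", "localhost:2181");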

View File

@ -1,20 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# the directory where the snapshot is stored.
dataDir=target/kafka-tmp/zookeeper
# the port at which the clients will connect
#clientPort=2181
# disable the per-ip limit on the number of connections since this is a non-production config
maxClientCnxns=0

View File

@ -29,9 +29,7 @@
</properties>
<modules>
<module>nifi-kafka-2-0-processors</module>
<module>nifi-kafka-2-6-processors</module>
<module>nifi-kafka-2-0-nar</module>
<module>nifi-kafka-2-6-nar</module>
<module>nifi-kafka-shared</module>
</modules>