NIFI-3709: Export NiFi flow dataset lineage to Apache Atlas

Koji Kawamura 2017-10-30 12:41:27 +09:00 committed by Mark Payne
parent 44a3ac9eff
commit fc73c60924
103 changed files with 20345 additions and 0 deletions


@@ -553,6 +553,12 @@ language governing permissions and limitations under the License. -->
<version>1.5.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-atlas-nar</artifactId>
<version>1.5.0-SNAPSHOT</version>
<type>nar</type>
</dependency>
</dependencies>
<profiles>
<profile>


@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-atlas-bundle</artifactId>
<version>1.5.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-atlas-nar</artifactId>
<packaging>nar</packaging>
<properties>
<maven.javadoc.skip>true</maven.javadoc.skip>
<source.skip>true</source.skip>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-atlas-reporting-task</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-standard-services-api-nar</artifactId>
<type>nar</type>
</dependency>
</dependencies>
</project>


@@ -0,0 +1,364 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
APACHE NIFI SUBCOMPONENTS:
The Apache NiFi project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for these
subcomponents is subject to the terms and conditions of the following
licenses.
The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
under an MIT style license.
Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
This product bundles 'asm' which is available under a 3-Clause BSD style license.
For details see http://asm.ow2.org/asmdex-license.html
Copyright (c) 2012 France Télécom
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
The binary distribution of this product bundles 'JCraft Jsch' which is available
under a BSD style license.
Copyright (c) 2002-2015 Atsuhiko Yamanaka, JCraft,Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
3. The names of the authors may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT,
INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The binary distribution of this product bundles 'Scala Library' under a BSD
style license.
Copyright (c) 2002-2015 EPFL
Copyright (c) 2011-2015 Typesafe, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
Neither the name of the EPFL nor the names of its contributors may be used to endorse
or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The binary distribution of this product bundles 'The JSR-305 reference implementation'
which is licensed under a BSD license.
The JSR-305 reference implementation (lib/jsr305.jar) is
distributed under the terms of the New BSD license:
http://www.opensource.org/licenses/bsd-license.php
See the JSR-305 home page for more information:
http://code.google.com/p/jsr-305/
The binary distribution of this product bundles 'ParaNamer' and 'Paranamer Core'
which is available under a BSD style license.
Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,516 @@
nifi-atlas-nar
Copyright 2014-2017 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
===========================================
Apache Software License v2
===========================================
The following binary components are provided under the Apache Software License v2
(ASLv2) Apache Directory Server
The following NOTICE information applies:
ApacheDS Protocol Kerberos Codec
Copyright 2003-2013 The Apache Software Foundation
ApacheDS I18n
Copyright 2003-2013 The Apache Software Foundation
Apache Directory API ASN.1 API
Copyright 2003-2013 The Apache Software Foundation
Apache Directory LDAP API Utilities
Copyright 2003-2013 The Apache Software Foundation
(ASLv2) Apache Atlas (incubating)
The following NOTICE information applies:
Apache Atlas (incubating)
Copyright [2015-2017] The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
==============================================================
This product bundles titan 0.5.4 (https://github.com/thinkaurelius/titan/blob/titan05):
==============================================================
Titan: Distributed Graph Database
Copyright 2012 and onwards Aurelius
==============================================================
Titan includes software developed by Aurelius (http://thinkaurelius.com/) and the following individuals:
* Matthias Broecheler
* Dan LaRocque
* Marko A. Rodriguez
* Stephen Mallette
* Pavel Yaskevich
(ASLv2) Apache Avro
The following NOTICE information applies:
Apache Avro
Copyright 2009-2017 The Apache Software Foundation
(ASLv2) Apache Commons BeanUtils
The following NOTICE information applies:
Apache Commons BeanUtils
Copyright 2000-2008 The Apache Software Foundation
(ASLv2) Apache Commons-CLI
The following NOTICE information applies:
Apache Commons CLI
Copyright 2001-2017 The Apache Software Foundation
(ASLv2) Apache Commons Compress
The following NOTICE information applies:
Apache Commons Compress
Copyright 2002-2017 The Apache Software Foundation
The files in the package org.apache.commons.compress.archivers.sevenz
were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/),
which has been placed in the public domain:
"LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html)
(ASLv2) Apache Jakarta Commons Digester
The following NOTICE information applies:
Apache Jakarta Commons Digester
Copyright 2001-2006 The Apache Software Foundation
(ASLv2) Apache Commons Codec
The following NOTICE information applies:
Apache Commons Codec
Copyright 2002-2014 The Apache Software Foundation
src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
contains test data from http://aspell.net/test/orig/batch0.tab.
Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
===============================================================================
The content of package org.apache.commons.codec.language.bm has been translated
from the original php source code available at http://stevemorse.org/phoneticinfo.htm
with permission from the original authors.
Original source copyright:
Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
(ASLv2) Apache Commons Collections
The following NOTICE information applies:
Apache Commons Collections
Copyright 2001-2015 The Apache Software Foundation
(ASLv2) Apache Commons Configuration
The following NOTICE information applies:
Apache Commons Configuration
Copyright 2001-2013 The Apache Software Foundation
(ASLv2) Apache Commons IO
The following NOTICE information applies:
Apache Commons IO
Copyright 2002-2017 The Apache Software Foundation
(ASLv2) Apache Commons Lang
The following NOTICE information applies:
Apache Commons Lang
Copyright 2001-2017 The Apache Software Foundation
This product includes software from the Spring Framework,
under the Apache License 2.0 (see: StringUtils.containsWhitespace())
(ASLv2) Apache Commons Logging
The following NOTICE information applies:
Apache Commons Logging
Copyright 2003-2014 The Apache Software Foundation
(ASLv2) Apache Commons Math
The following NOTICE information applies:
Apache Commons Math
Copyright 2001-2012 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
===============================================================================
The BracketFinder (package org.apache.commons.math3.optimization.univariate)
and PowellOptimizer (package org.apache.commons.math3.optimization.general)
classes are based on the Python code in module "optimize.py" (version 0.5)
developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/)
Copyright © 2003-2009 SciPy Developers.
===============================================================================
The LinearConstraint, LinearObjectiveFunction, LinearOptimizer,
RelationShip, SimplexSolver and SimplexTableau classes in package
org.apache.commons.math3.optimization.linear include software developed by
Benjamin McCann (http://www.benmccann.com) and distributed with
the following copyright: Copyright 2009 Google Inc.
===============================================================================
This product includes software developed by the
University of Chicago, as Operator of Argonne National
Laboratory.
The LevenbergMarquardtOptimizer class in package
org.apache.commons.math3.optimization.general includes software
translated from the lmder, lmpar and qrsolv Fortran routines
from the Minpack package
Minpack Copyright Notice (1999) University of Chicago. All rights reserved
===============================================================================
The GraggBulirschStoerIntegrator class in package
org.apache.commons.math3.ode.nonstiff includes software translated
from the odex Fortran routine developed by E. Hairer and G. Wanner.
Original source copyright:
Copyright (c) 2004, Ernst Hairer
===============================================================================
The EigenDecompositionImpl class in package
org.apache.commons.math3.linear includes software translated
from some LAPACK Fortran routines. Original source copyright:
Copyright (c) 1992-2008 The University of Tennessee. All rights reserved.
===============================================================================
The MersenneTwister class in package org.apache.commons.math3.random
includes software translated from the 2002-01-26 version of
the Mersenne-Twister generator written in C by Makoto Matsumoto and Takuji
Nishimura. Original source copyright:
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved
===============================================================================
The LocalizedFormatsTest class in the unit tests is an adapted version of
the OrekitMessagesTest class from the orekit library distributed under the
terms of the Apache 2 licence. Original source copyright:
Copyright 2010 CS Systèmes d'Information
===============================================================================
The HermiteInterpolator class and its corresponding test have been imported from
the orekit library distributed under the terms of the Apache 2 licence. Original
source copyright:
Copyright 2010-2012 CS Systèmes d'Information
===============================================================================
The creation of the package "o.a.c.m.analysis.integration.gauss" was inspired
by an original code donated by Sébastien Brisard.
===============================================================================
(ASLv2) Apache Commons Net
The following NOTICE information applies:
Apache Commons Net
Copyright 2001-2013 The Apache Software Foundation
(ASLv2) Apache Curator
The following NOTICE information applies:
Curator Framework
Copyright 2011-2014 The Apache Software Foundation
Curator Client
Copyright 2011-2014 The Apache Software Foundation
Curator Recipes
Copyright 2011-2014 The Apache Software Foundation
(ASLv2) Google GSON
The following NOTICE information applies:
Copyright 2008 Google Inc.
(ASLv2) Guava
The following NOTICE information applies:
Guava
Copyright 2015 The Guava Authors
(ASLv2) Apache Hadoop
The following NOTICE information applies:
The binary distribution of this product bundles binaries of
org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the
following notices:
* Copyright 2011 Dain Sundstrom <dain@iq80.com>
* Copyright 2011 FuseSource Corp. http://fusesource.com
The binary distribution of this product bundles binaries of
org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
which has the following notices:
* This product includes software developed by FuseSource Corp.
http://fusesource.com
* This product includes software developed at
Progress Software Corporation and/or its subsidiaries or affiliates.
* This product includes software developed by IBM Corporation and others.
(ASLv2) Apache HTrace Core
The following NOTICE information applies:
Copyright 2016 The Apache Software Foundation
Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
is a distributed tracing system that is Apache 2.0 Licensed.
Copyright 2012 Twitter, Inc.
(ASLv2) Apache HttpComponents
The following NOTICE information applies:
Apache HttpClient
Copyright 1999-2015 The Apache Software Foundation
Apache HttpCore
Copyright 2005-2015 The Apache Software Foundation
This project contains annotations derived from JCIP-ANNOTATIONS
Copyright (c) 2005 Brian Goetz and Tim Peierls. See http://www.jcip.net
(ASLv2) Jackson Core ASL
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Jackson Jaxrs
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Jackson Mapper ASL
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Jackson Xc
The following NOTICE information applies:
This product currently only contains code developed by authors
of specific components, as identified by the source code files;
if such notes are missing files have been created by
Tatu Saloranta.
For additional credits (generally to people who reported problems)
see CREDITS file.
(ASLv2) Jets3t
The following NOTICE information applies:
This product includes software developed by:
The Apache Software Foundation (http://www.apache.org/).
The ExoLab Project (http://www.exolab.org/)
Sun Microsystems (http://www.sun.com/)
Codehaus (http://castor.codehaus.org)
Tatu Saloranta (http://wiki.fasterxml.com/TatuSaloranta)
(ASLv2) Jettison
The following NOTICE information applies:
Copyright 2006 Envoi Solutions LLC
(ASLv2) Joda Time
The following NOTICE information applies:
This product includes software developed by
Joda.org (http://www.joda.org/).
(ASLv2) JSON4S
The following NOTICE information applies:
This product includes software developed by
Json4s.org (http://json4s.org/).
(ASLv2) Apache Kafka
The following NOTICE information applies:
Apache Kafka
Copyright 2012 The Apache Software Foundation.
(ASLv2) Apache log4j
The following NOTICE information applies:
Apache log4j
Copyright 2007 The Apache Software Foundation
The Netty Project
=================
Please visit the Netty web site for more information:
* http://netty.io/
Copyright 2011 The Netty Project
The Netty Project licenses this file to you under the Apache License,
version 2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations
under the License.
Also, please refer to each LICENSE.<component>.txt file, which is located in
the 'license' directory of the distribution file, for the license terms of the
components that this product depends on.
-------------------------------------------------------------------------------
This product contains the extensions to Java Collections Framework which has
been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene:
* LICENSE:
* license/LICENSE.jsr166y.txt (Public Domain)
* HOMEPAGE:
* http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/
* http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/
This product contains a modified version of Robert Harder's Public Domain
Base64 Encoder and Decoder, which can be obtained at:
* LICENSE:
* license/LICENSE.base64.txt (Public Domain)
* HOMEPAGE:
* http://iharder.sourceforge.net/current/java/base64/
This product contains a modified version of 'JZlib', a re-implementation of
zlib in pure Java, which can be obtained at:
* LICENSE:
* license/LICENSE.jzlib.txt (BSD Style License)
* HOMEPAGE:
* http://www.jcraft.com/jzlib/
This product optionally depends on 'Protocol Buffers', Google's data
interchange format, which can be obtained at:
* LICENSE:
* license/LICENSE.protobuf.txt (New BSD License)
* HOMEPAGE:
* http://code.google.com/p/protobuf/
This product optionally depends on 'SLF4J', a simple logging facade for Java,
which can be obtained at:
* LICENSE:
* license/LICENSE.slf4j.txt (MIT License)
* HOMEPAGE:
* http://www.slf4j.org/
This product optionally depends on 'Apache Commons Logging', a logging
framework, which can be obtained at:
* LICENSE:
* license/LICENSE.commons-logging.txt (Apache License 2.0)
* HOMEPAGE:
* http://commons.apache.org/logging/
This product optionally depends on 'Apache Log4J', a logging framework,
which can be obtained at:
* LICENSE:
* license/LICENSE.log4j.txt (Apache License 2.0)
* HOMEPAGE:
* http://logging.apache.org/log4j/
This product optionally depends on 'JBoss Logging', a logging framework,
which can be obtained at:
* LICENSE:
* license/LICENSE.jboss-logging.txt (GNU LGPL 2.1)
* HOMEPAGE:
* http://anonsvn.jboss.org/repos/common/common-logging-spi/
This product optionally depends on 'Apache Felix', an open source OSGi
framework implementation, which can be obtained at:
* LICENSE:
* license/LICENSE.felix.txt (Apache License 2.0)
* HOMEPAGE:
* http://felix.apache.org/
This product optionally depends on 'Webbit', a Java event based
WebSocket and HTTP server:
* LICENSE:
* license/LICENSE.webbit.txt (BSD License)
* HOMEPAGE:
* https://github.com/joewalnes/webbit
(ASLv2) Spring Framework
The following NOTICE information applies:
Spring Framework 4.3.10.RELEASE
Copyright (c) 2002-2015 Pivotal, Inc.
(ASLv2) Snappy Java
The following NOTICE information applies:
This product includes software developed by Google
Snappy: http://code.google.com/p/snappy/ (New BSD License)
This product includes software developed by Apache
PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/
(Apache 2.0 license)
This library contains statically linked libstdc++. This inclusion is allowed by
"GCC Runtime Library Exception"
http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html
(ASLv2) Yammer Metrics
The following NOTICE information applies:
Metrics
Copyright 2010-2012 Coda Hale and Yammer, Inc.
This product includes software developed by Coda Hale and Yammer, Inc.
This product includes code derived from the JSR-166 project (ThreadLocalRandom), which was released
with the following comments:
Written by Doug Lea with assistance from members of JCP JSR-166
Expert Group and released to the public domain, as explained at
http://creativecommons.org/publicdomain/zero/1.0/
(ASLv2) ZkClient
The following NOTICE information applies:
ZkClient
Copyright 2009 Stefan Groschupf
(ASLv2) Apache ZooKeeper
The following NOTICE information applies:
Apache ZooKeeper
Copyright 2009-2012 The Apache Software Foundation
************************
Common Development and Distribution License 1.0
************************
The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details.
(CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
(CDDL 1.1) (GPL2 w/ CPE) javax.inject:1 as OSGi bundle (org.glassfish.hk2.external:javax.inject:jar:2.4.0-b25 - https://hk2.java.net/external/javax.inject)
(CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/)
(CDDL 1.1) (GPL2 w/ CPE) Old JAXB Runtime (com.sun.xml.bind:jaxb-impl:jar:2.2.3-1 - http://jaxb.java.net/)
(CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.19 - https://jersey.java.net/jersey-client/)
(CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.19 - https://jersey.java.net/jersey-core/)
(CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.19 - https://jersey.java.net/jersey-json/)
(CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/)
(CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net)
(CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided)
*****************
Public Domain
*****************
The following binary components are provided to the 'Public Domain'. See project link for details.
(Public Domain) XZ for Java (org.tukaani:xz:jar:1.5 - http://tukaani.org/xz/java.html)


@@ -0,0 +1,127 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-atlas-bundle</artifactId>
<version>1.5.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-atlas-reporting-task</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-processor-utils</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-reporting-utils</artifactId>
</dependency>
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-ssl-context-service-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-client</artifactId>
<!-- Exclude dependencies to reduce NAR file size -->
<exclusions>
<!-- NOTE: Scala is required by atlas notification -->
<!--
fastutil-6.5.16.jar is 16MB.
'fastutil' is only used by
org.apache.atlas.repository.memory.AttributeStores
which is deprecated as part of the V1 API.
-->
<exclusion>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.atlas</groupId>
<artifactId>atlas-notification</artifactId>
</dependency>
<!--
NOTE: Could not use nifi-hadoop-libraries-nar because hadoop-client uses httpclient-4.2.5,
but atlas-client uses httpclient-4.5.3.
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
</dependency>
-->
<dependency>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
<version>1.1</version>
<exclusions>
<exclusion>
<!-- jersey-json has a dependency on a newer javax.xml.stream:stax-api jar -->
<groupId>stax</groupId>
<artifactId>stax-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-jaxrs</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-xc</artifactId>
<version>1.9.13</version>
</dependency>
<!-- test -->
<dependency>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-mock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>


@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.Set;
/**
* This interface represents the 'Process' type in the Atlas type system,
* which has 'inputs' and 'outputs' attributes referring to 'DataSet' entities.
*/
public interface AtlasProcess {
Set<AtlasObjectId> getInputs();
Set<AtlasObjectId> getOutputs();
}
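For orientation, the sketch below is not part of this commit; the class name is hypothetical. It shows how a component could expose its lineage endpoints through this interface. Implementations in this bundle, such as the flow path entities seen later in this commit, populate these sets with the ids of the 'DataSet' entities they consume and produce.

import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.nifi.atlas.AtlasProcess;

import java.util.LinkedHashSet;
import java.util.Set;

// Hypothetical example only: a process-like component that reports the DataSet
// entities it reads from (inputs) and writes to (outputs) so Atlas can link them as lineage.
public class ExampleAtlasProcess implements AtlasProcess {

    private final Set<AtlasObjectId> inputs = new LinkedHashSet<>();
    private final Set<AtlasObjectId> outputs = new LinkedHashSet<>();

    @Override
    public Set<AtlasObjectId> getInputs() {
        return inputs;
    }

    @Override
    public Set<AtlasObjectId> getOutputs() {
        return outputs;
    }
}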


@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
public class AtlasUtils {
public static String toStr(Object obj) {
return obj != null ? obj.toString() : null;
}
public static boolean isGuidAssigned(String guid) {
return guid != null && !guid.startsWith("-");
}
public static String toQualifiedName(String clusterName, String componentId) {
return componentId + "@" + clusterName;
}
public static String getComponentIdFromQualifiedName(String qualifiedName) {
return qualifiedName.split("@")[0];
}
public static String getClusterNameFromQualifiedName(String qualifiedName) {
return qualifiedName.split("@")[1];
}
public static String toTypedQualifiedName(String typeName, String qualifiedName) {
return typeName + "::" + qualifiedName;
}
public static boolean isUpdated(Object current, Object arg) {
if (current == null) {
// Null to something.
return arg != null;
}
// Something to something.
return !current.equals(arg);
}
public static void updateMetadata(AtomicBoolean updatedTracker, List<String> updateAudit,
String subject, Object currentValue, Object newValue) {
if (isUpdated(currentValue, newValue)) {
updatedTracker.set(true);
updateAudit.add(String.format("%s changed from %s to %s", subject, currentValue, newValue));
}
}
public static Optional<AtlasObjectId> findIdByQualifiedName(Set<AtlasObjectId> ids, String qualifiedName) {
return ids.stream().filter(id -> qualifiedName.equals(id.getUniqueAttributes().get(ATTR_QUALIFIED_NAME))).findFirst();
}
}
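To make the qualified-name conventions above concrete, here is a small, hypothetical usage sketch that is not part of this commit; the cluster name, component id, and guid values are made up for illustration.

import static org.apache.nifi.atlas.AtlasUtils.getClusterNameFromQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.getComponentIdFromQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.isGuidAssigned;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName;

public class AtlasUtilsExample {
    public static void main(String[] args) {
        // Qualified names combine a component id and a cluster name as "componentId@clusterName".
        final String qualifiedName = toQualifiedName("DemoCluster", "2f1a3b4c-0169-1000-0000-000000000000");

        // The same string can be split back into its two parts.
        final String componentId = getComponentIdFromQualifiedName(qualifiedName);
        final String clusterName = getClusterNameFromQualifiedName(qualifiedName);

        // A type-scoped variant prefixes a type name, producing "typeName::qualifiedName".
        final String typedName = toTypedQualifiedName("nifi_flow", qualifiedName);

        // A guid starting with "-" is treated as not yet assigned by Atlas.
        final boolean assigned = isGuidAssigned("-14563203411");

        System.out.println(componentId + " @ " + clusterName);
        System.out.println(typedName + " (guid assigned: " + assigned + ")");
    }
}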


@@ -0,0 +1,542 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import com.sun.jersey.api.client.UniformInterfaceException;
import com.sun.jersey.core.util.MultivaluedMapImpl;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.model.SearchFilter;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.nifi.atlas.security.AtlasAuthN;
import org.apache.nifi.util.StringUtils;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.ws.rs.core.MultivaluedMap;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.apache.nifi.atlas.AtlasUtils.findIdByQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.getComponentIdFromQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.toStr;
import static org.apache.nifi.atlas.NiFiFlow.EntityChangeType.AS_IS;
import static org.apache.nifi.atlas.NiFiFlow.EntityChangeType.CREATED;
import static org.apache.nifi.atlas.NiFiFlow.EntityChangeType.DELETED;
import static org.apache.nifi.atlas.NiFiFlow.EntityChangeType.UPDATED;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_FLOW_PATHS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_GUID;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUT_PORTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUT_PORTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUEUES;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_URL;
import static org.apache.nifi.atlas.NiFiTypes.ENTITIES;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE;
public class NiFiAtlasClient {
private static final Logger logger = LoggerFactory.getLogger(NiFiAtlasClient.class);
private static NiFiAtlasClient nifiClient;
private AtlasClientV2 atlasClient;
private NiFiAtlasClient() {
super();
}
public static NiFiAtlasClient getInstance() {
if (nifiClient == null) {
synchronized (NiFiAtlasClient.class) {
if (nifiClient == null) {
nifiClient = new NiFiAtlasClient();
}
}
}
return nifiClient;
}
public void initialize(final String[] baseUrls, final AtlasAuthN authN, final File atlasConfDir) {
synchronized (NiFiAtlasClient.class) {
if (atlasClient != null) {
logger.info("{} had been setup but replacing it with new one.", atlasClient);
ApplicationProperties.forceReload();
}
if (atlasConfDir != null) {
// If atlasConfDir is not set, atlas-application.properties will be searched under classpath.
Properties props = System.getProperties();
final String atlasConfProp = "atlas.conf";
props.setProperty(atlasConfProp, atlasConfDir.getAbsolutePath());
logger.debug("{} has been set to: {}", atlasConfProp, props.getProperty(atlasConfProp));
}
atlasClient = authN.createClient(baseUrls);
}
}
/**
* This is a utility method to delete unused types.
* Should be used during development or testing only.
* @param typeNames to delete
*/
void deleteTypeDefs(String ... typeNames) throws AtlasServiceException {
final AtlasTypesDef existingTypeDef = getTypeDefs(typeNames);
try {
atlasClient.deleteAtlasTypeDefs(existingTypeDef);
} catch (UniformInterfaceException e) {
if (e.getResponse().getStatus() == 204) {
// 204 is a successful response.
// NOTE: However after executing this, Atlas should be restarted to work properly.
logger.info("Deleted type defs: {}", existingTypeDef);
} else {
throw e;
}
}
}
/**
* @return True when required NiFi types are already created.
*/
public boolean isNiFiTypeDefsRegistered() throws AtlasServiceException {
final Set<String> typeNames = ENTITIES.keySet();
final Map<String, AtlasEntityDef> existingDefs = getTypeDefs(typeNames.toArray(new String[typeNames.size()])).getEntityDefs().stream()
.collect(Collectors.toMap(AtlasEntityDef::getName, Function.identity()));
return typeNames.stream().allMatch(existingDefs::containsKey);
}
/**
* Create or update NiFi types in Atlas type system.
* @param update If false, does nothing when a type definition already exists for the name.
*/
public void registerNiFiTypeDefs(boolean update) throws AtlasServiceException {
final Set<String> typeNames = ENTITIES.keySet();
final Map<String, AtlasEntityDef> existingDefs = getTypeDefs(typeNames.toArray(new String[typeNames.size()])).getEntityDefs().stream()
.collect(Collectors.toMap(AtlasEntityDef::getName, Function.identity()));
final AtomicBoolean shouldUpdate = new AtomicBoolean(false);
final AtlasTypesDef type = new AtlasTypesDef();
typeNames.stream().filter(typeName -> {
final AtlasEntityDef existingDef = existingDefs.get(typeName);
if (existingDef != null) {
// type is already defined.
if (!update) {
return false;
}
shouldUpdate.set(true);
}
return true;
}).forEach(typeName -> {
final NiFiTypes.EntityDefinition def = ENTITIES.get(typeName);
final AtlasEntityDef entity = new AtlasEntityDef();
type.getEntityDefs().add(entity);
entity.setName(typeName);
Set<String> superTypes = new HashSet<>();
List<AtlasAttributeDef> attributes = new ArrayList<>();
def.define(entity, superTypes, attributes);
entity.setSuperTypes(superTypes);
entity.setAttributeDefs(attributes);
});
// Create or Update.
final AtlasTypesDef atlasTypeDefsResult = shouldUpdate.get()
? atlasClient.updateAtlasTypeDefs(type)
: atlasClient.createAtlasTypeDefs(type);
logger.debug("Result={}", atlasTypeDefsResult);
}
private AtlasTypesDef getTypeDefs(String ... typeNames) throws AtlasServiceException {
final AtlasTypesDef typeDefs = new AtlasTypesDef();
for (int i = 0; i < typeNames.length; i++) {
final MultivaluedMap<String, String> searchParams = new MultivaluedMapImpl();
searchParams.add(SearchFilter.PARAM_NAME, typeNames[i]);
final AtlasTypesDef typeDef = atlasClient.getAllTypeDefs(new SearchFilter(searchParams));
typeDefs.getEntityDefs().addAll(typeDef.getEntityDefs());
}
logger.debug("typeDefs={}", typeDefs);
return typeDefs;
}
private Pattern FLOW_PATH_URL_PATTERN = Pattern.compile("^http.+processGroupId=([0-9a-z\\-]+).*$");
/**
* Fetch existing NiFiFlow entity from Atlas.
* @param rootProcessGroupId The id of a NiFi flow root process group.
* @param clusterName The cluster name of a flow.
* @return A NiFiFlow instance filled with retrieved data from Atlas. Status objects are left blank, e.g. ProcessorStatus.
* @throws AtlasServiceException Thrown if a request to the Atlas API fails, including when the flow is not found.
*/
public NiFiFlow fetchNiFiFlow(String rootProcessGroupId, String clusterName) throws AtlasServiceException {
final String qualifiedName = AtlasUtils.toQualifiedName(clusterName, rootProcessGroupId);
final AtlasObjectId flowId = new AtlasObjectId(TYPE_NIFI_FLOW, ATTR_QUALIFIED_NAME, qualifiedName);
final AtlasEntity.AtlasEntityWithExtInfo nifiFlowExt = searchEntityDef(flowId);
if (nifiFlowExt == null || nifiFlowExt.getEntity() == null) {
return null;
}
final AtlasEntity nifiFlowEntity = nifiFlowExt.getEntity();
final Map<String, Object> attributes = nifiFlowEntity.getAttributes();
final NiFiFlow nifiFlow = new NiFiFlow(rootProcessGroupId);
nifiFlow.setExEntity(nifiFlowEntity);
nifiFlow.setFlowName(toStr(attributes.get(ATTR_NAME)));
nifiFlow.setClusterName(clusterName);
nifiFlow.setUrl(toStr(attributes.get(ATTR_URL)));
nifiFlow.setDescription(toStr(attributes.get(ATTR_DESCRIPTION)));
nifiFlow.getQueues().putAll(toQualifiedNameIds(toAtlasObjectIds(nifiFlowEntity.getAttribute(ATTR_QUEUES))));
nifiFlow.getRootInputPortEntities().putAll(toQualifiedNameIds(toAtlasObjectIds(nifiFlowEntity.getAttribute(ATTR_INPUT_PORTS))));
nifiFlow.getRootOutputPortEntities().putAll(toQualifiedNameIds(toAtlasObjectIds(nifiFlowEntity.getAttribute(ATTR_OUTPUT_PORTS))));
final Map<String, NiFiFlowPath> flowPaths = nifiFlow.getFlowPaths();
final Map<AtlasObjectId, AtlasEntity> flowPathEntities = toQualifiedNameIds(toAtlasObjectIds(attributes.get(ATTR_FLOW_PATHS)));
for (AtlasEntity flowPathEntity : flowPathEntities.values()) {
final String pathQualifiedName = toStr(flowPathEntity.getAttribute(ATTR_QUALIFIED_NAME));
final NiFiFlowPath flowPath = new NiFiFlowPath(getComponentIdFromQualifiedName(pathQualifiedName));
if (flowPathEntity.hasAttribute(ATTR_URL)) {
final Matcher urlMatcher = FLOW_PATH_URL_PATTERN.matcher(toStr(flowPathEntity.getAttribute(ATTR_URL)));
if (urlMatcher.matches()) {
flowPath.setGroupId(urlMatcher.group(1));
}
}
flowPath.setExEntity(flowPathEntity);
flowPath.setName(toStr(flowPathEntity.getAttribute(ATTR_NAME)));
flowPath.getInputs().addAll(toQualifiedNameIds(toAtlasObjectIds(flowPathEntity.getAttribute(ATTR_INPUTS))).keySet());
flowPath.getOutputs().addAll(toQualifiedNameIds(toAtlasObjectIds(flowPathEntity.getAttribute(ATTR_OUTPUTS))).keySet());
flowPath.startTrackingChanges(nifiFlow);
flowPaths.put(flowPath.getId(), flowPath);
}
nifiFlow.startTrackingChanges();
return nifiFlow;
}
@SuppressWarnings("unchecked")
private List<AtlasObjectId> toAtlasObjectIds(Object _references) {
if (_references == null) {
return Collections.emptyList();
}
List<Map<String, Object>> references = (List<Map<String, Object>>) _references;
return references.stream()
.map(ref -> new AtlasObjectId(toStr(ref.get(ATTR_GUID)), toStr(ref.get(ATTR_TYPENAME)), ref))
.collect(Collectors.toList());
}
/**
* <p>AtlasObjectIds returned from Atlas have a GUID but no qualifiedName, while the ones created by the reporting task
* have a qualifiedName but no GUID. AtlasObjectId.equals returns false for this combination.
* In order to match ids correctly, this method fetches the actual entities referenced by the ids to obtain their qualifiedName attribute.</p>
*
* <p>Also, AtlasObjectIds returned from Atlas do not carry entity state.
* If Atlas is configured to use soft-delete (default), deleted ids are still returned.
* Fetched entities are used to determine whether an AtlasObjectId is still active or deleted.
* Deleted entities will not be included in the result of this method.
* </p>
* @param ids to convert
* @return AtlasObjectIds with qualifiedName
*/
private Map<AtlasObjectId, AtlasEntity> toQualifiedNameIds(List<AtlasObjectId> ids) {
if (ids == null) {
return Collections.emptyMap();
}
return ids.stream().distinct().map(id -> {
try {
final AtlasEntity.AtlasEntityWithExtInfo entityExt = searchEntityDef(id);
final AtlasEntity entity = entityExt.getEntity();
if (AtlasEntity.Status.DELETED.equals(entity.getStatus())) {
return null;
}
final Map<String, Object> uniqueAttrs = Collections.singletonMap(ATTR_QUALIFIED_NAME, entity.getAttribute(ATTR_QUALIFIED_NAME));
return new Tuple<>(new AtlasObjectId(id.getGuid(), id.getTypeName(), uniqueAttrs), entity);
} catch (AtlasServiceException e) {
logger.warn("Failed to search entity by id {}, due to {}", id, e);
return null;
}
}).filter(Objects::nonNull).collect(Collectors.toMap(Tuple::getKey, Tuple::getValue));
}
public void registerNiFiFlow(NiFiFlow nifiFlow) throws AtlasServiceException {
// Create parent flow entity, so that common properties are taken over.
final AtlasEntity flowEntity = registerNiFiFlowEntity(nifiFlow);
// Create DataSet entities that are created by this NiFi flow.
final Map<String, List<AtlasEntity>> updatedDataSetEntities = registerDataSetEntities(nifiFlow);
// Create path entities.
final Set<AtlasObjectId> remainingPathIds = registerFlowPathEntities(nifiFlow);
// Update these attributes only if anything is created, updated or removed.
boolean shouldUpdateNiFiFlow = nifiFlow.isMetadataUpdated();
if (remainingPathIds != null) {
flowEntity.setAttribute(ATTR_FLOW_PATHS, remainingPathIds);
shouldUpdateNiFiFlow = true;
}
if (updatedDataSetEntities.containsKey(TYPE_NIFI_QUEUE)) {
flowEntity.setAttribute(ATTR_QUEUES, updatedDataSetEntities.get(TYPE_NIFI_QUEUE));
shouldUpdateNiFiFlow = true;
}
if (updatedDataSetEntities.containsKey(TYPE_NIFI_INPUT_PORT)) {
flowEntity.setAttribute(ATTR_INPUT_PORTS, updatedDataSetEntities.get(TYPE_NIFI_INPUT_PORT));
shouldUpdateNiFiFlow = true;
}
if (updatedDataSetEntities.containsKey(TYPE_NIFI_OUTPUT_PORT)) {
flowEntity.setAttribute(ATTR_OUTPUT_PORTS, updatedDataSetEntities.get(TYPE_NIFI_OUTPUT_PORT));
shouldUpdateNiFiFlow = true;
}
if (logger.isDebugEnabled()) {
logger.debug("### NiFi Flow Audit Logs START");
nifiFlow.getUpdateAudit().forEach(logger::debug);
nifiFlow.getFlowPaths().forEach((k, v) -> {
logger.debug("--- NiFiFlowPath Audit Logs: {}", k);
v.getUpdateAudit().forEach(logger::debug);
});
logger.debug("### NiFi Flow Audit Logs END");
}
if (shouldUpdateNiFiFlow) {
// Send updated entities.
final List<AtlasEntity> entities = new ArrayList<>();
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(entities);
entities.add(flowEntity);
try {
final EntityMutationResponse mutationResponse = atlasClient.createEntities(atlasEntities);
logger.debug("mutation response={}", mutationResponse);
} catch (AtlasServiceException e) {
if (e.getStatus().getStatusCode() == AtlasErrorCode.INSTANCE_NOT_FOUND.getHttpCode().getStatusCode()
&& e.getMessage().contains(AtlasErrorCode.INSTANCE_NOT_FOUND.getErrorCode())) {
// NOTE: If a previously existing nifi_flow_path entity is removed because the path was removed from NiFi,
// then Atlas responds with 404 even though the entity is successfully updated.
// The following exception is thrown in this case. Just log it.
// org.apache.atlas.AtlasServiceException:
// Metadata service API org.apache.atlas.AtlasBaseClient$APIInfo@45a37759
// failed with status 404 (Not Found) Response Body
// ({"errorCode":"ATLAS-404-00-00B","errorMessage":"Given instance is invalid/not found:
// Could not find entities in the repository with guids: [96d24487-cd66-4795-b552-f00b426fed26]"})
logger.debug("Received error response from Atlas but it should be stored." + e);
} else {
throw e;
}
}
}
}
private AtlasEntity registerNiFiFlowEntity(final NiFiFlow nifiFlow) throws AtlasServiceException {
final List<AtlasEntity> entities = new ArrayList<>();
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(entities);
if (!nifiFlow.isMetadataUpdated()) {
// Nothing has been changed, return existing entity.
return nifiFlow.getExEntity();
}
// Create parent flow entity using existing NiFiFlow entity if available, so that common properties are taken over.
final AtlasEntity flowEntity = nifiFlow.getExEntity() != null ? new AtlasEntity(nifiFlow.getExEntity()) : new AtlasEntity();
flowEntity.setTypeName(TYPE_NIFI_FLOW);
flowEntity.setVersion(1L);
flowEntity.setAttribute(ATTR_NAME, nifiFlow.getFlowName());
flowEntity.setAttribute(ATTR_QUALIFIED_NAME, nifiFlow.toQualifiedName(nifiFlow.getRootProcessGroupId()));
flowEntity.setAttribute(ATTR_URL, nifiFlow.getUrl());
flowEntity.setAttribute(ATTR_DESCRIPTION, nifiFlow.getDescription());
// If flowEntity is not persisted yet, then store nifi_flow entity to make nifiFlowId available for other entities.
if (flowEntity.getGuid().startsWith("-")) {
entities.add(flowEntity);
final EntityMutationResponse mutationResponse = atlasClient.createEntities(atlasEntities);
logger.debug("Registered a new nifi_flow entity, mutation response={}", mutationResponse);
final String assignedNiFiFlowGuid = mutationResponse.getGuidAssignments().get(flowEntity.getGuid());
flowEntity.setGuid(assignedNiFiFlowGuid);
nifiFlow.setAtlasGuid(assignedNiFiFlowGuid);
}
return flowEntity;
}
/**
* Register DataSet entities within the specified NiFiFlow.
* @return A map from each changed Atlas type name to its remaining entities, excluding deleted ones.
*/
private Map<String, List<AtlasEntity>> registerDataSetEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changedEntities = nifiFlow.getChangedDataSetEntities();
if (changedEntities.containsKey(CREATED)) {
final List<AtlasEntity> createdEntities = changedEntities.get(CREATED);
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(createdEntities);
final EntityMutationResponse mutationResponse = atlasClient.createEntities(atlasEntities);
logger.debug("Created DataSet entities mutation response={}", mutationResponse);
final Map<String, String> guidAssignments = mutationResponse.getGuidAssignments();
for (AtlasEntity entity : createdEntities) {
final String guid = guidAssignments.get(entity.getGuid());
final String qualifiedName = toStr(entity.getAttribute(ATTR_QUALIFIED_NAME));
if (StringUtils.isEmpty(guid)) {
logger.warn("GUID was not assigned for {}::{} for some reason.", entity.getTypeName(), qualifiedName);
continue;
}
final Map<AtlasObjectId, AtlasEntity> entityMap;
switch (entity.getTypeName()) {
case TYPE_NIFI_INPUT_PORT:
entityMap = nifiFlow.getRootInputPortEntities();
break;
case TYPE_NIFI_OUTPUT_PORT:
entityMap = nifiFlow.getRootOutputPortEntities();
break;
case TYPE_NIFI_QUEUE:
entityMap = nifiFlow.getQueues();
break;
default:
throw new RuntimeException(entity.getTypeName() + " is not expected.");
}
// In order to replace the id, remove the current id, which does not have a GUID.
findIdByQualifiedName(entityMap.keySet(), qualifiedName).ifPresent(entityMap::remove);
entity.setGuid(guid);
final AtlasObjectId idWithGuid = new AtlasObjectId(guid, entity.getTypeName(), Collections.singletonMap(ATTR_QUALIFIED_NAME, qualifiedName));
entityMap.put(idWithGuid, entity);
}
}
if (changedEntities.containsKey(UPDATED)) {
final List<AtlasEntity> updatedEntities = changedEntities.get(UPDATED);
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(updatedEntities);
final EntityMutationResponse mutationResponse = atlasClient.updateEntities(atlasEntities);
logger.debug("Updated DataSet entities mutation response={}", mutationResponse);
}
final Set<String> changedTypeNames = changedEntities.entrySet().stream()
.filter(entry -> !AS_IS.equals(entry.getKey())).flatMap(entry -> entry.getValue().stream())
.map(AtlasEntity::getTypeName)
.collect(Collectors.toSet());
// NOTE: A cascading DELETE will be performed when the parent NiFiFlow is updated without the removed DataSet entities.
final Map<String, List<AtlasEntity>> remainingEntitiesByType = changedEntities.entrySet().stream()
.filter(entry -> !DELETED.equals(entry.getKey()))
.flatMap(entry -> entry.getValue().stream())
.filter(entity -> changedTypeNames.contains(entity.getTypeName()))
.collect(Collectors.groupingBy(AtlasEntity::getTypeName));
// If all entities of a type (e.g. nifi_input_port) are deleted, then remainingEntitiesByType will not contain that key.
// If the returned map does not contain anything for a type, then the corresponding attribute will not be updated.
// To empty an attribute when all of its elements are deleted, add an empty list for that type.
changedTypeNames.forEach(changedTypeName -> remainingEntitiesByType.computeIfAbsent(changedTypeName, k -> Collections.emptyList()));
return remainingEntitiesByType;
}
private Set<AtlasObjectId> registerFlowPathEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changedEntities = nifiFlow.getChangedFlowPathEntities();
if (changedEntities.containsKey(CREATED)) {
final List<AtlasEntity> createdEntities = changedEntities.get(CREATED);
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(createdEntities);
final EntityMutationResponse mutationResponse = atlasClient.createEntities(atlasEntities);
logger.debug("Created FlowPath entities mutation response={}", mutationResponse);
final Map<String, String> guidAssignments = mutationResponse.getGuidAssignments();
createdEntities.forEach(entity -> {
final String guid = entity.getGuid();
entity.setGuid(guidAssignments.get(guid));
final String pathId = getComponentIdFromQualifiedName(toStr(entity.getAttribute(ATTR_QUALIFIED_NAME)));
final NiFiFlowPath path = nifiFlow.getFlowPaths().get(pathId);
path.setExEntity(entity);
});
}
if (changedEntities.containsKey(UPDATED)) {
final List<AtlasEntity> updatedEntities = changedEntities.get(UPDATED);
final AtlasEntity.AtlasEntitiesWithExtInfo atlasEntities = new AtlasEntity.AtlasEntitiesWithExtInfo(updatedEntities);
final EntityMutationResponse mutationResponse = atlasClient.updateEntities(atlasEntities);
logger.debug("Updated FlowPath entities mutation response={}", mutationResponse);
updatedEntities.forEach(entity -> {
final String pathId = getComponentIdFromQualifiedName(toStr(entity.getAttribute(ATTR_QUALIFIED_NAME)));
final NiFiFlowPath path = nifiFlow.getFlowPaths().get(pathId);
path.setExEntity(entity);
});
}
if (NiFiFlow.EntityChangeType.containsChange(changedEntities.keySet())) {
return changedEntities.entrySet().stream()
.filter(entry -> !DELETED.equals(entry.getKey())).flatMap(entry -> entry.getValue().stream())
.map(path -> new AtlasObjectId(path.getGuid(), TYPE_NIFI_FLOW_PATH,
Collections.singletonMap(ATTR_QUALIFIED_NAME, path.getAttribute(ATTR_QUALIFIED_NAME))))
.collect(Collectors.toSet());
}
return null;
}
public AtlasEntity.AtlasEntityWithExtInfo searchEntityDef(AtlasObjectId id) throws AtlasServiceException {
final String guid = id.getGuid();
if (!StringUtils.isEmpty(guid)) {
return atlasClient.getEntityByGuid(guid);
}
final Map<String, String> attributes = new HashMap<>();
id.getUniqueAttributes().entrySet().stream().filter(entry -> entry.getValue() != null)
.forEach(entry -> attributes.put(entry.getKey(), entry.getValue().toString()));
return atlasClient.getEntityByAttribute(id.getTypeName(), attributes);
}
}

View File

@ -0,0 +1,294 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import com.sun.jersey.api.client.ClientResponse;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hook.AtlasHook;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.notification.hook.HookNotification.EntityPartialUpdateRequest;
import org.apache.atlas.notification.hook.HookNotification.HookNotificationMessage;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.atlas.typesystem.persistence.Id;
import org.apache.nifi.atlas.provenance.lineage.LineageContext;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.apache.atlas.notification.hook.HookNotification.HookNotificationType.ENTITY_PARTIAL_UPDATE;
import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_GUID;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH;
/**
* This class is not thread-safe as it holds uncommitted notification messages within the instance.
* {@link #addMessage(HookNotificationMessage)} and {@link #commitMessages()} should be used serially from a single thread.
*/
public class NiFiAtlasHook extends AtlasHook implements LineageContext {
public static final String NIFI_USER = "nifi";
private static final Logger logger = LoggerFactory.getLogger(NiFiAtlasHook.class);
private static final String CONF_PREFIX = "atlas.hook.nifi.";
private static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries";
private final NiFiAtlasClient atlasClient;
/**
* An index to resolve a qualifiedName from a GUID.
*/
private final Map<String, String> guidToQualifiedName;
/**
* An index to resolve a Referenceable from a typeName::qualifiedName.
*/
private final Map<String, Referenceable> typedQualifiedNameToRef;
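// Both caches are simple LRU maps: createCache below returns a LinkedHashMap with accessOrder=true
// that evicts its eldest entry once the size exceeds maxSize.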
private static <K, V> Map<K, V> createCache(final int maxSize) {
return new LinkedHashMap<K, V>(maxSize, 0.75f, true) {
@Override
protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > maxSize;
}
};
}
public NiFiAtlasHook(NiFiAtlasClient atlasClient) {
this.atlasClient = atlasClient;
final int qualifiedNameCacheSize = 10_000;
this.guidToQualifiedName = createCache(qualifiedNameCacheSize);
final int dataSetRefCacheSize = 1_000;
this.typedQualifiedNameToRef = createCache(dataSetRefCacheSize);
}
@Override
protected String getNumberOfRetriesPropertyKey() {
return HOOK_NUM_RETRIES;
}
private final List<HookNotificationMessage> messages = new ArrayList<>();
@Override
public void addMessage(HookNotificationMessage message) {
messages.add(message);
}
private class Metrics {
final long startedAt = System.currentTimeMillis();
int totalMessages;
int partialNiFiFlowPathUpdates;
int dedupedPartialNiFiFlowPathUpdates;
int otherMessages;
int flowPathSearched;
int dataSetSearched;
int dataSetCacheHit;
private void log(String message) {
logger.debug(String.format("%s, %d ms passed, totalMessages=%d," +
" partialNiFiFlowPathUpdates=%d, dedupedPartialNiFiFlowPathUpdates=%d, otherMessage=%d," +
" flowPathSearched=%d, dataSetSearched=%d, dataSetCacheHit=%s," +
" guidToQualifiedName.size=%d, typedQualifiedNameToRef.size=%d",
message, System.currentTimeMillis() - startedAt, totalMessages,
partialNiFiFlowPathUpdates, dedupedPartialNiFiFlowPathUpdates, otherMessages,
flowPathSearched, dataSetSearched, dataSetCacheHit,
guidToQualifiedName.size(), typedQualifiedNameToRef.size()));
}
}
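/**
 * Send accumulated notification messages to Atlas. Partial nifi_flow_path updates are grouped by qualifiedName
 * and consolidated into a single message per path, merging new inputs/outputs with those of the existing entity
 * fetched from Atlas, so that repeated provenance events do not produce redundant notifications.
 * Typical usage (sketch): call {@link #addMessage(HookNotificationMessage)} for each analyzed provenance event,
 * then call this method once at the end of a reporting cycle.
 */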
public void commitMessages() {
final Map<Boolean, List<HookNotificationMessage>> partialNiFiFlowPathUpdateAndOthers
= messages.stream().collect(Collectors.groupingBy(msg
-> ENTITY_PARTIAL_UPDATE.equals(msg.getType())
&& TYPE_NIFI_FLOW_PATH.equals(((EntityPartialUpdateRequest)msg).getTypeName())
&& ATTR_QUALIFIED_NAME.equals(((EntityPartialUpdateRequest)msg).getAttribute())
));
final List<HookNotificationMessage> otherMessages = partialNiFiFlowPathUpdateAndOthers.computeIfAbsent(false, k -> Collections.emptyList());
final List<HookNotificationMessage> partialNiFiFlowPathUpdates = partialNiFiFlowPathUpdateAndOthers.computeIfAbsent(true, k -> Collections.emptyList());
logger.info("Commit messages: {} partialNiFiFlowPathUpdate and {} other messages.", partialNiFiFlowPathUpdates.size(), otherMessages.size());
final Metrics metrics = new Metrics();
metrics.totalMessages = messages.size();
metrics.partialNiFiFlowPathUpdates = partialNiFiFlowPathUpdates.size();
metrics.otherMessages = otherMessages.size();
try {
// Notify other messages first.
notifyEntities(otherMessages);
// De-duplicate messages.
final List<HookNotificationMessage> deduplicatedMessages = partialNiFiFlowPathUpdates.stream().map(msg -> (EntityPartialUpdateRequest) msg)
// Group by nifi_flow_path qualifiedName value.
.collect(Collectors.groupingBy(EntityPartialUpdateRequest::getAttributeValue)).entrySet().stream()
.map(entry -> {
final String flowPathQualifiedName = entry.getKey();
final Map<String, Referenceable> distinctInputs;
final Map<String, Referenceable> distinctOutputs;
final String flowPathGuid;
try {
// Fetch the existing nifi_flow_path and its inputs/outputs.
metrics.flowPathSearched++;
final AtlasEntity.AtlasEntityWithExtInfo flowPathExt
= atlasClient.searchEntityDef(new AtlasObjectId(TYPE_NIFI_FLOW_PATH, ATTR_QUALIFIED_NAME, flowPathQualifiedName));
final AtlasEntity flowPathEntity = flowPathExt.getEntity();
flowPathGuid = flowPathEntity.getGuid();
distinctInputs = toReferenceables(flowPathEntity.getAttribute(ATTR_INPUTS), metrics);
distinctOutputs = toReferenceables(flowPathEntity.getAttribute(ATTR_OUTPUTS), metrics);
} catch (AtlasServiceException e) {
if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
logger.debug("nifi_flow_path was not found for qualifiedName {}", flowPathQualifiedName);
} else {
logger.warn("Failed to retrieve nifi_flow_path with qualifiedName {} due to {}", flowPathQualifiedName, e, e);
}
return null;
}
// Merge all inputs and outputs for this nifi_flow_path.
for (EntityPartialUpdateRequest msg : entry.getValue()) {
fromReferenceable(msg.getEntity().get(ATTR_INPUTS), metrics)
.entrySet().stream().filter(ref -> !distinctInputs.containsKey(ref.getKey()))
.forEach(ref -> distinctInputs.put(ref.getKey(), ref.getValue()));
fromReferenceable(msg.getEntity().get(ATTR_OUTPUTS), metrics)
.entrySet().stream().filter(ref -> !distinctOutputs.containsKey(ref.getKey()))
.forEach(ref -> distinctOutputs.put(ref.getKey(), ref.getValue()));
}
// Consolidate messages into one.
final Referenceable flowPathRef = new Referenceable(flowPathGuid, TYPE_NIFI_FLOW_PATH, null);
// NOTE: distinctInputs.values() returns HashMap$Values, which causes the following error. To avoid that, wrap it with an ArrayList:
// org.json4s.package$MappingException: Can't find ScalaSig for class org.apache.atlas.typesystem.Referenceable
flowPathRef.set(ATTR_INPUTS, new ArrayList<>(distinctInputs.values()));
flowPathRef.set(ATTR_OUTPUTS, new ArrayList<>(distinctOutputs.values()));
return new EntityPartialUpdateRequest(NIFI_USER, TYPE_NIFI_FLOW_PATH,
ATTR_QUALIFIED_NAME, flowPathQualifiedName, flowPathRef);
})
.filter(Objects::nonNull)
.collect(Collectors.toList());
metrics.dedupedPartialNiFiFlowPathUpdates = deduplicatedMessages.size();
notifyEntities(deduplicatedMessages);
} finally {
metrics.log("Committed");
messages.clear();
}
}
/**
* <p>Convert nifi_flow_path inputs or outputs to a map of Referenceable keyed by qualifiedName.</p>
* <p>Atlas removes existing references that are not specified when a collection attribute is updated.
* In order to preserve existing DataSet references, existing elements should be passed within a partial update message.</p>
* <p>This method also populates entity cache for subsequent lookups.</p>
* @param _refs Contains references from an existing nifi_flow_path entity's inputs or outputs attribute.
* @return A map of Referenceables keyed by qualifiedName.
*/
@SuppressWarnings("unchecked")
private Map<String, Referenceable> toReferenceables(Object _refs, Metrics metrics) {
if (_refs == null) {
// NOTE: This empty map may be used to add new Referenceables. It cannot be Collections.emptyMap, which does not support addition.
return new HashMap<>();
}
final List<Map<String, Object>> refs = (List<Map<String, Object>>) _refs;
return refs.stream().map(ref -> {
// An existing reference should have a GUID.
final String typeName = (String) ref.get(ATTR_TYPENAME);
final String guid = (String) ref.get(ATTR_GUID);
if (guidToQualifiedName.containsKey(guid)) {
metrics.dataSetCacheHit++;
}
final String refQualifiedName = guidToQualifiedName.computeIfAbsent(guid, k -> {
try {
metrics.dataSetSearched++;
final AtlasEntity.AtlasEntityWithExtInfo refExt = atlasClient.searchEntityDef(new AtlasObjectId(guid, typeName));
final String qualifiedName = (String) refExt.getEntity().getAttribute(ATTR_QUALIFIED_NAME);
typedQualifiedNameToRef.put(toTypedQualifiedName(typeName, qualifiedName), new Referenceable(guid, typeName, Collections.EMPTY_MAP));
return qualifiedName;
} catch (AtlasServiceException e) {
if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
logger.warn("{} entity was not found for guid {}", typeName, guid);
} else {
logger.warn("Failed to retrieve {} with guid {} due to {}", typeName, guid, e);
}
return null;
}
});
if (refQualifiedName == null) {
return null;
}
return new Tuple<>(refQualifiedName, typedQualifiedNameToRef.get(toTypedQualifiedName(typeName, refQualifiedName)));
}).filter(Objects::nonNull).filter(tuple -> tuple.getValue() != null)
.collect(Collectors.toMap(Tuple::getKey, Tuple::getValue));
}
@SuppressWarnings("unchecked")
private Map<String, Referenceable> fromReferenceable(Object _refs, Metrics metrics) {
if (_refs == null) {
return Collections.emptyMap();
}
final List<Referenceable> refs = (List<Referenceable>) _refs;
return refs.stream().map(ref -> {
// This ref was created within this reporting cycle, and it may not have a GUID assigned yet if it is a brand new reference.
// If the cache has the Referenceable, then use it, because instances in the cache are guaranteed to have a GUID assigned.
// Brand new Referenceables have to have all mandatory attributes.
final String typeName = ref.getTypeName();
final Id id = ref.getId();
final String refQualifiedName = (String) ref.get(ATTR_QUALIFIED_NAME);
final String typedRefQualifiedName = toTypedQualifiedName(typeName, refQualifiedName);
final Referenceable refFromCacheIfAvailable = typedQualifiedNameToRef.computeIfAbsent(typedRefQualifiedName, k -> {
if (id.isAssigned()) {
// If this Referenceable has a GUID assigned, then add it to the cache.
guidToQualifiedName.put(id._getId(), refQualifiedName);
typedQualifiedNameToRef.put(typedRefQualifiedName, ref);
}
return ref;
});
return new Tuple<>(refQualifiedName, refFromCacheIfAvailable);
}).filter(Objects::nonNull).filter(tuple -> tuple.getValue() != null)
.collect(Collectors.toMap(Tuple::getKey, Tuple::getValue));
}
public void close() {
if (notificationInterface != null) {
notificationInterface.close();
}
}
}

View File

@ -0,0 +1,540 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.controller.status.PortStatus;
import org.apache.nifi.controller.status.ProcessorStatus;
import org.apache.nifi.controller.status.RemoteProcessGroupStatus;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.nifi.atlas.AtlasUtils.findIdByQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.isGuidAssigned;
import static org.apache.nifi.atlas.AtlasUtils.isUpdated;
import static org.apache.nifi.atlas.AtlasUtils.updateMetadata;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_CLUSTER_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NIFI_FLOW;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_URL;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE;
public class NiFiFlow {
private static final Logger logger = LoggerFactory.getLogger(NiFiFlow.class);
private final String rootProcessGroupId;
private String flowName;
private String clusterName;
private String url;
private String atlasGuid;
private AtlasEntity exEntity;
private AtlasObjectId atlasObjectId;
private String description;
/**
* Tracks whether this instance has had its metadata updated and should be updated in Atlas.
*/
private AtomicBoolean metadataUpdated = new AtomicBoolean(false);
private List<String> updateAudit = new ArrayList<>();
private Set<String> updatedEntityGuids = new LinkedHashSet<>();
private Set<String> stillExistingEntityGuids = new LinkedHashSet<>();
private Set<String> traversedPathIds = new LinkedHashSet<>();
private boolean urlUpdated = false;
private final Map<String, NiFiFlowPath> flowPaths = new HashMap<>();
private final Map<String, ProcessorStatus> processors = new HashMap<>();
private final Map<String, RemoteProcessGroupStatus> remoteProcessGroups = new HashMap<>();
private final Map<String, List<ConnectionStatus>> incomingConnections = new HashMap<>();
private final Map<String, List<ConnectionStatus>> outGoingConnections = new HashMap<>();
private final Map<AtlasObjectId, AtlasEntity> queues = new HashMap<>();
// Any Ports.
private final Map<String, PortStatus> inputPorts = new HashMap<>();
private final Map<String, PortStatus> outputPorts = new HashMap<>();
// Root Group Ports.
private final Map<String, PortStatus> rootInputPorts = new HashMap<>();
private final Map<String, PortStatus> rootOutputPorts = new HashMap<>();
// Root Group Port Entities.
private final Map<AtlasObjectId, AtlasEntity> rootInputPortEntities = new HashMap<>();
private final Map<AtlasObjectId, AtlasEntity> rootOutputPortEntities = new HashMap<>();
public NiFiFlow(String rootProcessGroupId) {
this.rootProcessGroupId = rootProcessGroupId;
}
public AtlasObjectId getAtlasObjectId() {
return atlasObjectId;
}
public String getRootProcessGroupId() {
return rootProcessGroupId;
}
public String getClusterName() {
return clusterName;
}
public void setClusterName(String clusterName) {
updateMetadata(metadataUpdated, updateAudit, ATTR_CLUSTER_NAME, this.clusterName, clusterName);
this.clusterName = clusterName;
atlasObjectId = createAtlasObjectId();
}
private AtlasObjectId createAtlasObjectId() {
return new AtlasObjectId(atlasGuid, TYPE_NIFI_FLOW, Collections.singletonMap(ATTR_QUALIFIED_NAME, getQualifiedName()));
}
public AtlasEntity getExEntity() {
return exEntity;
}
public void setExEntity(AtlasEntity exEntity) {
this.exEntity = exEntity;
this.setAtlasGuid(exEntity.getGuid());
}
public String getAtlasGuid() {
return atlasGuid;
}
public void setAtlasGuid(String atlasGuid) {
this.atlasGuid = atlasGuid;
atlasObjectId = createAtlasObjectId();
}
public String getQualifiedName() {
return toQualifiedName(rootProcessGroupId);
}
public String getDescription() {
return description;
}
public void setDescription(String description) {
updateMetadata(metadataUpdated, updateAudit, ATTR_DESCRIPTION, this.description, description);
this.description = description;
}
public void addConnection(ConnectionStatus c) {
outGoingConnections.computeIfAbsent(c.getSourceId(), k -> new ArrayList<>()).add(c);
incomingConnections.computeIfAbsent(c.getDestinationId(), k -> new ArrayList<>()).add(c);
}
public void addProcessor(ProcessorStatus p) {
processors.put(p.getId(), p);
}
public Map<String, ProcessorStatus> getProcessors() {
return processors;
}
public void addRemoteProcessGroup(RemoteProcessGroupStatus r) {
remoteProcessGroups.put(r.getId(), r);
}
public void setFlowName(String flowName) {
updateMetadata(metadataUpdated, updateAudit, ATTR_NAME, this.flowName, flowName);
this.flowName = flowName;
}
public String getFlowName() {
return flowName;
}
public void setUrl(String url) {
updateMetadata(metadataUpdated, updateAudit, ATTR_URL, this.url, url);
if (isUpdated(this.url, url)) {
this.urlUpdated = true;
}
this.url = url;
}
public String getUrl() {
return url;
}
public List<ConnectionStatus> getIncomingConnections(String componentId) {
return incomingConnections.get(componentId);
}
public List<ConnectionStatus> getOutgoingConnections(String componentId) {
return outGoingConnections.get(componentId);
}
public void addInputPort(PortStatus port) {
inputPorts.put(port.getId(), port);
}
public Map<String, PortStatus> getInputPorts() {
return inputPorts;
}
public void addOutputPort(PortStatus port) {
outputPorts.put(port.getId(), port);
}
public Map<String, PortStatus> getOutputPorts() {
return outputPorts;
}
public void addRootInputPort(PortStatus port) {
rootInputPorts.put(port.getId(), port);
createOrUpdateRootGroupPortEntity(true, toQualifiedName(port.getId()), port.getName());
}
public Map<String, PortStatus> getRootInputPorts() {
return rootInputPorts;
}
public void addRootOutputPort(PortStatus port) {
rootOutputPorts.put(port.getId(), port);
createOrUpdateRootGroupPortEntity(false, toQualifiedName(port.getId()), port.getName());
}
public Map<String, PortStatus> getRootOutputPorts() {
return rootOutputPorts;
}
public Map<AtlasObjectId, AtlasEntity> getRootInputPortEntities() {
return rootInputPortEntities;
}
private AtlasEntity createOrUpdateRootGroupPortEntity(boolean isInput, String qualifiedName, String portName) {
final Map<AtlasObjectId, AtlasEntity> ports = isInput ? rootInputPortEntities : rootOutputPortEntities;
final Optional<AtlasObjectId> existingPortId = findIdByQualifiedName(ports.keySet(), qualifiedName);
final String typeName = isInput ? TYPE_NIFI_INPUT_PORT : TYPE_NIFI_OUTPUT_PORT;
if (existingPortId.isPresent()) {
final AtlasEntity entity = ports.get(existingPortId.get());
final String portGuid = entity.getGuid();
stillExistingEntityGuids.add(portGuid);
final Object currentName = entity.getAttribute(ATTR_NAME);
if (isUpdated(currentName, portName)) {
// Update port name and set updated flag.
entity.setAttribute(ATTR_NAME, portName);
updatedEntityGuids.add(portGuid);
updateAudit.add(String.format("Name of %s %s changed from %s to %s", entity.getTypeName(), portGuid, currentName, portName));
}
return entity;
} else {
final AtlasEntity entity = new AtlasEntity(typeName);
entity.setAttribute(ATTR_NIFI_FLOW, getAtlasObjectId());
entity.setAttribute(ATTR_NAME, portName);
entity.setAttribute(ATTR_QUALIFIED_NAME, qualifiedName);
final AtlasObjectId portId = new AtlasObjectId(typeName, ATTR_QUALIFIED_NAME, qualifiedName);
ports.put(portId, entity);
return entity;
}
}
public Map<AtlasObjectId, AtlasEntity> getRootOutputPortEntities() {
return rootOutputPortEntities;
}
public Tuple<AtlasObjectId, AtlasEntity> getOrCreateQueue(String destinationComponentId) {
final String qualifiedName = toQualifiedName(destinationComponentId);
final Optional<AtlasObjectId> existingQueueId = findIdByQualifiedName(queues.keySet(), qualifiedName);
if (existingQueueId.isPresent()) {
final AtlasEntity entity = queues.get(existingQueueId.get());
stillExistingEntityGuids.add(entity.getGuid());
return new Tuple<>(existingQueueId.get(), entity);
} else {
final AtlasObjectId queueId = new AtlasObjectId(TYPE_NIFI_QUEUE, ATTR_QUALIFIED_NAME, qualifiedName);
final AtlasEntity queue = new AtlasEntity(TYPE_NIFI_QUEUE);
queue.setAttribute(ATTR_NIFI_FLOW, getAtlasObjectId());
queue.setAttribute(ATTR_QUALIFIED_NAME, qualifiedName);
queue.setAttribute(ATTR_NAME, "queue");
queue.setAttribute(ATTR_DESCRIPTION, "Input queue for " + destinationComponentId);
queues.put(queueId, queue);
return new Tuple<>(queueId, queue);
}
}
public Map<AtlasObjectId, AtlasEntity> getQueues() {
return queues;
}
public Map<String, NiFiFlowPath> getFlowPaths() {
return flowPaths;
}
/**
* Find a flow_path that contains the specified componentId.
*/
public NiFiFlowPath findPath(String componentId) {
for (NiFiFlowPath path: flowPaths.values()) {
if (path.getProcessComponentIds().contains(componentId)){
return path;
}
}
return null;
}
/**
* Determine if a component should be reported as NiFiFlowPath.
*/
public boolean isProcessComponent(String componentId) {
return isProcessor(componentId) || isRootInputPort(componentId) || isRootOutputPort(componentId);
}
public boolean isProcessor(String componentId) {
return processors.containsKey(componentId);
}
public boolean isInputPort(String componentId) {
return inputPorts.containsKey(componentId);
}
public boolean isOutputPort(String componentId) {
return outputPorts.containsKey(componentId);
}
public boolean isRootInputPort(String componentId) {
return rootInputPorts.containsKey(componentId);
}
public boolean isRootOutputPort(String componentId) {
return rootOutputPorts.containsKey(componentId);
}
public String getProcessComponentName(String componentId) {
return getProcessComponentName(componentId, () -> "unknown");
}
public String getProcessComponentName(String componentId, Supplier<String> unknown) {
return isProcessor(componentId) ? getProcessors().get(componentId).getName()
: isRootInputPort(componentId) ? getRootInputPorts().get(componentId).getName()
: isRootOutputPort(componentId) ? getRootOutputPorts().get(componentId).getName() : unknown.get();
}
/**
* Start tracking changes from current state.
*/
public void startTrackingChanges() {
this.metadataUpdated.set(false);
this.updateAudit.clear();
this.updatedEntityGuids.clear();
this.stillExistingEntityGuids.clear();
this.urlUpdated = false;
}
public boolean isMetadataUpdated() {
return this.metadataUpdated.get();
}
public String toQualifiedName(String componentId) {
return AtlasUtils.toQualifiedName(clusterName, componentId);
}
public enum EntityChangeType {
AS_IS,
CREATED,
UPDATED,
DELETED;
public static boolean containsChange(Collection<EntityChangeType> types) {
return types.contains(CREATED) || types.contains(UPDATED) || types.contains(DELETED);
}
}
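// Classify an entity by its GUID: no GUID assigned yet = CREATED, marked as updated = UPDATED,
// not seen during the current analysis = DELETED, otherwise AS_IS.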
private EntityChangeType getEntityChangeType(String guid) {
if (!isGuidAssigned(guid)) {
return EntityChangeType.CREATED;
} else if (updatedEntityGuids.contains(guid)) {
return EntityChangeType.UPDATED;
} else if (!stillExistingEntityGuids.contains(guid)) {
return EntityChangeType.DELETED;
}
return EntityChangeType.AS_IS;
}
public Map<EntityChangeType, List<AtlasEntity>> getChangedDataSetEntities() {
final Map<EntityChangeType, List<AtlasEntity>> changedEntities = Stream
.of(rootInputPortEntities.values().stream(), rootOutputPortEntities.values().stream(), queues.values().stream())
.flatMap(Function.identity())
.collect(Collectors.groupingBy(entity -> getEntityChangeType(entity.getGuid())));
updateAudit.add("CREATED DataSet entities=" + changedEntities.get(EntityChangeType.CREATED));
updateAudit.add("UPDATED DataSet entities=" + changedEntities.get(EntityChangeType.UPDATED));
updateAudit.add("DELETED DataSet entities=" + changedEntities.get(EntityChangeType.DELETED));
return changedEntities;
}
public NiFiFlowPath getOrCreateFlowPath(String pathId) {
traversedPathIds.add(pathId);
return flowPaths.computeIfAbsent(pathId, k -> new NiFiFlowPath(pathId));
}
public boolean isTraversedPath(String pathId) {
return traversedPathIds.contains(pathId);
}
private EntityChangeType getFlowPathChangeType(NiFiFlowPath path) {
if (path.getExEntity() == null) {
return EntityChangeType.CREATED;
} else if (path.isMetadataUpdated() || urlUpdated) {
return EntityChangeType.UPDATED;
} else if (!traversedPathIds.contains(path.getId())) {
return EntityChangeType.DELETED;
}
return EntityChangeType.AS_IS;
}
private EntityChangeType getFlowPathIOChangeType(AtlasObjectId id) {
final String guid = id.getGuid();
if (!isGuidAssigned(guid)) {
return EntityChangeType.CREATED;
} else {
if (TYPE_NIFI_QUEUE.equals(id.getTypeName()) && queues.containsKey(id)) {
// If an input/output is a queue, and it is owned by this NiFiFlow, then check if it's still needed. NiFiFlow knows active queues.
if (stillExistingEntityGuids.contains(guid)) {
return EntityChangeType.AS_IS;
} else {
return EntityChangeType.DELETED;
}
} else {
// Otherwise, there is no need to delete it.
return EntityChangeType.AS_IS;
}
}
}
private Tuple<EntityChangeType, AtlasEntity> toAtlasEntity(EntityChangeType changeType, final NiFiFlowPath path) {
final AtlasEntity entity = EntityChangeType.CREATED.equals(changeType) ? new AtlasEntity() : new AtlasEntity(path.getExEntity());
entity.setTypeName(TYPE_NIFI_FLOW_PATH);
entity.setVersion(1L);
entity.setAttribute(ATTR_NIFI_FLOW, getAtlasObjectId());
final StringBuilder name = new StringBuilder();
final StringBuilder description = new StringBuilder();
path.getProcessComponentIds().forEach(pid -> {
final String componentName = getProcessComponentName(pid);
if (name.length() > 0) {
name.append(", ");
description.append(", ");
}
name.append(componentName);
description.append(String.format("%s::%s", componentName, pid));
});
path.setName(name.toString());
entity.setAttribute(ATTR_NAME, name.toString());
entity.setAttribute(ATTR_DESCRIPTION, description.toString());
// Use first processor's id as qualifiedName.
entity.setAttribute(ATTR_QUALIFIED_NAME, toQualifiedName(path.getId()));
entity.setAttribute(ATTR_URL, path.createDeepLinkURL(getUrl()));
final boolean inputsChanged = setChangedIOIds(path, entity, true);
final boolean outputsChanged = setChangedIOIds(path, entity, false);
// Even if no flow path metadata has changed, if any IO has changed then the path should be updated.
EntityChangeType finalChangeType = EntityChangeType.AS_IS.equals(changeType)
? (path.isMetadataUpdated() || inputsChanged || outputsChanged ? EntityChangeType.UPDATED : EntityChangeType.AS_IS)
: changeType;
return new Tuple<>(finalChangeType, entity);
}
/**
* Set input or output DataSet ids for a NiFiFlowPath.
* The updated ids contain only active ids.
* @return True if there is any changed IO reference (create, update, delete).
*/
private boolean setChangedIOIds(NiFiFlowPath path, AtlasEntity pathEntity, boolean isInput) {
Set<AtlasObjectId> ids = isInput ? path.getInputs() : path.getOutputs();
String targetAttribute = isInput ? ATTR_INPUTS : ATTR_OUTPUTS;
final Map<EntityChangeType, List<AtlasObjectId>> changedIOIds
= ids.stream().collect(Collectors.groupingBy(this::getFlowPathIOChangeType));
// Remove DELETED references.
final Set<AtlasObjectId> remainingFlowPathIOIds = toRemainingFlowPathIOIds(changedIOIds);
// If the references have changed, update them.
if (path.isDataSetReferenceChanged(remainingFlowPathIOIds, isInput)) {
pathEntity.setAttribute(targetAttribute, remainingFlowPathIOIds);
return true;
}
return false;
}
private Set<AtlasObjectId> toRemainingFlowPathIOIds(Map<EntityChangeType, List<AtlasObjectId>> ids) {
return ids.entrySet().stream()
.filter(entry -> !EntityChangeType.DELETED.equals(entry.getKey()))
.flatMap(entry -> entry.getValue().stream())
.collect(Collectors.toSet());
}
public Map<EntityChangeType, List<AtlasEntity>> getChangedFlowPathEntities() {
// Convert NiFiFlowPath to AtlasEntity.
final HashMap<EntityChangeType, List<AtlasEntity>> changedPaths = flowPaths.values().stream()
.map(path -> {
final EntityChangeType changeType = getFlowPathChangeType(path);
switch (changeType) {
case CREATED:
case UPDATED:
case AS_IS:
return toAtlasEntity(changeType, path);
default:
return new Tuple<>(changeType, path.getExEntity());
}
}).collect(Collectors.groupingBy(Tuple::getKey, HashMap::new, Collectors.mapping(Tuple::getValue, Collectors.toList())));
updateAudit.add("CREATED NiFiFlowPath=" + changedPaths.get(EntityChangeType.CREATED));
updateAudit.add("UPDATED NiFiFlowPath=" + changedPaths.get(EntityChangeType.UPDATED));
updateAudit.add("DELETED NiFiFlowPath=" + changedPaths.get(EntityChangeType.DELETED));
return changedPaths;
}
public List<String> getUpdateAudit() {
return updateAudit;
}
}

View File

@ -0,0 +1,223 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.controller.status.ProcessGroupStatus;
import org.apache.nifi.controller.status.ProcessorStatus;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
public class NiFiFlowAnalyzer {
private static final Logger logger = LoggerFactory.getLogger(NiFiFlowAnalyzer.class);
public void analyzeProcessGroup(NiFiFlow nifiFlow, ProcessGroupStatus rootProcessGroup) {
analyzeProcessGroup(rootProcessGroup, nifiFlow);
analyzeRootGroupPorts(nifiFlow, rootProcessGroup);
}
private void analyzeRootGroupPorts(NiFiFlow nifiFlow, ProcessGroupStatus rootProcessGroup) {
rootProcessGroup.getInputPortStatus().forEach(port -> nifiFlow.addRootInputPort(port));
rootProcessGroup.getOutputPortStatus().forEach(port -> nifiFlow.addRootOutputPort(port));
}
private void analyzeProcessGroup(final ProcessGroupStatus processGroupStatus, final NiFiFlow nifiFlow) {
processGroupStatus.getConnectionStatus().forEach(c -> nifiFlow.addConnection(c));
processGroupStatus.getProcessorStatus().forEach(p -> nifiFlow.addProcessor(p));
processGroupStatus.getRemoteProcessGroupStatus().forEach(r -> nifiFlow.addRemoteProcessGroup(r));
processGroupStatus.getInputPortStatus().forEach(p -> nifiFlow.addInputPort(p));
processGroupStatus.getOutputPortStatus().forEach(p -> nifiFlow.addOutputPort(p));
// Analyze child ProcessGroups recursively.
for (ProcessGroupStatus child : processGroupStatus.getProcessGroupStatus()) {
analyzeProcessGroup(child, nifiFlow);
}
}
private List<String> getIncomingProcessorsIds(NiFiFlow nifiFlow, List<ConnectionStatus> incomingConnections) {
if (incomingConnections == null) {
return Collections.emptyList();
}
final List<String> ids = new ArrayList<>();
incomingConnections.forEach(c -> {
// Ignore self relationship.
final String sourceId = c.getSourceId();
if (!sourceId.equals(c.getDestinationId())) {
if (nifiFlow.isProcessor(sourceId)) {
ids.add(sourceId);
} else {
ids.addAll(getIncomingProcessorsIds(nifiFlow, nifiFlow.getIncomingConnections(sourceId)));
}
}
});
return ids;
}
private List<String> getNextProcessComponent(NiFiFlow nifiFlow, NiFiFlowPath path, String componentId) {
final List<ConnectionStatus> outs = nifiFlow.getOutgoingConnections(componentId);
if (outs == null || outs.isEmpty()) {
return Collections.emptyList();
}
final List<String> nextProcessComponent = new ArrayList<>();
for (ConnectionStatus out : outs) {
final String destinationId = out.getDestinationId();
if (path.getProcessComponentIds().contains(destinationId)) {
// If the connection points back to the current path, then skip it to avoid a loop.
continue;
}
if (nifiFlow.isProcessComponent(destinationId)) {
nextProcessComponent.add(destinationId);
} else {
nextProcessComponent.addAll(getNextProcessComponent(nifiFlow, path, destinationId));
}
}
return nextProcessComponent;
}
private void traverse(NiFiFlow nifiFlow, NiFiFlowPath path, String componentId) {
// If the pid is a RootInputPort of the same NiFi instance, then stop traversing so that a separate self S2S path is created.
// E.g. InputPort -> MergeContent, GenerateFlowFile -> InputPort.
if (path.getProcessComponentIds().size() > 0 && nifiFlow.isRootInputPort(componentId)) {
return;
}
// Add this component to the path if it is a process component, such as a processor or a RootGroup In/Output port.
if (nifiFlow.isProcessComponent(componentId)) {
path.addProcessor(componentId);
}
final List<ConnectionStatus> outs = nifiFlow.getOutgoingConnections(componentId);
if (outs == null || outs.isEmpty()) {
return;
}
// Analyze destination process components.
final List<String> nextProcessComponents = getNextProcessComponent(nifiFlow, path, componentId);
nextProcessComponents.forEach(destPid -> {
if (path.getProcessComponentIds().contains(destPid)) {
// Avoid looping back to itself.
return;
}
// If the destination has more than one input, or there are multiple destinations,
// then it should be treated as a separate flow path.
final boolean createJointPoint = nextProcessComponents.size() > 1
|| getIncomingProcessorsIds(nifiFlow, nifiFlow.getIncomingConnections(destPid)).size() > 1;
if (createJointPoint) {
final boolean alreadyTraversed = nifiFlow.isTraversedPath(destPid);
// Create an input queue DataSet because Atlas doesn't show lineage for a Process that doesn't have both inputs and outputs.
// This DataSet is also useful to link flowPaths together on Atlas lineage graph.
final Tuple<AtlasObjectId, AtlasEntity> queueTuple = nifiFlow.getOrCreateQueue(destPid);
final AtlasObjectId queueId = queueTuple.getKey();
path.getOutputs().add(queueId);
// If the destination is already traversed once, it doesn't have to be visited again.
if (alreadyTraversed) {
return;
}
// Get the existing path or create a new one.
final NiFiFlowPath jointPoint = nifiFlow.getOrCreateFlowPath(destPid);
jointPoint.getInputs().add(queueId);
// Start traversing as a new joint point.
traverse(nifiFlow, jointPoint, destPid);
} else {
// Normal relation, continue digging.
traverse(nifiFlow, path, destPid);
}
});
}
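// A component is a 'head' if none of its incoming connections, followed transitively through
// non-process components, originate from a process component.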
private boolean isHeadProcessor(NiFiFlow nifiFlow, List<ConnectionStatus> ins) {
if (ins == null || ins.isEmpty()) {
return true;
}
return ins.stream().allMatch(
in -> {
// If it has an incoming relationship from another process component, then return false.
final String sourceId = in.getSourceId();
if (nifiFlow.isProcessComponent(sourceId)) {
return false;
}
// Check next level.
final List<ConnectionStatus> incomingConnections = nifiFlow.getIncomingConnections(sourceId);
return isHeadProcessor(nifiFlow, incomingConnections);
}
);
}
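/**
 * Break the analyzed flow into NiFiFlowPath instances. Each path starts from a 'head' process component
 * (a processor with no upstream process components, or a root group input port) and is traversed downstream.
 */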
public void analyzePaths(NiFiFlow nifiFlow) {
final String rootProcessGroupId = nifiFlow.getRootProcessGroupId();
// Now let's break it into flow paths.
final Map<String, ProcessorStatus> processors = nifiFlow.getProcessors();
final Set<String> headProcessComponents = processors.keySet().stream()
.filter(pid -> {
final List<ConnectionStatus> ins = nifiFlow.getIncomingConnections(pid);
return isHeadProcessor(nifiFlow, ins);
})
.collect(Collectors.toSet());
// Use RootInputPorts as headProcessors.
headProcessComponents.addAll(nifiFlow.getRootInputPorts().keySet());
headProcessComponents.forEach(startPid -> {
// By using the startPid as its qualifiedName, it's guaranteed that
// the same path will end up being the same Atlas entity.
// However, if the first processor is replaced by another,
// the flow path will have a different id, and the old path is logically deleted.
final NiFiFlowPath path = nifiFlow.getOrCreateFlowPath(startPid);
traverse(nifiFlow, path, startPid);
});
nifiFlow.getFlowPaths().values().forEach(path -> {
if (processors.containsKey(path.getId())) {
final ProcessorStatus processor = processors.get(path.getId());
path.setGroupId(processor.getGroupId());
} else {
path.setGroupId(rootProcessGroupId);
}
});
}
}

View File

@ -0,0 +1,177 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import static org.apache.nifi.atlas.AtlasUtils.updateMetadata;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
public class NiFiFlowPath implements AtlasProcess {
private final List<String> processComponentIds = new ArrayList<>();
private final String id;
private final Set<AtlasObjectId> inputs = new HashSet<>();
private final Set<AtlasObjectId> outputs = new HashSet<>();
private String atlasGuid;
private String name;
private String groupId;
private AtlasEntity exEntity;
private AtomicBoolean metadataUpdated = new AtomicBoolean(false);
private List<String> updateAudit = new ArrayList<>();
private Set<String> existingInputGuids;
private Set<String> existingOutputGuids;
public NiFiFlowPath(String id) {
this.id = id;
}
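// A path id may carry a lineage hash suffix ("<componentId>::<hash>") so that distinct lineages starting
// from the same component map to distinct Atlas entities; createDeepLinkURL strips the suffix again.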
public NiFiFlowPath(String id, long lineageHash) {
this.id = id + "::" + lineageHash;
}
public AtlasEntity getExEntity() {
return exEntity;
}
public void setExEntity(AtlasEntity exEntity) {
this.exEntity = exEntity;
this.atlasGuid = exEntity.getGuid();
}
public String getAtlasGuid() {
return atlasGuid;
}
public void setAtlasGuid(String atlasGuid) {
this.atlasGuid = atlasGuid;
}
public String getName() {
return name;
}
public void setName(String name) {
updateMetadata(metadataUpdated, updateAudit, ATTR_NAME, this.name, name);
this.name = name;
}
public String getGroupId() {
return groupId;
}
public void setGroupId(String groupId) {
updateMetadata(metadataUpdated, updateAudit, "groupId", this.groupId, groupId);
this.groupId = groupId;
}
public void addProcessor(String processorId) {
processComponentIds.add(processorId);
}
public Set<AtlasObjectId> getInputs() {
return inputs;
}
public Set<AtlasObjectId> getOutputs() {
return outputs;
}
public List<String> getProcessComponentIds() {
return processComponentIds;
}
public String getId() {
return id;
}
public String createDeepLinkURL(String nifiUrl) {
// Remove lineage hash part.
final String componentId = id.split("::")[0];
return componentId.equals(groupId)
// This path represents the root path of a process group.
? String.format("%s?processGroupId=%s", nifiUrl, groupId)
// This path represents a partial flow within a process group, consisting of processors.
: String.format("%s?processGroupId=%s&componentIds=%s", nifiUrl, groupId, componentId);
}
/**
* Start tracking changes from current state.
*/
public void startTrackingChanges(NiFiFlow nifiFlow) {
this.metadataUpdated.set(false);
this.updateAudit.clear();
existingInputGuids = inputs.stream().map(AtlasObjectId::getGuid).collect(Collectors.toSet());
existingOutputGuids = outputs.stream().map(AtlasObjectId::getGuid).collect(Collectors.toSet());
// Remove all nifi_queues that are owned by the nifiFlow so that queues that no longer exist get deleted.
// Any queue that is still needed will be added back when the flow analysis finishes.
final Set<AtlasObjectId> ownedQueues = nifiFlow.getQueues().keySet();
inputs.removeAll(ownedQueues);
outputs.removeAll(ownedQueues);
}
public boolean isMetadataUpdated() {
return this.metadataUpdated.get();
}
public List<String> getUpdateAudit() {
return updateAudit;
}
boolean isDataSetReferenceChanged(Set<AtlasObjectId> ids, boolean isInput) {
final Set<String> guids = ids.stream().map(AtlasObjectId::getGuid).collect(Collectors.toSet());
final Set<String> existingGuids = isInput ? existingInputGuids : existingOutputGuids;
return existingGuids == null || !existingGuids.equals(guids);
}
@Override
public String toString() {
return "NiFiFlowPath{" +
"name='" + name + '\'' +
", inputs=" + inputs +
", outputs=" + outputs +
", processComponentIds=" + processComponentIds +
'}';
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
NiFiFlowPath that = (NiFiFlowPath) o;
return id.equals(that.id);
}
@Override
public int hashCode() {
return id.hashCode();
}
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef;
import org.apache.atlas.model.typedef.AtlasStructDef.AtlasConstraintDef;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class NiFiTypes {
public static final String TYPE_ASSET = "Asset";
public static final String TYPE_REFERENCEABLE = "Referenceable";
public static final String TYPE_PROCESS = "Process";
public static final String TYPE_DATASET = "DataSet";
public static final String TYPE_NIFI_COMPONENT = "nifi_component";
public static final String TYPE_NIFI_FLOW = "nifi_flow";
public static final String TYPE_NIFI_FLOW_PATH = "nifi_flow_path";
public static final String TYPE_NIFI_DATA = "nifi_data";
public static final String TYPE_NIFI_QUEUE = "nifi_queue";
public static final String TYPE_NIFI_INPUT_PORT = "nifi_input_port";
public static final String TYPE_NIFI_OUTPUT_PORT = "nifi_output_port";
public static final String ATTR_GUID = "guid";
public static final String ATTR_TYPENAME = "typeName";
public static final String ATTR_NAME = "name";
public static final String ATTR_CLUSTER_NAME = "clusterName";
public static final String ATTR_DESCRIPTION = "description";
public static final String ATTR_INPUTS = "inputs";
public static final String ATTR_OUTPUTS = "outputs";
public static final String ATTR_URL = "url";
public static final String ATTR_URI = "uri";
public static final String ATTR_PATH = "path";
public static final String ATTR_QUALIFIED_NAME = "qualifiedName";
public static final String ATTR_NIFI_FLOW = "nifiFlow";
public static final String ATTR_FLOW_PATHS = "flowPaths";
public static final String ATTR_QUEUES = "queues";
public static final String ATTR_INPUT_PORTS = "inputPorts";
public static final String ATTR_OUTPUT_PORTS = "outputPorts";
@FunctionalInterface
interface EntityDefinition {
void define(AtlasEntityDef entity, Set<String> superTypes, List<AtlasAttributeDef> attributes);
}
private static String arrayOf(String typeName) {
return "array<" + typeName + ">";
}
private static EntityDefinition NIFI_FLOW = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_REFERENCEABLE);
superTypes.add(TYPE_ASSET);
final AtlasAttributeDef url = new AtlasAttributeDef(ATTR_URL, "string");
final AtlasAttributeDef flowPaths = new AtlasAttributeDef(ATTR_FLOW_PATHS, arrayOf(TYPE_NIFI_FLOW_PATH));
flowPaths.setIsOptional(true);
// Set ownedRef so that child flowPath entities that no longer exist can be deleted when a NiFi flow is updated.
final AtlasConstraintDef ownedRef = new AtlasConstraintDef("ownedRef");
flowPaths.addConstraint(ownedRef);
final AtlasAttributeDef queues = new AtlasAttributeDef(ATTR_QUEUES, arrayOf(TYPE_NIFI_QUEUE));
queues.setIsOptional(true);
queues.addConstraint(ownedRef);
final AtlasAttributeDef inputPorts = new AtlasAttributeDef(ATTR_INPUT_PORTS, arrayOf(TYPE_NIFI_INPUT_PORT));
inputPorts.setIsOptional(true);
inputPorts.addConstraint(ownedRef);
final AtlasAttributeDef outputPorts = new AtlasAttributeDef(ATTR_OUTPUT_PORTS, arrayOf(TYPE_NIFI_OUTPUT_PORT));
outputPorts.setIsOptional(true);
outputPorts.addConstraint(ownedRef);
attributes.add(url);
attributes.add(flowPaths);
attributes.add(queues);
attributes.add(inputPorts);
attributes.add(outputPorts);
};
private static EntityDefinition NIFI_COMPONENT = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
final AtlasAttributeDef nifiFlow = new AtlasAttributeDef(ATTR_NIFI_FLOW, TYPE_NIFI_FLOW);
nifiFlow.setIsOptional(true);
attributes.add(nifiFlow);
};
private static EntityDefinition NIFI_FLOW_PATH = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_PROCESS);
superTypes.add(TYPE_NIFI_COMPONENT);
final AtlasAttributeDef url = new AtlasAttributeDef(ATTR_URL, "string");
attributes.add(url);
};
private static EntityDefinition NIFI_DATA = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_DATASET);
superTypes.add(TYPE_NIFI_COMPONENT);
};
private static EntityDefinition NIFI_QUEUE = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_DATASET);
superTypes.add(TYPE_NIFI_COMPONENT);
};
private static EntityDefinition NIFI_INPUT_PORT = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_DATASET);
superTypes.add(TYPE_NIFI_COMPONENT);
};
private static EntityDefinition NIFI_OUTPUT_PORT = (entity, superTypes, attributes) -> {
entity.setVersion(1L);
superTypes.add(TYPE_DATASET);
superTypes.add(TYPE_NIFI_COMPONENT);
};
static Map<String, EntityDefinition> ENTITIES = new HashMap<>();
static {
ENTITIES.put(TYPE_NIFI_COMPONENT, NIFI_COMPONENT);
ENTITIES.put(TYPE_NIFI_DATA, NIFI_DATA);
ENTITIES.put(TYPE_NIFI_QUEUE, NIFI_QUEUE);
ENTITIES.put(TYPE_NIFI_INPUT_PORT, NIFI_INPUT_PORT);
ENTITIES.put(TYPE_NIFI_OUTPUT_PORT, NIFI_OUTPUT_PORT);
ENTITIES.put(TYPE_NIFI_FLOW_PATH, NIFI_FLOW_PATH);
ENTITIES.put(TYPE_NIFI_FLOW, NIFI_FLOW);
}
static final String[] NIFI_TYPES = ENTITIES.keySet().toArray(new String[ENTITIES.size()]);
}
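A minimal sketch (not part of this commit) of how one of the EntityDefinition lambdas above could be applied to populate an Atlas type definition; the class name NiFiTypesExample is hypothetical, and it assumes the same package so that the package-private ENTITIES map is visible:
package org.apache.nifi.atlas;

import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class NiFiTypesExample {
    public static AtlasEntityDef buildNiFiFlowDef() {
        // Look up the EntityDefinition registered for 'nifi_flow' and let it populate the definition parts.
        final AtlasEntityDef entityDef = new AtlasEntityDef(NiFiTypes.TYPE_NIFI_FLOW);
        final Set<String> superTypes = new HashSet<>();
        final List<AtlasAttributeDef> attributes = new ArrayList<>();
        NiFiTypes.ENTITIES.get(NiFiTypes.TYPE_NIFI_FLOW).define(entityDef, superTypes, attributes);
        entityDef.setSuperTypes(superTypes);
        entityDef.setAttributeDefs(attributes);
        return entityDef;
    }
}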

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.provenance.ProvenanceEventType;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Arrays;
public abstract class AbstractNiFiProvenanceEventAnalyzer implements NiFiProvenanceEventAnalyzer {
/**
* Utility method to parse a string as a URI, wrapping any URISyntaxException in an IllegalArgumentException.
* @param uri uri to parse
* @return parsed URI instance
*/
protected URI parseUri(String uri) {
try {
return new URI(uri);
} catch (URISyntaxException e) {
final String msg = String.format("Failed to parse uri %s due to %s", uri, e);
throw new IllegalArgumentException(msg, e);
}
}
/**
* Utility method to parse a string as a URL, wrapping any MalformedURLException in an IllegalArgumentException.
* @param url url to parse
* @return parsed URL instance
*/
protected URL parseUrl(String url) {
try {
return new URL(url);
} catch (MalformedURLException e) {
final String msg = String.format("Failed to parse url %s due to %s", url, e);
throw new IllegalArgumentException(msg, e);
}
}
protected DataSetRefs singleDataSetRef(String componentId, ProvenanceEventType eventType, Referenceable ref) {
final DataSetRefs refs = new DataSetRefs(componentId);
switch (eventType) {
case SEND:
case REMOTE_INVOCATION:
refs.addOutput(ref);
break;
case FETCH:
case RECEIVE:
refs.addInput(ref);
break;
}
return refs;
}
/**
* Utility method to split comma separated host names. Port number will be removed.
*/
protected String[] splitHostNames(String hostNames) {
return Arrays.stream(hostNames.split(","))
.map(hostName -> hostName.split(":")[0].trim())
.toArray(String[]::new);
}
}
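A short, purely illustrative snippet of what the protected helpers above do, assuming a concrete subclass context; "processor-guid" and someRef are placeholders:
// splitHostNames() keeps host names and drops port numbers.
final String[] hosts = splitHostNames("0.example.com:6667, 1.example.com:6667");
// hosts == {"0.example.com", "1.example.com"}

// singleDataSetRef() routes the Referenceable by event type:
// SEND / REMOTE_INVOCATION -> output, FETCH / RECEIVE -> input.
final Referenceable someRef = new Referenceable("fs_path");
final DataSetRefs refs = singleDataSetRef("processor-guid", ProvenanceEventType.SEND, someRef);
// refs.getOutputs() now contains someRef; refs.getInputs() is empty.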

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.nifi.atlas.resolver.ClusterResolver;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.lineage.ComputeLineageResult;
import java.util.List;
public interface AnalysisContext {
String getNiFiClusterName();
ClusterResolver getClusterResolver();
List<ConnectionStatus> findConnectionTo(String componentId);
List<ConnectionStatus> findConnectionFrom(String componentId);
ComputeLineageResult queryLineage(long eventId);
ComputeLineageResult findParents(long eventId);
ProvenanceEventRecord getProvenanceEvent(long eventId);
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.atlas.typesystem.Referenceable;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;
public class DataSetRefs {
private final Set<String> componentIds;
private Set<Referenceable> inputs;
private Set<Referenceable> outputs;
private boolean referableFromRootPath;
public DataSetRefs(String componentId) {
this.componentIds = Collections.singleton(componentId);
}
public DataSetRefs(Set<String> componentIds) {
this.componentIds = componentIds;
}
public Set<String> getComponentIds() {
return componentIds;
}
public Set<Referenceable> getInputs() {
return inputs != null ? inputs : Collections.emptySet();
}
public void addInput(Referenceable input) {
if (inputs == null) {
inputs = new LinkedHashSet<>();
}
inputs.add(input);
}
public Set<Referenceable> getOutputs() {
return outputs != null ? outputs : Collections.emptySet();
}
public void addOutput(Referenceable output) {
if (outputs == null) {
outputs = new LinkedHashSet<>();
}
outputs.add(output);
}
public boolean isEmpty() {
return (inputs == null || inputs.isEmpty()) && (outputs == null || outputs.isEmpty());
}
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
/**
* Responsible for analyzing NiFi provenance event data to generate Atlas DataSet references.
* Implementations of this interface should be thread safe.
*/
public interface NiFiProvenanceEventAnalyzer {
DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event);
/**
* Returns target component type pattern that this Analyzer supports.
* Note that the component type of a NiFi provenance event only contains the processor type name, without the package name.
* @return A RegularExpression to match with a component type of a provenance event.
*/
default String targetComponentTypePattern() {
return null;
}
/**
* Returns target transit URI pattern that this Analyzer supports.
* @return A RegularExpression to match with a transit URI of a provenance event.
*/
default String targetTransitUriPattern() {
return null;
}
/**
* Returns target provenance event type that this Analyzer supports.
* @return A Provenance event type
*/
default ProvenanceEventType targetProvenanceEventType() {
return null;
}
}
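To show how the three default target* methods drive analyzer selection, here is a hypothetical implementation (not part of this commit) that only overrides targetTransitUriPattern(), so it would be picked for 'ftp://...' transit URIs:
package org.apache.nifi.atlas.provenance.analyzer;

import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;

import java.net.URI;

import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_PATH;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;

/**
 * Hypothetical analyzer, for illustration only: treats an FTP transit URI as a generic 'fs_path' DataSet.
 */
public class FtpPathExample extends AbstractNiFiProvenanceEventAnalyzer {

    @Override
    public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
        final URI uri = parseUri(event.getTransitUri());
        final String clusterName = context.getClusterResolver().fromHostNames(uri.getHost());
        final String path = uri.getPath();
        final Referenceable ref = new Referenceable("fs_path");
        ref.set(ATTR_NAME, path);
        ref.set(ATTR_PATH, path);
        ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, path));
        return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
    }

    @Override
    public String targetTransitUriPattern() {
        return "^ftp://.+$";
    }
}
Because the factory below discovers implementations via ServiceLoader, such a class would also need an entry in META-INF/services/org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer.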

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
public class NiFiProvenanceEventAnalyzerFactory {
/**
* This holder class is used to implement the initialization-on-demand holder idiom and avoid the double-checked locking anti-pattern.
* The static initializer is performed only once per class loader.
* See these links for details:
* <ul>
* <li><a href="https://en.wikipedia.org/wiki/Double-checked_locking">Double-checked locking</a></li>
* <li><a href="https://en.wikipedia.org/wiki/Initialization-on-demand_holder_idiom">Initialization-on-demand holder</a></li>
* </ul>
*/
private static class AnalyzerHolder {
private static final Logger logger = LoggerFactory.getLogger(NiFiProvenanceEventAnalyzerFactory.AnalyzerHolder.class);
private static final Map<Pattern, NiFiProvenanceEventAnalyzer> analyzersForComponentType = new ConcurrentHashMap<>();
private static final Map<Pattern, NiFiProvenanceEventAnalyzer> analyzersForTransitUri = new ConcurrentHashMap<>();
private static final Map<ProvenanceEventType, NiFiProvenanceEventAnalyzer> analyzersForProvenanceEventType = new ConcurrentHashMap<>();
private static void addAnalyzer(String patternStr, Map<Pattern, NiFiProvenanceEventAnalyzer> toAdd,
NiFiProvenanceEventAnalyzer analyzer) {
if (patternStr != null && !patternStr.isEmpty()) {
Pattern pattern = Pattern.compile(patternStr.trim());
toAdd.put(pattern, analyzer);
}
}
static {
logger.debug("Loading NiFiProvenanceEventAnalyzer ...");
final ServiceLoader<NiFiProvenanceEventAnalyzer> serviceLoader
= ServiceLoader.load(NiFiProvenanceEventAnalyzer.class);
serviceLoader.forEach(analyzer -> {
addAnalyzer(analyzer.targetComponentTypePattern(), analyzersForComponentType, analyzer);
addAnalyzer(analyzer.targetTransitUriPattern(), analyzersForTransitUri, analyzer);
final ProvenanceEventType eventType = analyzer.targetProvenanceEventType();
if (eventType != null) {
if (analyzersForProvenanceEventType.containsKey(eventType)) {
logger.warn("Fo ProvenanceEventType {}, an Analyzer {} is already assigned." +
" Only one analyzer for a type can be registered. Ignoring {}",
eventType, analyzersForProvenanceEventType.get(eventType), analyzer);
}
analyzersForProvenanceEventType.put(eventType, analyzer);
}
});
logger.info("Loaded NiFiProvenanceEventAnalyzers: componentTypes={}, transitUris={}", analyzersForComponentType, analyzersForTransitUri);
}
private static Map<Pattern, NiFiProvenanceEventAnalyzer> getAnalyzersForComponentType() {
return analyzersForComponentType;
}
private static Map<Pattern, NiFiProvenanceEventAnalyzer> getAnalyzersForTransitUri() {
return analyzersForTransitUri;
}
private static Map<ProvenanceEventType, NiFiProvenanceEventAnalyzer> getAnalyzersForProvenanceEventType() {
return analyzersForProvenanceEventType;
}
}
/**
* Find and retrieve a NiFiProvenanceEventAnalyzer implementation for the specified targets.
* Pattern matching is performed in the following order, and the first analyzer found is returned:
* <ol>
* <li>Component type name. Use an analyzer supporting the Component type with its {@link NiFiProvenanceEventAnalyzer#targetProvenanceEventType()}.
* <li>TransitUri. Use an analyzer supporting the TransitUri with its {@link NiFiProvenanceEventAnalyzer#targetTransitUriPattern()}.
* <li>Provenance Event Type. Use an analyzer supporting the Provenance Event Type with its {@link NiFiProvenanceEventAnalyzer#targetProvenanceEventType()}.
* </ol>
* @param typeName NiFi component type name.
* @param transitUri Transit URI.
* @param eventType Provenance event type.
* @return Instance of NiFiProvenanceEventAnalyzer if one is found for the specified inputs, otherwise null.
*/
public static NiFiProvenanceEventAnalyzer getAnalyzer(String typeName, String transitUri, ProvenanceEventType eventType) {
for (Map.Entry<Pattern, NiFiProvenanceEventAnalyzer> entry
: NiFiProvenanceEventAnalyzerFactory.AnalyzerHolder.getAnalyzersForComponentType().entrySet()) {
if (entry.getKey().matcher(typeName).matches()) {
return entry.getValue();
}
}
if (transitUri != null) {
for (Map.Entry<Pattern, NiFiProvenanceEventAnalyzer> entry
: NiFiProvenanceEventAnalyzerFactory.AnalyzerHolder.getAnalyzersForTransitUri().entrySet()) {
if (entry.getKey().matcher(transitUri).matches()) {
return entry.getValue();
}
}
}
// If there's no specific implementation, just use generic analyzer.
return NiFiProvenanceEventAnalyzerFactory.AnalyzerHolder.getAnalyzersForProvenanceEventType().get(eventType);
}
}
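A rough caller-side sketch (assuming a ProvenanceEventRecord named event and an AnalysisContext named context are already available) of resolving and running an analyzer through this factory:
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(
        event.getComponentType(), event.getTransitUri(), event.getEventType());
if (analyzer != null) {
    final DataSetRefs refs = analyzer.analyze(context, event);
    if (refs != null && !refs.isEmpty()) {
        // Register refs.getInputs() and refs.getOutputs() as lineage for the owning flow path.
    }
}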

View File

@ -0,0 +1,136 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.resolver.ClusterResolver;
import org.apache.nifi.authorization.user.NiFiUser;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceRepository;
import org.apache.nifi.provenance.lineage.ComputeLineageResult;
import org.apache.nifi.provenance.lineage.ComputeLineageSubmission;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
public class StandardAnalysisContext implements AnalysisContext {
private final Logger logger = LoggerFactory.getLogger(StandardAnalysisContext.class);
private final NiFiFlow nifiFlow;
private final ClusterResolver clusterResolver;
private final ProvenanceRepository provenanceRepository;
public StandardAnalysisContext(NiFiFlow nifiFlow, ClusterResolver clusterResolver,
ProvenanceRepository provenanceRepository) {
this.nifiFlow = nifiFlow;
this.clusterResolver = clusterResolver;
this.provenanceRepository = provenanceRepository;
}
@Override
public List<ConnectionStatus> findConnectionTo(String componentId) {
return nifiFlow.getIncomingConnections(componentId);
}
@Override
public List<ConnectionStatus> findConnectionFrom(String componentId) {
return nifiFlow.getOutgoingConnections(componentId);
}
@Override
public String getNiFiClusterName() {
return nifiFlow.getClusterName();
}
@Override
public ClusterResolver getClusterResolver() {
return clusterResolver;
}
private ComputeLineageResult getLineageResult(long eventId, ComputeLineageSubmission submission) {
final ComputeLineageResult result = submission.getResult();
try {
if (result.awaitCompletion(10, TimeUnit.SECONDS)) {
return result;
}
logger.warn("Lineage query for {} timed out.", new Object[]{eventId});
} catch (InterruptedException e) {
logger.warn("Lineage query for {} was interrupted due to {}.", new Object[]{eventId, e}, e);
} finally {
submission.cancel();
}
return null;
}
@Override
public ComputeLineageResult queryLineage(long eventId) {
final ComputeLineageSubmission submission = provenanceRepository.submitLineageComputation(eventId, NIFI_USER);
return getLineageResult(eventId, submission);
}
@Override
public ComputeLineageResult findParents(long eventId) {
final ComputeLineageSubmission submission = provenanceRepository.submitExpandParents(eventId, NIFI_USER);
return getLineageResult(eventId, submission);
}
// NOTE: This user is required to avoid NullPointerException at PersistentProvenanceRepository.submitLineageComputation
private static final QueryNiFiUser NIFI_USER = new QueryNiFiUser();
private static class QueryNiFiUser implements NiFiUser {
@Override
public String getIdentity() {
return StandardAnalysisContext.class.getSimpleName();
}
@Override
public Set<String> getGroups() {
return Collections.emptySet();
}
@Override
public NiFiUser getChain() {
return null;
}
@Override
public boolean isAnonymous() {
return true;
}
@Override
public String getClientAddress() {
return null;
}
}
@Override
public ProvenanceEventRecord getProvenanceEvent(long eventId) {
try {
return provenanceRepository.getEvent(eventId);
} catch (IOException e) {
logger.error("Failed to get provenance event for {} due to {}", new Object[]{eventId, e}, e);
return null;
}
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.util.Tuple;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_CLUSTER_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.toTableNameStr;
public abstract class AbstractHiveAnalyzer extends AbstractNiFiProvenanceEventAnalyzer {
static final String TYPE_DATABASE = "hive_db";
static final String TYPE_TABLE = "hive_table";
static final String ATTR_DB = "db";
protected Referenceable createDatabaseRef(String clusterName, String databaseName) {
final Referenceable ref = new Referenceable(TYPE_DATABASE);
ref.set(ATTR_NAME, databaseName);
ref.set(ATTR_CLUSTER_NAME, clusterName);
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, databaseName));
return ref;
}
protected Referenceable createTableRef(String clusterName, Tuple<String, String> tableName) {
final Referenceable ref = new Referenceable(TYPE_TABLE);
ref.set(ATTR_NAME, tableName.getValue());
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, toTableNameStr(tableName)));
ref.set(ATTR_DB, createDatabaseRef(clusterName, tableName.getKey()));
return ref;
}
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
public class DatabaseAnalyzerUtil {
private static final Logger logger = LoggerFactory.getLogger(DatabaseAnalyzerUtil.class);
public static final String ATTR_INPUT_TABLES = "query.input.tables";
public static final String ATTR_OUTPUT_TABLES = "query.output.tables";
public static Set<Tuple<String, String>> parseTableNames(String connectedDatabaseName, String tableNamesStr) {
if (tableNamesStr == null || tableNamesStr.isEmpty()) {
return Collections.emptySet();
}
return Arrays.stream(tableNamesStr.split(","))
.map(String::trim).filter(s -> !s.isEmpty())
.map(t -> DatabaseAnalyzerUtil.parseTableName(connectedDatabaseName, t))
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}
private static Tuple<String, String> parseTableName(String connectedDatabaseName, String tableNameStr) {
final String[] tableNameSplit = tableNameStr.split("\\.");
if (tableNameSplit.length != 1 && tableNameSplit.length != 2) {
logger.warn("Unexpected table name format: {}", tableNameStr);
return null;
}
final String databaseName = tableNameSplit.length == 2 ? tableNameSplit[0] : connectedDatabaseName;
final String tableName = tableNameSplit.length == 2 ? tableNameSplit[1] : tableNameSplit[0];
return new Tuple<>(databaseName, tableName);
}
public static String toTableNameStr(Tuple<String, String> tableName) {
return toTableNameStr(tableName.getKey(), tableName.getValue());
}
public static String toTableNameStr(String databaseName, String tableName) {
return String.format("%s.%s", databaseName, tableName);
}
}
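A quick illustration of the parsing helpers above (values invented): unqualified table names fall back to the connected database, while qualified names keep their own:
// parseTableNames("default", "databaseA.tableA, tableB")
//   -> { Tuple("databaseA", "tableA"), Tuple("default", "tableB") }
final Set<Tuple<String, String>> tables =
        DatabaseAnalyzerUtil.parseTableNames("default", "databaseA.tableA, tableB");

// toTableNameStr(new Tuple<>("databaseA", "tableA")) -> "databaseA.tableA"
final String qualified = DatabaseAnalyzerUtil.toTableNameStr(new Tuple<>("databaseA", "tableA"));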

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_PATH;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
/**
* Analyze a transit URI as a file system path.
* <li>qualifiedName=/path/fileName@hostname (example: /tmp/dir/filename.txt@host.example.com)
* <li>name=/path/fileName (example: /tmp/dir/filename.txt)
*/
public class FilePath extends AbstractNiFiProvenanceEventAnalyzer {
private static final Logger logger = LoggerFactory.getLogger(FilePath.class);
private static final String TYPE = "fs_path";
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final Referenceable ref = new Referenceable(TYPE);
final URI uri = parseUri(event.getTransitUri());
final String clusterName;
try {
// Use the hostname in the URI if available, e.g. for a remote path.
final String uriHost = uri.getHost();
final String hostname = StringUtils.isEmpty(uriHost) ? InetAddress.getLocalHost().getHostName() : uriHost;
clusterName = context.getClusterResolver().fromHostNames(hostname);
} catch (UnknownHostException e) {
logger.warn("Failed to get localhost name due to " + e, e);
return null;
}
final String path = uri.getPath();
ref.set(ATTR_NAME, path);
ref.set(ATTR_PATH, path);
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, path));
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetTransitUriPattern() {
return "^file:/.+$";
}
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_URI;
/**
* Analyze a transit URI as a HBase table.
* <li>qualifiedName=tableName@clusterName (example: myTable@cl1)
* <li>name=tableName (example: myTable)
*/
public class HBaseTable extends AbstractNiFiProvenanceEventAnalyzer {
private static final Logger logger = LoggerFactory.getLogger(HBaseTable.class);
private static final String TYPE = "hbase_table";
// hbase://masterAddress/hbaseTableName/hbaseRowId(optional)
private static final Pattern URI_PATTERN = Pattern.compile("^hbase://([^/]+)/([^/]+)/?.*$");
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final String transitUri = event.getTransitUri();
final Matcher uriMatcher = URI_PATTERN.matcher(transitUri);
if (!uriMatcher.matches()) {
logger.warn("Unexpected transit URI: {}", new Object[]{transitUri});
return null;
}
final Referenceable ref = new Referenceable(TYPE);
final String[] hostNames = splitHostNames(uriMatcher.group(1));
final String clusterName = context.getClusterResolver().fromHostNames(hostNames);
final String tableName = uriMatcher.group(2);
ref.set(ATTR_NAME, tableName);
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, tableName));
// TODO: 'uri' is a mandatory attribute, but what should we set?
ref.set(ATTR_URI, transitUri);
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetTransitUriPattern() {
return "^hbase://.+$";
}
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import java.net.URI;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_CLUSTER_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_PATH;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
/**
* Analyze a transit URI as a HDFS path.
* <li>qualifiedName=/path/fileName@clusterName (example: /app/warehouse/hive/db/default@cl1)
* <li>name=/path/fileName (example: /app/warehouse/hive/db/default)
*/
public class HDFSPath extends AbstractNiFiProvenanceEventAnalyzer {
private static final String TYPE = "hdfs_path";
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final Referenceable ref = new Referenceable(TYPE);
final URI uri = parseUri(event.getTransitUri());
final String clusterName = context.getClusterResolver().fromHostNames(uri.getHost());
final String path = uri.getPath();
ref.set(ATTR_NAME, path);
ref.set(ATTR_PATH, path);
ref.set(ATTR_CLUSTER_NAME, clusterName);
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, path));
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetTransitUriPattern() {
return "^hdfs://.+$";
}
}

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.util.Tuple;
import java.net.URI;
import java.util.Set;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_INPUT_TABLES;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_OUTPUT_TABLES;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.parseTableNames;
/**
* Analyze provenance events for Hive2 using JDBC.
* <ul>
* <li>If a Provenance event has 'query.input.tables' or 'query.output.tables' attributes then 'hive_table' DataSet reference is created:
* <ul>
* <li>qualifiedName=tableName@clusterName (example: myTable@cl1)
* <li>name=tableName (example: myTable)
* </ul>
* </li>
* <li>If not, 'hive_database' DataSet reference is created from transit URI:
* <ul>
* <li>qualifiedName=dbName@clusterName (example: default@cl1)
* <li>dbName (example: default)
* </ul>
* </li>
* </ul>
*/
public class Hive2JDBC extends AbstractHiveAnalyzer {
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
// Replace the colon so that the scheme in the URI can be parsed correctly.
final String transitUri = event.getTransitUri().replaceFirst("^jdbc:hive2", "jdbc-hive2");
final URI uri = parseUri(transitUri);
final String clusterName = context.getClusterResolver().fromHostNames(uri.getHost());
// Remove the leading '/'
final String path = uri.getPath();
// If uri does not contain database name, then use 'default' as database name.
final String connectedDatabaseName = path == null || path.isEmpty() ? "default" : path.substring(1);
final Set<Tuple<String, String>> inputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_INPUT_TABLES));
final Set<Tuple<String, String>> outputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_OUTPUT_TABLES));
if (inputTables.isEmpty() && outputTables.isEmpty()) {
// If input/output tables are unknown, create database level lineage.
return getDatabaseRef(event.getComponentId(), event.getEventType(),
clusterName, connectedDatabaseName);
}
final DataSetRefs refs = new DataSetRefs(event.getComponentId());
addRefs(refs, true, clusterName, inputTables);
addRefs(refs, false, clusterName, outputTables);
return refs;
}
private DataSetRefs getDatabaseRef(String componentId, ProvenanceEventType eventType,
String clusterName, String databaseName) {
final Referenceable ref = createDatabaseRef(clusterName, databaseName);
return singleDataSetRef(componentId, eventType, ref);
}
private void addRefs(DataSetRefs refs, boolean isInput, String clusterName,
Set<Tuple<String, String>> tableNames) {
tableNames.forEach(tableName -> {
final Referenceable ref = createTableRef(clusterName, tableName);
if (isInput) {
refs.addInput(ref);
} else {
refs.addOutput(ref);
}
});
}
@Override
public String targetTransitUriPattern() {
return "^jdbc:hive2://.+$";
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_URI;
/**
* Analyze a transit URI as a Kafka topic.
* <li>qualifiedName=topicName@clusterName (example: testTopic@cl1)
* <li>name=topicName (example: testTopic)
*/
public class KafkaTopic extends AbstractNiFiProvenanceEventAnalyzer {
private static final Logger logger = LoggerFactory.getLogger(KafkaTopic.class);
private static final String TYPE = "kafka_topic";
private static final String ATTR_TOPIC = "topic";
// PLAINTEXT://0.example.com:6667,1.example.com:6667/topicA
private static final Pattern URI_PATTERN = Pattern.compile("^.+://([^/]+)/(.+)$");
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final Referenceable ref = new Referenceable(TYPE);
final String transitUri = event.getTransitUri();
if (transitUri == null) {
return null;
}
final Matcher uriMatcher = URI_PATTERN.matcher(transitUri);
if (!uriMatcher.matches()) {
logger.warn("Unexpected transit URI: {}", new Object[]{transitUri});
return null;
}
String clusterName = null;
for (String broker : uriMatcher.group(1).split(",")) {
final String brokerHostname = broker.split(":")[0].trim();
clusterName = context.getClusterResolver().fromHostNames(brokerHostname);
if (clusterName != null && !clusterName.isEmpty()) {
break;
}
}
final String topicName = uriMatcher.group(2);
ref.set(ATTR_NAME, topicName);
ref.set(ATTR_TOPIC, topicName);
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(clusterName, topicName));
ref.set(ATTR_URI, transitUri);
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetComponentTypePattern() {
return "^(Publish|Consume)Kafka.*$";
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT;
/**
* Analyze a transit URI as a NiFi Site-to-Site remote input/output port.
* <li>qualifiedName=remotePortGUID@clusterName (example: 35dbc0ab-015e-1000-144c-a8d71255027d@cl1)
* <li>name=portName (example: input)
*/
public class NiFiRemotePort extends NiFiS2S {
private static final Logger logger = LoggerFactory.getLogger(NiFiRemotePort.class);
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
if (!ProvenanceEventType.SEND.equals(event.getEventType())
&& !ProvenanceEventType.RECEIVE.equals(event.getEventType())) {
return null;
}
final boolean isRemoteInputPort = event.getComponentType().equals("Remote Input Port");
final String type = isRemoteInputPort ? TYPE_NIFI_INPUT_PORT : TYPE_NIFI_OUTPUT_PORT;
final String remotePortId = event.getComponentId();
final S2STransitUrl s2sUrl = parseTransitURL(event.getTransitUri(), context.getClusterResolver());
// Find connections that connect to/from the remote port.
final List<ConnectionStatus> connections = isRemoteInputPort
? context.findConnectionTo(remotePortId)
: context.findConnectionFrom(remotePortId);
if (connections == null || connections.isEmpty()) {
logger.warn("Connection was not found: {}", new Object[]{event});
return null;
}
// The name of the remote port can be retrieved from any connection, so use the first one.
final ConnectionStatus connection = connections.get(0);
final Referenceable ref = new Referenceable(type);
ref.set(ATTR_NAME, isRemoteInputPort ? connection.getDestinationName() : connection.getSourceName());
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(s2sUrl.clusterName, s2sUrl.targetPortId));
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetComponentTypePattern() {
return "^Remote (In|Out)put Port$";
}
}

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT;
/**
* Analyze a provenance event as a NiFi RootGroupPort for Site-to-Site communication at the server side.
* <li>qualifiedName=rootPortGUID (example: 35dbc0ab-015e-1000-144c-a8d71255027d)
* <li>name=portName (example: input)
*/
public class NiFiRootGroupPort extends NiFiS2S {
private static final Logger logger = LoggerFactory.getLogger(NiFiRootGroupPort.class);
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
if (!ProvenanceEventType.SEND.equals(event.getEventType())
&& !ProvenanceEventType.RECEIVE.equals(event.getEventType())) {
return null;
}
final boolean isInputPort = event.getComponentType().equals("Input Port");
final String type = isInputPort ? TYPE_NIFI_INPUT_PORT : TYPE_NIFI_OUTPUT_PORT;
final String rootPortId = event.getComponentId();
final S2STransitUrl s2sUrl = parseTransitURL(event.getTransitUri(), context.getClusterResolver());
// Find connections connecting to/from the root group port.
final List<ConnectionStatus> connections = isInputPort
? context.findConnectionFrom(rootPortId)
: context.findConnectionTo(rootPortId);
if (connections == null || connections.isEmpty()) {
logger.warn("Connection was not found: {}", new Object[]{event});
return null;
}
// The name of the port can be retrieved from any connection, so use the first one.
final ConnectionStatus connection = connections.get(0);
final Referenceable ref = new Referenceable(type);
ref.set(ATTR_NAME, isInputPort ? connection.getSourceName() : connection.getDestinationName());
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(s2sUrl.clusterName, rootPortId));
return singleDataSetRef(event.getComponentId(), event.getEventType(), ref);
}
@Override
public String targetComponentTypePattern() {
return "^(In|Out)put Port$";
}
}

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.resolver.ClusterResolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public abstract class NiFiS2S extends AbstractNiFiProvenanceEventAnalyzer {
private static final Logger logger = LoggerFactory.getLogger(NiFiS2S.class);
private static final Pattern RAW_URL_REGEX = Pattern.compile("([0-9a-zA-Z\\-]+)");
private static final Pattern HTTP_URL_REGEX = Pattern.compile(".*/nifi-api/data-transfer/(in|out)put-ports/([[0-9a-zA-Z\\-]]+)/transactions/.*");
protected S2STransitUrl parseTransitURL(String transitUri, ClusterResolver clusterResolver) {
final URL url = parseUrl(transitUri);
final String clusterName = clusterResolver.fromHostNames(url.getHost());
final String targetPortId;
final String protocol = url.getProtocol().toLowerCase();
switch (protocol) {
case "http":
case "https": {
final Matcher uriMatcher = matchUrl(url, HTTP_URL_REGEX);
targetPortId = uriMatcher.group(2);
}
break;
case "nifi": {
final Matcher uriMatcher = matchUrl(url, RAW_URL_REGEX);
targetPortId = uriMatcher.group(1);
}
break;
default:
throw new IllegalArgumentException("Protocol " + protocol + " is not supported as NiFi S2S transit URL.");
}
return new S2STransitUrl(clusterName, targetPortId);
}
private Matcher matchUrl(URL url, Pattern pattern) {
final Matcher uriMatcher = pattern.matcher(url.getPath());
if (!uriMatcher.matches()) {
throw new IllegalArgumentException("Unexpected transit URI: " + url);
}
return uriMatcher;
}
protected static class S2STransitUrl {
final String clusterName;
final String targetPortId;
public S2STransitUrl(String clusterName, String targetPortId) {
this.clusterName = clusterName;
this.targetPortId = targetPortId;
}
}
}
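For illustration (the transit URI below is hypothetical), an HTTP(S) Site-to-Site transit URI exposes the target port ID in its data-transfer path; called from a subclass with a ClusterResolver named clusterResolver, parseTransitURL would behave like this:
final S2STransitUrl parsed = parseTransitURL(
        "https://nifi.example.com:8443/nifi-api/data-transfer/input-ports/"
                + "35dbc0ab-015e-1000-144c-a8d71255027d/transactions/tx-1/flow-files",
        clusterResolver);
// parsed.targetPortId == "35dbc0ab-015e-1000-144c-a8d71255027d"
// parsed.clusterName  == whatever clusterResolver.fromHostNames("nifi.example.com") returns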

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.util.Tuple;
import java.net.URI;
import java.util.Set;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_OUTPUT_TABLES;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.parseTableNames;
/**
* Analyze provenance events for PutHiveStreamingProcessor.
* <li>qualifiedName=tableName@clusterName (example: myTable@cl1)
* <li>name=tableName (example: myTable)
*/
public class PutHiveStreaming extends AbstractHiveAnalyzer {
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final URI uri = parseUri(event.getTransitUri());
final String clusterName = context.getClusterResolver().fromHostNames(uri.getHost());
final Set<Tuple<String, String>> outputTables = parseTableNames(null, event.getAttribute(ATTR_OUTPUT_TABLES));
if (outputTables.isEmpty()) {
return null;
}
final DataSetRefs refs = new DataSetRefs(event.getComponentId());
outputTables.forEach(tableName -> {
final Referenceable ref = createTableRef(clusterName, tableName);
refs.addOutput(ref);
});
return refs;
}
@Override
public String targetComponentTypePattern() {
return "^PutHiveStreaming$";
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import java.util.List;
/**
* Analyze a CREATE event and create a 'nifi_data' DataSet when no specific Analyzer implementation is found.
* <li>qualifiedName=NiFiComponentId@clusterName (example: processor GUID@cl1)
* <li>name=NiFiComponentType (example: GenerateFlowFile)
*/
public class Create extends UnknownInput {
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
// Check if this component is a processor that generates data.
final String componentId = event.getComponentId();
final List<ConnectionStatus> incomingConnections = context.findConnectionTo(componentId);
if (incomingConnections != null && !incomingConnections.isEmpty()) {
return null;
}
return super.analyze(context, event);
}
@Override
public ProvenanceEventType targetProvenanceEventType() {
return ProvenanceEventType.CREATE;
}
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.nifi.provenance.ProvenanceEventType;
/**
* Analyze a FETCH event and create a 'nifi_data' DataSet when no specific Analyzer implementation is found.
* <li>qualifiedName=NiFiComponentId@clusterName (example: processor GUID@cl1)
* <li>name=NiFiComponentType (example: FetchXXX)
*/
public class Fetch extends UnknownInput {
@Override
public ProvenanceEventType targetProvenanceEventType() {
return ProvenanceEventType.FETCH;
}
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.nifi.provenance.ProvenanceEventType;
/**
* Analyze a RECEIVE event and create a 'nifi_data' DataSet when no specific Analyzer implementation is found.
* <li>qualifiedName=NiFiComponentId@clusterName (example: processor GUID@cl1)
* <li>name=NiFiComponentType (example: GetXXX)
*/
public class Receive extends UnknownInput {
@Override
public ProvenanceEventType targetProvenanceEventType() {
return ProvenanceEventType.RECEIVE;
}
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.nifi.provenance.ProvenanceEventType;
/**
* Analyze a REMOTE_INVOCATION event and create a 'nifi_data' DataSet when no specific Analyzer implementation is found.
* <li>qualifiedName=NiFiComponentId@clusterName (example: processor GUID@cl1)
* <li>name=NiFiComponentType (example: XXX)
*/
public class RemoteInvocation extends UnknownOutput {
@Override
public ProvenanceEventType targetProvenanceEventType() {
return ProvenanceEventType.REMOTE_INVOCATION;
}
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.nifi.provenance.ProvenanceEventType;
/**
* Analyze a SEND event and create a 'nifi_data' DataSet when no specific Analyzer implementation is found.
* <li>qualifiedName=NiFiComponentId@clusterName (example: processor GUID@cl1)
* <li>name=NiFiComponentType (example: PutXXX)
*/
public class Send extends UnknownOutput {
@Override
public ProvenanceEventType targetProvenanceEventType() {
return ProvenanceEventType.SEND;
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AbstractNiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
public abstract class UnknownDataSet extends AbstractNiFiProvenanceEventAnalyzer {
protected static final String TYPE = "nifi_data";
protected Referenceable createDataSetRef(AnalysisContext context, ProvenanceEventRecord event) {
final Referenceable ref = new Referenceable(TYPE);
ref.set(ATTR_NAME, event.getComponentType());
ref.set(ATTR_QUALIFIED_NAME, toQualifiedName(context.getNiFiClusterName(), event.getComponentId()));
ref.set(ATTR_DESCRIPTION, event.getEventType() + " was performed by " + event.getComponentType());
return ref;
}
}
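For illustration, a hedged sketch of the 'nifi_data' DataSet that createDataSetRef would produce for a GenerateFlowFile processor; the processor GUID, cluster name and literal attribute keys are assumptions made for this example and are not part of this commit.
import org.apache.atlas.typesystem.Referenceable;
public class NiFiDataRefSketch {
    public static void main(String[] args) {
        // Hypothetical GUID and cluster name; the literal keys are assumed to match
        // the ATTR_NAME, ATTR_QUALIFIED_NAME and ATTR_DESCRIPTION constants used above.
        final Referenceable ref = new Referenceable("nifi_data");
        ref.set("name", "GenerateFlowFile");
        ref.set("qualifiedName", "2f4a1b7c-0158-1000-89ab-000000000000@cl1");
        ref.set("description", "CREATE was performed by GenerateFlowFile");
        System.out.println(ref);
    }
}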

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
public abstract class UnknownInput extends UnknownDataSet {
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final String componentId = event.getComponentId();
final DataSetRefs refs = new DataSetRefs(componentId);
final Referenceable ref = createDataSetRef(context, event);
refs.addInput(ref);
return refs;
}
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer.unknown;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
public abstract class UnknownOutput extends UnknownDataSet {
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
final String componentId = event.getComponentId();
final DataSetRefs refs = new DataSetRefs(componentId);
final Referenceable ref = createDataSetRef(context, event);
refs.addOutput(ref);
return refs;
}
}

View File

@ -0,0 +1,178 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.notification.hook.HookNotification;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.NiFiFlowPath;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.apache.nifi.atlas.AtlasUtils.toStr;
import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName;
import static org.apache.nifi.atlas.NiFiAtlasHook.NIFI_USER;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NIFI_FLOW;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_URL;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH;
public abstract class AbstractLineageStrategy implements LineageStrategy {
protected Logger logger = LoggerFactory.getLogger(getClass());
private LineageContext lineageContext;
public void setLineageContext(LineageContext lineageContext) {
this.lineageContext = lineageContext;
}
protected DataSetRefs executeAnalyzer(AnalysisContext analysisContext, ProvenanceEventRecord event) {
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(event.getComponentType(), event.getTransitUri(), event.getEventType());
if (analyzer == null) {
return null;
}
if (logger.isDebugEnabled()) {
logger.debug("Analyzer {} is found for event: {}", analyzer, event);
}
return analyzer.analyze(analysisContext, event);
}
protected void addDataSetRefs(NiFiFlow nifiFlow, DataSetRefs refs) {
final Set<NiFiFlowPath> flowPaths = refs.getComponentIds().stream()
.map(componentId -> {
final NiFiFlowPath flowPath = nifiFlow.findPath(componentId);
if (flowPath == null) {
logger.warn("FlowPath for {} was not found.", componentId);
}
return flowPath;
})
.filter(Objects::nonNull)
.collect(Collectors.toSet());
addDataSetRefs(nifiFlow, flowPaths, refs);
}
protected void addDataSetRefs(NiFiFlow nifiFlow, Set<NiFiFlowPath> flowPaths, DataSetRefs refs) {
// create reference to NiFi flow path.
final Referenceable flowRef = toReferenceable(nifiFlow);
final String clusterName = nifiFlow.getClusterName();
final String url = nifiFlow.getUrl();
for (NiFiFlowPath flowPath : flowPaths) {
final Referenceable flowPathRef = toReferenceable(flowPath, flowRef, clusterName, url);
addDataSetRefs(refs, flowPathRef);
}
}
private Referenceable toReferenceable(NiFiFlow nifiFlow) {
final Referenceable flowRef = new Referenceable(TYPE_NIFI_FLOW);
flowRef.set(ATTR_NAME, nifiFlow.getFlowName());
flowRef.set(ATTR_QUALIFIED_NAME, nifiFlow.getQualifiedName());
flowRef.set(ATTR_URL, nifiFlow.getUrl());
return flowRef;
}
protected Referenceable toReferenceable(NiFiFlowPath flowPath, NiFiFlow nifiFlow) {
return toReferenceable(flowPath, toReferenceable(nifiFlow),
nifiFlow.getClusterName(), nifiFlow.getUrl());
}
private Referenceable toReferenceable(NiFiFlowPath flowPath, Referenceable flowRef, String clusterName, String nifiUrl) {
final Referenceable flowPathRef = new Referenceable(TYPE_NIFI_FLOW_PATH);
flowPathRef.set(ATTR_NAME, flowPath.getName());
flowPathRef.set(ATTR_QUALIFIED_NAME, flowPath.getId() + "@" + clusterName);
flowPathRef.set(ATTR_NIFI_FLOW, flowRef);
flowPathRef.set(ATTR_URL, flowPath.createDeepLinkURL(nifiUrl));
// A Referenceable has to have a GUID assigned, otherwise it will not be stored due to missing required attributes.
// If a Referenceable has a GUID, Atlas does not validate all required attributes.
flowPathRef.set(ATTR_INPUTS, flowPath.getInputs().stream().map(this::toReferenceable).collect(Collectors.toList()));
flowPathRef.set(ATTR_OUTPUTS, flowPath.getOutputs().stream().map(this::toReferenceable).collect(Collectors.toList()));
return flowPathRef;
}
private Referenceable toReferenceable(AtlasObjectId id) {
return StringUtils.isEmpty(id.getGuid())
? new Referenceable(id.getTypeName(), id.getUniqueAttributes())
: new Referenceable(id.getGuid(), id.getTypeName(), id.getUniqueAttributes());
}
protected void createEntity(Referenceable ... entities) {
final HookNotification.EntityCreateRequest msg = new HookNotification.EntityCreateRequest(NIFI_USER, entities);
lineageContext.addMessage(msg);
}
@SuppressWarnings("unchecked")
private boolean addDataSetRefs(Set<Referenceable> refsToAdd, Referenceable nifiFlowPath, String targetAttribute) {
if (refsToAdd != null && !refsToAdd.isEmpty()) {
// If nifiFlowPath already has a given dataSetRef, then it does not need to be created.
final Function<Referenceable, String> toTypedQualifiedName = ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME)));
final Collection<Referenceable> refs = Optional.ofNullable((Collection<Referenceable>) nifiFlowPath.get(targetAttribute)).orElseGet(ArrayList::new);
final Set<String> existingRefTypedQualifiedNames = refs.stream().map(toTypedQualifiedName).collect(Collectors.toSet());
refsToAdd.stream().filter(ref -> !existingRefTypedQualifiedNames.contains(toTypedQualifiedName.apply(ref)))
.forEach(ref -> {
if (ref.getId().isUnassigned()) {
// Create new entity.
logger.debug("Found a new DataSet reference from {} to {}, sending an EntityCreateRequest",
new Object[]{toTypedQualifiedName.apply(nifiFlowPath), toTypedQualifiedName.apply(ref)});
final HookNotification.EntityCreateRequest createDataSet = new HookNotification.EntityCreateRequest(NIFI_USER, ref);
lineageContext.addMessage(createDataSet);
}
refs.add(ref);
});
if (refs.size() > existingRefTypedQualifiedNames.size()) {
// Something has been added.
nifiFlowPath.set(targetAttribute, refs);
return true;
}
}
return false;
}
protected void addDataSetRefs(DataSetRefs dataSetRefs, Referenceable flowPathRef) {
final boolean inputsAdded = addDataSetRefs(dataSetRefs.getInputs(), flowPathRef, ATTR_INPUTS);
final boolean outputsAdded = addDataSetRefs(dataSetRefs.getOutputs(), flowPathRef, ATTR_OUTPUTS);
if (inputsAdded || outputsAdded) {
lineageContext.addMessage(new HookNotification.EntityPartialUpdateRequest(NIFI_USER, TYPE_NIFI_FLOW_PATH,
ATTR_QUALIFIED_NAME, (String) flowPathRef.get(ATTR_QUALIFIED_NAME), flowPathRef));
}
}
}
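The de-duplication above keys existing references by type name plus qualified name, so that a DataSet already attached to the flow path is neither re-added nor re-created. Below is a minimal, self-contained sketch of that idea (not part of this commit); the key format is an assumption for illustration, while the real code derives it from AtlasUtils.toTypedQualifiedName.
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
public class DataSetRefDedupSketch {
    private static String key(String typeName, String qualifiedName) {
        return typeName + "::" + qualifiedName; // assumed format, for illustration only
    }
    public static void main(String[] args) {
        final Set<String> existing = new HashSet<>(Arrays.asList(key("nifi_queue", "queue-guid@cl1")));
        final List<String> candidates = Arrays.asList(
                key("nifi_queue", "queue-guid@cl1"),  // already referenced, skipped
                key("hive_table", "tableA@cl1"));     // new, would trigger an EntityCreateRequest
        final List<String> added = candidates.stream()
                .filter(k -> !existing.contains(k))
                .collect(Collectors.toList());
        System.out.println(added); // [hive_table::tableA@cl1]
    }
}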

View File

@ -0,0 +1,255 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.NiFiFlowPath;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.provenance.lineage.ComputeLineageResult;
import org.apache.nifi.provenance.lineage.LineageNode;
import org.apache.nifi.provenance.lineage.LineageNodeType;
import org.apache.nifi.util.Tuple;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.CRC32;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.AtlasUtils.toStr;
import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE;
import static org.apache.nifi.provenance.ProvenanceEventType.DROP;
public class CompleteFlowPathLineage extends AbstractLineageStrategy {
@Override
public ProvenanceEventType[] getTargetEventTypes() {
return new ProvenanceEventType[]{DROP};
}
@Override
public void processEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event) {
if (!ProvenanceEventType.DROP.equals(event.getEventType())) {
return;
}
final ComputeLineageResult lineage = analysisContext.queryLineage(event.getEventId());
// Construct a tree model to traverse backwards.
final Map<String, List<LineageNode>> lineageTree = new HashMap<>();
analyzeLineageTree(lineage, lineageTree);
final LineagePath lineagePath = new LineagePath();
extractLineagePaths(analysisContext, lineageTree, lineagePath, event);
analyzeLineagePath(analysisContext, lineagePath);
// Input and output data set are both required to report lineage.
List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths = new ArrayList<>();
if (lineagePath.isComplete()) {
createCompleteFlowPath(nifiFlow, lineagePath, createdFlowPaths);
for (Tuple<NiFiFlowPath, DataSetRefs> createdFlowPath : createdFlowPaths) {
final NiFiFlowPath flowPath = createdFlowPath.getKey();
createEntity(toReferenceable(flowPath, nifiFlow));
addDataSetRefs(nifiFlow, Collections.singleton(flowPath), createdFlowPath.getValue());
}
createdFlowPaths.clear();
}
}
private List<LineageNode> findParentEvents(Map<String, List<LineageNode>> lineageTree, ProvenanceEventRecord event) {
List<LineageNode> parentNodes = lineageTree.get(String.valueOf(event.getEventId()));
return parentNodes == null || parentNodes.isEmpty() ? null : parentNodes.stream()
// In case it's not a provenance event node (i.e. a FLOWFILE_NODE), get the parents one level higher.
.flatMap(n -> !LineageNodeType.PROVENANCE_EVENT_NODE.equals(n.getNodeType())
? lineageTree.get(n.getIdentifier()).stream() : Stream.of(n))
.collect(Collectors.toList());
}
private void extractLineagePaths(AnalysisContext context, Map<String, List<LineageNode>> lineageTree,
LineagePath lineagePath, ProvenanceEventRecord lastEvent) {
lineagePath.getEvents().add(lastEvent);
List<LineageNode> parentEvents = findParentEvents(lineageTree, lastEvent);
final boolean createSeparateParentPath = lineagePath.shouldCreateSeparatePath(lastEvent.getEventType());
if (createSeparateParentPath && (parentEvents == null || parentEvents.isEmpty())) {
// Try expanding the lineage.
// This is for FlowFiles that were FORKed (or JOINed, etc.) from other FlowFile(s).
// FlowFiles routed to 'original' may have these event types, too; however, their parents are fetched together.
// For example, with these inputs: CREATE(F1), FORK (F1 -> F2, F3), DROP(F1), SEND (F2), SEND(F3), DROP(F2), DROP(F3)
// Then when DROP(F1) is queried, FORK(F1) and CREATE(F1) are returned.
// For DROP(F2), SEND(F2) and FORK(F2) are returned.
// For DROP(F3), SEND(F3) and FORK(F3) are returned.
// In this case, FORK(F2) and FORK(F3) have to query their parents again, to get CREATE(F1).
final ComputeLineageResult joinedParents = context.findParents(lastEvent.getEventId());
analyzeLineageTree(joinedParents, lineageTree);
parentEvents = findParentEvents(lineageTree, lastEvent);
}
if (parentEvents == null || parentEvents.isEmpty()) {
logger.debug("{} does not have any parent, stop extracting lineage path.", lastEvent);
return;
}
if (createSeparateParentPath) {
// Treat those as separate lineage_paths.
parentEvents.stream()
.map(parentEvent -> context.getProvenanceEvent(Long.parseLong(parentEvent.getIdentifier())))
.filter(Objects::nonNull)
.forEach(parent -> {
final LineagePath parentPath = new LineagePath();
lineagePath.getParents().add(parentPath);
extractLineagePaths(context, lineageTree, parentPath, parent);
});
} else {
// Simply traverse upwards.
if (parentEvents.size() > 1) {
throw new IllegalStateException(String.format("Having more than one parent for event type %s" +
" is not expected. Please report this to the Apache NiFi community for investigation. %s",
lastEvent.getEventType(), lastEvent));
}
final ProvenanceEventRecord parentEvent = context.getProvenanceEvent(Long.parseLong(parentEvents.get(0).getIdentifier()));
if (parentEvent != null) {
extractLineagePaths(context, lineageTree, lineagePath, parentEvent);
}
}
}
private void analyzeLineagePath(AnalysisContext analysisContext, LineagePath lineagePath) {
final List<ProvenanceEventRecord> events = lineagePath.getEvents();
final DataSetRefs parentRefs = new DataSetRefs(events.get(0).getComponentId());
events.forEach(event -> {
final DataSetRefs refs = executeAnalyzer(analysisContext, event);
if (refs == null || refs.isEmpty()) {
return;
}
refs.getInputs().forEach(parentRefs::addInput);
refs.getOutputs().forEach(parentRefs::addOutput);
});
lineagePath.setRefs(parentRefs);
// Analyze parents.
lineagePath.getParents().forEach(parent -> analyzeLineagePath(analysisContext, parent));
}
private void analyzeLineageTree(ComputeLineageResult lineage, Map<String, List<LineageNode>> lineageTree) {
lineage.getEdges().forEach(edge -> lineageTree
.computeIfAbsent(edge.getDestination().getIdentifier(), k -> new ArrayList<>())
.add(edge.getSource()));
}
/**
* Create a new FlowPath from a LineagePath. FlowPaths created by this method will have a hash in their qualified names.
*
* <p>This method processes parents first to generate a hash, as parent LineagePath hashes contribute to the child hash
* in order to distinguish FlowPaths based on the complete route of a given FlowFile.
* For example, even if two lineagePaths have identical componentIds/inputs/outputs,
* if their parents have different inputs, they should be treated as different paths.</p>
*
* @param nifiFlow A reference to the current NiFiFlow
* @param lineagePath LineagePath from which NiFiFlowPath and DataSet refs are created and added to {@code createdFlowPaths}.
* @param createdFlowPaths A list to buffer created NiFiFlowPaths,
* in order to defer sending notifications to Kafka until all parent FlowPaths have been analyzed.
*/
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
final List<ProvenanceEventRecord> events = lineagePath.getEvents();
Collections.reverse(events);
final List<String> componentIds = events.stream().map(ProvenanceEventRecord::getComponentId).collect(Collectors.toList());
final String firstComponentId = events.get(0).getComponentId();
final DataSetRefs dataSetRefs = lineagePath.getRefs();
// Process parents first.
Referenceable queueBetweenParent = null;
if (!lineagePath.getParents().isEmpty()) {
// Add queue between this lineage path and parent.
queueBetweenParent = new Referenceable(TYPE_NIFI_QUEUE);
// The first event knows why this lineage has parents, e.g. FORK or JOIN.
final String firstEventType = events.get(0).getEventType().name();
queueBetweenParent.set(ATTR_NAME, firstEventType);
dataSetRefs.addInput(queueBetweenParent);
for (LineagePath parent : lineagePath.getParents()) {
parent.getRefs().addOutput(queueBetweenParent);
createCompleteFlowPath(nifiFlow, parent, createdFlowPaths);
}
}
// Create a variant path.
// Calculate a hash from component_ids and input and output resource ids.
final Stream<String> ioIds = Stream.concat(dataSetRefs.getInputs().stream(), dataSetRefs.getOutputs()
.stream()).map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
final Stream<String> parentHashes = lineagePath.getParents().stream().map(p -> String.valueOf(p.getLineagePathHash()));
final CRC32 crc32 = new CRC32();
crc32.update(Stream.of(componentIds.stream(), ioIds, parentHashes).reduce(Stream::concat).orElseGet(Stream::empty)
.sorted().distinct()
.collect(Collectors.joining(",")).getBytes(StandardCharsets.UTF_8));
final long hash = crc32.getValue();
lineagePath.setLineagePathHash(hash);
final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);
// In order to differentiate a queue between parents and this flow_path, add the hash into the queue qname.
// E.g., if FF1 and FF2 read from dirA were merged, while FF3 and FF4 read from dirB were merged and then passed here, these two should be different queues.
if (queueBetweenParent != null) {
queueBetweenParent.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
}
// If the same component emitted multiple provenance events consecutively, merge them to come up with a simpler name.
String previousComponentId = null;
List<ProvenanceEventRecord> uniqueEventsForName = new ArrayList<>();
for (ProvenanceEventRecord event : events) {
if (!event.getComponentId().equals(previousComponentId)) {
uniqueEventsForName.add(event);
}
previousComponentId = event.getComponentId();
}
final String pathName = uniqueEventsForName.stream()
// Processor name can be configured by the user and is more meaningful if available.
// If the component is already removed, it may not be available here.
.map(event -> nifiFlow.getProcessComponentName(event.getComponentId(), event::getComponentType))
.collect(Collectors.joining(", "));
flowPath.setName(pathName);
final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
flowPath.setGroupId(staticFlowPath != null ? staticFlowPath.getGroupId() : nifiFlow.getRootProcessGroupId());
// To defer sending notifications until the entire lineagePath analysis is finished, just add the instance to a buffer.
createdFlowPaths.add(new Tuple<>(flowPath, dataSetRefs));
}
}
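The path hash above combines component ids, input/output identifiers and parent path hashes, sorts and de-duplicates them, joins them with commas and feeds the result to CRC32. Here is a self-contained sketch of that calculation (not part of this commit); the identifier strings are made up, and their real format comes from AtlasUtils.toTypedQualifiedName.
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.CRC32;
public class LineagePathHashSketch {
    public static void main(String[] args) {
        // Hypothetical component ids, typed qualified names and a parent hash.
        final Stream<String> componentIds = Stream.of("processor-guid-1", "processor-guid-2");
        final Stream<String> ioIds = Stream.of("hive_table::tableA@cl1", "nifi_queue::queue-guid@cl1");
        final Stream<String> parentHashes = Stream.of("1234567890");
        final CRC32 crc32 = new CRC32();
        crc32.update(Stream.of(componentIds, ioIds, parentHashes).reduce(Stream::concat).orElseGet(Stream::empty)
                .sorted().distinct()
                .collect(Collectors.joining(",")).getBytes(StandardCharsets.UTF_8));
        System.out.println("lineage path hash = " + crc32.getValue());
    }
}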

View File

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.atlas.notification.hook.HookNotification;
public interface LineageContext {
void addMessage(HookNotification.HookNotificationMessage message);
}

View File

@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import java.util.ArrayList;
import java.util.List;
public class LineagePath {
private List<ProvenanceEventRecord> events = new ArrayList<>();
private List<LineagePath> parents = new ArrayList<>();
private DataSetRefs refs;
private long lineagePathHash;
/**
* NOTE: The list contains provenance events in reverse order, i.e. the last one comes first.
*/
public List<ProvenanceEventRecord> getEvents() {
return events;
}
public List<LineagePath> getParents() {
return parents;
}
public DataSetRefs getRefs() {
return refs;
}
public void setRefs(DataSetRefs refs) {
this.refs = refs;
}
public boolean shouldCreateSeparatePath(ProvenanceEventType eventType) {
switch (eventType) {
case CLONE:
case JOIN:
case FORK:
case REPLAY:
return true;
}
return false;
}
public boolean isComplete() {
// If the FlowFile is DROPped right after creating a child FlowFile, then the path is not worth reporting.
final boolean isDroppedImmediately = events.size() == 2
&& events.get(0).getEventType().equals(ProvenanceEventType.DROP)
&& shouldCreateSeparatePath(events.get(1).getEventType());
return !isDroppedImmediately && hasInput() && hasOutput();
}
public boolean hasInput() {
return (refs != null && !refs.getInputs().isEmpty()) || parents.stream().anyMatch(parent -> parent.hasInput());
}
public boolean hasOutput() {
return (refs != null && !refs.getOutputs().isEmpty()) || parents.stream().anyMatch(parent -> parent.hasOutput());
}
public long getLineagePathHash() {
return lineagePathHash;
}
public void setLineagePathHash(long lineagePathHash) {
this.lineagePathHash = lineagePathHash;
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
public interface LineageStrategy {
default ProvenanceEventType[] getTargetEventTypes(){
return new ProvenanceEventType[0];
}
void setLineageContext(LineageContext lineageContext);
void processEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event);
}
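For reference, a hedged skeleton (not part of this commit) of what an implementation of this contract looks like, assuming it lives in the same package as the interface; the class name and chosen event types are illustrative only.
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
public class NoOpLineageStrategy implements LineageStrategy {
    private LineageContext lineageContext;
    @Override
    public ProvenanceEventType[] getTargetEventTypes() {
        // Illustrative choice: only consider SEND and RECEIVE events.
        return new ProvenanceEventType[]{ProvenanceEventType.SEND, ProvenanceEventType.RECEIVE};
    }
    @Override
    public void setLineageContext(LineageContext lineageContext) {
        this.lineageContext = lineageContext;
    }
    @Override
    public void processEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event) {
        // A real strategy would analyze the event and send HookNotification messages
        // through the lineageContext; this sketch intentionally does nothing.
    }
}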

View File

@ -0,0 +1,205 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.lineage;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.NiFiFlowPath;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.lineage.ComputeLineageResult;
import org.apache.nifi.provenance.lineage.LineageEdge;
import org.apache.nifi.provenance.lineage.LineageNode;
import org.apache.nifi.provenance.lineage.LineageNodeType;
import java.util.List;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE;
public class SimpleFlowPathLineage extends AbstractLineageStrategy {
@Override
public void processEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event) {
final DataSetRefs refs = executeAnalyzer(analysisContext, event);
if (refs == null || (refs.isEmpty())) {
return;
}
if ("Remote Input Port".equals(event.getComponentType()) || "Remote Output Port".equals(event.getComponentType())) {
processRemotePortEvent(analysisContext, nifiFlow, event, refs);
} else {
addDataSetRefs(nifiFlow, refs);
}
}
/**
* Create a flow_path entity corresponding to the target RemoteGroupPort when a SEND/RECEIVE event is received.
* Such an entity can not be created in advance while analyzing the flow statically,
* because the ReportingTask can not determine whether a component id belongs to a RemoteGroupPort,
* since ConnectionStatus is the only information available in the ReportingContext.
* ConnectionStatus only knows the component id; the component type is unknown.
* For example, there is no way to tell whether a connected component is a funnel or a RemoteGroupPort.
*/
private void processRemotePortEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event, DataSetRefs analyzedRefs) {
final boolean isRemoteInputPort = "Remote Input Port".equals(event.getComponentType());
// Create a RemoteInputPort Process.
// event.getComponentId() returns the UUID of the RemoteGroupPort acting as a Site-to-Site client, which is different from the remote port UUID (the port DataSet id).
// See NIFI-4571 for details.
final Referenceable remotePortDataSet = isRemoteInputPort ? analyzedRefs.getOutputs().iterator().next() : analyzedRefs.getInputs().iterator().next();
final String portProcessId = event.getComponentId();
final NiFiFlowPath remotePortProcess = new NiFiFlowPath(portProcessId);
remotePortProcess.setName(event.getComponentType());
remotePortProcess.addProcessor(portProcessId);
// For RemoteInputPort, need to find the previous component connected to this port,
// which passed this particular FlowFile.
// That is only possible by calling lineage API.
if (isRemoteInputPort) {
final ProvenanceEventRecord previousEvent = findPreviousProvenanceEvent(analysisContext, event);
if (previousEvent == null) {
logger.warn("Previous event was not found: {}", new Object[]{event});
return;
}
// Set groupId from incoming connection if available.
final List<ConnectionStatus> incomingConnections = nifiFlow.getIncomingConnections(portProcessId);
if (incomingConnections == null || incomingConnections.isEmpty()) {
logger.warn("Incoming relationship was not found: {}", new Object[]{event});
return;
}
final ConnectionStatus connection = incomingConnections.get(0);
remotePortProcess.setGroupId(connection.getGroupId());
final Referenceable remotePortProcessRef = toReferenceable(remotePortProcess, nifiFlow);
createEntity(remotePortProcessRef);
// Create a queue.
Referenceable queueFromStaticFlowPathToRemotePortProcess = new Referenceable(TYPE_NIFI_QUEUE);
queueFromStaticFlowPathToRemotePortProcess.set(ATTR_NAME, "queue");
queueFromStaticFlowPathToRemotePortProcess.set(ATTR_QUALIFIED_NAME, nifiFlow.toQualifiedName(portProcessId));
// Create lineage: Static flow_path -> queue
DataSetRefs staticFlowPathRefs = new DataSetRefs(previousEvent.getComponentId());
staticFlowPathRefs.addOutput(queueFromStaticFlowPathToRemotePortProcess);
addDataSetRefs(nifiFlow, staticFlowPathRefs);
// Create lineage: Queue -> RemoteInputPort process -> RemoteInputPort dataSet
DataSetRefs remotePortRefs = new DataSetRefs(portProcessId);
remotePortRefs.addInput(queueFromStaticFlowPathToRemotePortProcess);
remotePortRefs.addOutput(remotePortDataSet);
addDataSetRefs(remotePortRefs, remotePortProcessRef);
} else {
// For RemoteOutputPort, it's possible that multiple processors are connected.
// In that case, the received FlowFile is cloned and passed to each connection.
// So we need to create multiple DataSetRefs.
final List<ConnectionStatus> connections = nifiFlow.getOutgoingConnections(portProcessId);
if (connections == null || connections.isEmpty()) {
logger.warn("Incoming connection was not found: {}", new Object[]{event});
return;
}
// Set group id from outgoing connection if available.
remotePortProcess.setGroupId(connections.get(0).getGroupId());
final Referenceable remotePortProcessRef = toReferenceable(remotePortProcess, nifiFlow);
createEntity(remotePortProcessRef);
// Create lineage: RemoteOutputPort dataSet -> RemoteOutputPort process
DataSetRefs remotePortRefs = new DataSetRefs(portProcessId);
remotePortRefs.addInput(remotePortDataSet);
addDataSetRefs(remotePortRefs, remotePortProcessRef);
for (ConnectionStatus connection : connections) {
final String destinationId = connection.getDestinationId();
final NiFiFlowPath destFlowPath = nifiFlow.findPath(destinationId);
if (destFlowPath == null) {
// If the destination of a connection is a Remote Input Port,
// then its corresponding flow path may not be created yet.
// In such a direct RemoteOutputPort-to-RemoteInputPort case, do not add a queue from this RemoteOutputPort
// as a queue will be created by the connected RemoteInputPort to connect this RemoteOutputPort.
continue;
}
// Create a queue.
Referenceable queueFromRemotePortProcessToStaticFlowPath = new Referenceable(TYPE_NIFI_QUEUE);
queueFromRemotePortProcessToStaticFlowPath.set(ATTR_NAME, "queue");
queueFromRemotePortProcessToStaticFlowPath.set(ATTR_QUALIFIED_NAME, nifiFlow.toQualifiedName(destinationId));
// Create lineage: Queue -> Static flow_path
DataSetRefs staticFlowPathRefs = new DataSetRefs(destinationId);
staticFlowPathRefs.addInput(queueFromRemotePortProcessToStaticFlowPath);
addDataSetRefs(nifiFlow, staticFlowPathRefs);
// Create lineage: RemoteOutputPort dataSet -> RemoteOutputPort process -> Queue
remotePortRefs.addOutput(queueFromRemotePortProcessToStaticFlowPath);
addDataSetRefs(remotePortRefs, remotePortProcessRef);
}
// Add the RemoteOutputPort process, so that it can be found even if it is connected to a RemoteInputPort directly without any processor in between.
nifiFlow.getFlowPaths().put(remotePortProcess.getId(), remotePortProcess);
}
}
private ProvenanceEventRecord findPreviousProvenanceEvent(AnalysisContext context, ProvenanceEventRecord event) {
final ComputeLineageResult lineage = context.queryLineage(event.getEventId());
if (lineage == null) {
logger.warn("Lineage was not found: {}", new Object[]{event});
return null;
}
// If no previous provenance node is found (e.g. because events have expired), just log a warning message and do nothing.
final LineageNode previousProvenanceNode = traverseLineage(lineage, String.valueOf(event.getEventId()));
if (previousProvenanceNode == null) {
logger.warn("Traverse lineage could not find any preceding provenance event node: {}", new Object[]{event});
return null;
}
final long previousEventId = Long.parseLong(previousProvenanceNode.getIdentifier());
return context.getProvenanceEvent(previousEventId);
}
/**
* Recursively traverse lineage graph until a preceding provenance event is found.
*/
private LineageNode traverseLineage(ComputeLineageResult lineage, String eventId) {
final LineageNode previousNode = lineage.getEdges().stream()
.filter(edge -> edge.getDestination().getIdentifier().equals(String.valueOf(eventId)))
.findFirst().map(LineageEdge::getSource).orElse(null);
if (previousNode == null) {
return null;
}
if (previousNode.getNodeType().equals(LineageNodeType.PROVENANCE_EVENT_NODE)) {
return previousNode;
}
return traverseLineage(lineage, previousNode.getIdentifier());
}
}
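To make the Remote Input Port handling above easier to follow, here is a hedged sketch (not part of this commit) of the entities that processRemotePortEvent wires together; the GUIDs and the qualified name are made up, and the literal type and attribute names mirror the constants used above.
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.DataSetRefs;
public class RemoteInputPortLineageSketch {
    public static void main(String[] args) {
        // Chain created: static nifi_flow_path --> nifi_queue --> remote-port nifi_flow_path --> remote port DataSet
        final Referenceable queue = new Referenceable("nifi_queue");
        queue.set("name", "queue");
        queue.set("qualifiedName", "remote-port-guid@cl1"); // hypothetical
        // Static flow path (the previous component found via the lineage API) outputs into the queue.
        final DataSetRefs staticFlowPathRefs = new DataSetRefs("previous-processor-guid");
        staticFlowPathRefs.addOutput(queue);
        // The remote-port flow path reads from the queue and would output the S2S port DataSet
        // returned by the analyzer (omitted here).
        final DataSetRefs remotePortRefs = new DataSetRefs("remote-port-guid");
        remotePortRefs.addInput(queue);
    }
}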

View File

@ -0,0 +1,714 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.reporting;
import com.sun.jersey.api.client.ClientResponse;
import org.apache.atlas.AtlasServiceException;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.config.SslConfigs;
import org.apache.nifi.annotation.behavior.DynamicProperty;
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
import org.apache.nifi.annotation.behavior.Stateful;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
import org.apache.nifi.atlas.NiFiAtlasHook;
import org.apache.nifi.atlas.NiFiAtlasClient;
import org.apache.nifi.atlas.NiFiFlow;
import org.apache.nifi.atlas.NiFiFlowAnalyzer;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.StandardAnalysisContext;
import org.apache.nifi.atlas.provenance.lineage.CompleteFlowPathLineage;
import org.apache.nifi.atlas.provenance.lineage.LineageStrategy;
import org.apache.nifi.atlas.provenance.lineage.SimpleFlowPathLineage;
import org.apache.nifi.atlas.resolver.ClusterResolver;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.atlas.resolver.RegexClusterResolver;
import org.apache.nifi.atlas.security.AtlasAuthN;
import org.apache.nifi.atlas.security.Basic;
import org.apache.nifi.atlas.security.Kerberos;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.status.ProcessGroupStatus;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceRepository;
import org.apache.nifi.reporting.AbstractReportingTask;
import org.apache.nifi.reporting.EventAccess;
import org.apache.nifi.reporting.ReportingContext;
import org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer;
import org.apache.nifi.ssl.SSLContextService;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.ServiceLoader;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Stream;
import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer.PROVENANCE_BATCH_SIZE;
import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer.PROVENANCE_START_POSITION;
@Tags({"atlas", "lineage"})
@CapabilityDescription("Publishes NiFi flow data set level lineage to Apache Atlas." +
" By reporting flow information to Atlas, an end-to-end Process and DataSet lineage such as across NiFi environments and other systems" +
" connected by technologies, for example NiFi Site-to-Site, Kafka topic or Hive tables." +
" There are limitations and required configurations for both NiFi and Atlas. See 'Additional Details' for further description.")
@Stateful(scopes = Scope.LOCAL, description = "Stores the Reporting Task's last event Id so that on restart the task knows where it left off.")
@DynamicProperty(name = "hostnamePattern.<ClusterName>", value = "hostname Regex patterns", description = RegexClusterResolver.PATTERN_PROPERTY_PREFIX_DESC)
// In order for each reporting task instance to have its own static objects such as KafkaNotification.
@RequiresInstanceClassLoading
public class ReportLineageToAtlas extends AbstractReportingTask {
static final PropertyDescriptor ATLAS_URLS = new PropertyDescriptor.Builder()
.name("atlas-urls")
.displayName("Atlas URLs")
.description("Comma separated URL of Atlas Servers" +
" (e.g. http://atlas-server-hostname:21000 or https://atlas-server-hostname:21443)." +
" For accessing Atlas behind Knox gateway, specify Knox gateway URL" +
" (e.g. https://knox-hostname:8443/gateway/{topology-name}/atlas).")
.required(true)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final AllowableValue ATLAS_AUTHN_BASIC = new AllowableValue("basic", "Basic", "Use username and password.");
static final AllowableValue ATLAS_AUTHN_KERBEROS = new AllowableValue("kerberos", "Kerberos", "Use Kerberos keytab file.");
static final PropertyDescriptor ATLAS_AUTHN_METHOD = new PropertyDescriptor.Builder()
.name("atlas-authentication-method")
.displayName("Atlas Authentication Method")
.description("Specify how to authenticate this reporting task to Atlas server.")
.required(true)
.allowableValues(ATLAS_AUTHN_BASIC, ATLAS_AUTHN_KERBEROS)
.defaultValue(ATLAS_AUTHN_BASIC.getValue())
.build();
public static final PropertyDescriptor ATLAS_USER = new PropertyDescriptor.Builder()
.name("atlas-username")
.displayName("Atlas Username")
.description("User name to communicate with Atlas.")
.required(false)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
public static final PropertyDescriptor ATLAS_PASSWORD = new PropertyDescriptor.Builder()
.name("atlas-password")
.displayName("Atlas Password")
.description("Password to communicate with Atlas.")
.required(false)
.sensitive(true)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final PropertyDescriptor ATLAS_CONF_DIR = new PropertyDescriptor.Builder()
.name("atlas-conf-dir")
.displayName("Atlas Configuration Directory")
.description("Directory path that contains 'atlas-application.properties' file." +
" If not specified and 'Create Atlas Configuration File' is disabled," +
" then, 'atlas-application.properties' file under root classpath is used.")
.required(false)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
public static final PropertyDescriptor ATLAS_NIFI_URL = new PropertyDescriptor.Builder()
.name("atlas-nifi-url")
.displayName("NiFi URL for Atlas")
.description("NiFi URL is used in Atlas to represent this NiFi cluster (or standalone instance)." +
" It is recommended to use one that can be accessible remotely instead of using 'localhost'.")
.required(true)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.URL_VALIDATOR)
.build();
public static final PropertyDescriptor ATLAS_DEFAULT_CLUSTER_NAME = new PropertyDescriptor.Builder()
.name("atlas-default-cluster-name")
.displayName("Atlas Default Cluster Name")
.description("Cluster name for Atlas entities reported by this ReportingTask." +
" If not specified, 'atlas.cluster.name' in Atlas Configuration File is used." +
" Cluster name mappings can be configured by user defined properties." +
" See additional detail for detail.")
.required(false)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final PropertyDescriptor ATLAS_CONF_CREATE = new PropertyDescriptor.Builder()
.name("atlas-conf-create")
.displayName("Create Atlas Configuration File")
.description("If enabled, 'atlas-application.properties' file will be created in 'Atlas Configuration Directory'" +
" automatically when this Reporting Task starts." +
" Note that the existing configuration file will be overwritten.")
.required(true)
.expressionLanguageSupported(false)
.allowableValues("true", "false")
.defaultValue("false")
.build();
static final PropertyDescriptor SSL_CONTEXT_SERVICE = new PropertyDescriptor.Builder()
.name("ssl-context-service")
.displayName("SSL Context Service")
.description("Specifies the SSL Context Service to use for communicating with Atlas and Kafka.")
.required(false)
.identifiesControllerService(SSLContextService.class)
.build();
static final PropertyDescriptor KAFKA_BOOTSTRAP_SERVERS = new PropertyDescriptor.Builder()
.name("kafka-bootstrap-servers")
.displayName("Kafka Bootstrap Servers")
.description("Kafka Bootstrap Servers to send Atlas hook notification messages based on NiFi provenance events." +
" E.g. 'localhost:9092'" +
" NOTE: Once this reporting task has started, restarting NiFi is required to changed this property" +
" as Atlas library holds a unmodifiable static reference to Kafka client.")
.required(false)
.expressionLanguageSupported(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.build();
static final AllowableValue SEC_PLAINTEXT = new AllowableValue("PLAINTEXT", "PLAINTEXT", "PLAINTEXT");
static final AllowableValue SEC_SSL = new AllowableValue("SSL", "SSL", "SSL");
static final AllowableValue SEC_SASL_PLAINTEXT = new AllowableValue("SASL_PLAINTEXT", "SASL_PLAINTEXT", "SASL_PLAINTEXT");
static final AllowableValue SEC_SASL_SSL = new AllowableValue("SASL_SSL", "SASL_SSL", "SASL_SSL");
static final PropertyDescriptor KAFKA_SECURITY_PROTOCOL = new PropertyDescriptor.Builder()
.name("kafka-security-protocol")
.displayName("Kafka Security Protocol")
.description("Protocol used to communicate with Kafka brokers to send Atlas hook notification messages." +
" Corresponds to Kafka's 'security.protocol' property.")
.required(true)
.expressionLanguageSupported(false)
.allowableValues(SEC_PLAINTEXT, SEC_SSL, SEC_SASL_PLAINTEXT, SEC_SASL_SSL)
.defaultValue(SEC_PLAINTEXT.getValue())
.build();
public static final PropertyDescriptor NIFI_KERBEROS_PRINCIPAL = new PropertyDescriptor.Builder()
.name("nifi-kerberos-principal")
.displayName("NiFi Kerberos Principal")
.description("The Kerberos principal for this NiFi instance to access Atlas API and Kafka brokers." +
" If not set, it is expected to set a JAAS configuration file in the JVM properties defined in the bootstrap.conf file." +
" This principal will be set into 'sasl.jaas.config' Kafka's property.")
.required(false)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(true)
.build();
public static final PropertyDescriptor NIFI_KERBEROS_KEYTAB = new PropertyDescriptor.Builder()
.name("nifi-kerberos-keytab")
.displayName("NiFi Kerberos Keytab")
.description("The Kerberos keytab for this NiFi instance to access Atlas API and Kafka brokers." +
" If not set, it is expected to set a JAAS configuration file in the JVM properties defined in the bootstrap.conf file." +
" This principal will be set into 'sasl.jaas.config' Kafka's property.")
.required(false)
.addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
.expressionLanguageSupported(true)
.build();
static final PropertyDescriptor KAFKA_KERBEROS_SERVICE_NAME = new PropertyDescriptor.Builder()
.name("kafka-kerberos-service-name-kafka")
.displayName("Kafka Kerberos Service Name")
.description("The Kerberos principal name that Kafka runs for Atlas notification." +
" This can be defined either in Kafka's JAAS config or in Kafka's config." +
" Corresponds to Kafka's 'security.protocol' property." +
" It is ignored unless one of the SASL options of the <Security Protocol> are selected.")
.required(false)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.expressionLanguageSupported(true)
.defaultValue("kafka")
.build();
static final AllowableValue LINEAGE_STRATEGY_SIMPLE_PATH = new AllowableValue("SimplePath", "Simple Path",
"Map NiFi provenance events and target Atlas DataSets to statically created 'nifi_flow_path' Atlas Processes." +
" See also 'Additional Details'.");
static final AllowableValue LINEAGE_STRATEGY_COMPLETE_PATH = new AllowableValue("CompletePath", "Complete Path",
"Create separate 'nifi_flow_path' Atlas Processes for each distinct input and output DataSet combinations" +
" by looking at the complete route for a given FlowFile. See also 'Additional Details.");
static final PropertyDescriptor NIFI_LINEAGE_STRATEGY = new PropertyDescriptor.Builder()
.name("nifi-lineage-strategy")
.displayName("NiFi Lineage Strategy")
.description("Specifies granularity on how NiFi data flow should be reported to Atlas.")
.required(true)
.allowableValues(LINEAGE_STRATEGY_SIMPLE_PATH, LINEAGE_STRATEGY_COMPLETE_PATH)
.defaultValue(LINEAGE_STRATEGY_SIMPLE_PATH.getValue())
.build();
private static final String ATLAS_PROPERTIES_FILENAME = "atlas-application.properties";
private static final String ATLAS_PROPERTY_CLUSTER_NAME = "atlas.cluster.name";
private static final String ATLAS_PROPERTY_ENABLE_TLS = "atlas.enableTLS";
private static final String ATLAS_KAFKA_PREFIX = "atlas.kafka.";
private static final String ATLAS_PROPERTY_KAFKA_BOOTSTRAP_SERVERS = ATLAS_KAFKA_PREFIX + "bootstrap.servers";
private static final String ATLAS_PROPERTY_KAFKA_CLIENT_ID = ATLAS_KAFKA_PREFIX + ProducerConfig.CLIENT_ID_CONFIG;
private final ServiceLoader<ClusterResolver> clusterResolverLoader = ServiceLoader.load(ClusterResolver.class);
private volatile NiFiAtlasClient atlasClient;
private volatile Properties atlasProperties;
private volatile boolean isTypeDefCreated = false;
private volatile String defaultClusterName;
private volatile ProvenanceEventConsumer consumer;
private volatile ClusterResolvers clusterResolvers;
private volatile NiFiAtlasHook nifiAtlasHook;
private volatile LineageStrategy lineageStrategy;
@Override
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(ATLAS_URLS);
properties.add(ATLAS_AUTHN_METHOD);
properties.add(ATLAS_USER);
properties.add(ATLAS_PASSWORD);
properties.add(ATLAS_CONF_DIR);
properties.add(ATLAS_NIFI_URL);
properties.add(ATLAS_DEFAULT_CLUSTER_NAME);
properties.add(NIFI_LINEAGE_STRATEGY);
properties.add(PROVENANCE_START_POSITION);
properties.add(PROVENANCE_BATCH_SIZE);
properties.add(SSL_CONTEXT_SERVICE);
// The following properties are required if ATLAS_CONF_CREATE is enabled.
// Otherwise they should be left blank.
properties.add(ATLAS_CONF_CREATE);
properties.add(NIFI_KERBEROS_PRINCIPAL);
properties.add(NIFI_KERBEROS_KEYTAB);
properties.add(KAFKA_KERBEROS_SERVICE_NAME);
properties.add(KAFKA_BOOTSTRAP_SERVERS);
properties.add(KAFKA_SECURITY_PROTOCOL);
return properties;
}
@Override
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
for (ClusterResolver resolver : clusterResolverLoader) {
final PropertyDescriptor propertyDescriptor = resolver.getSupportedDynamicPropertyDescriptor(propertyDescriptorName);
if(propertyDescriptor != null) {
return propertyDescriptor;
}
}
return null;
}
private void parseAtlasUrls(final PropertyValue atlasUrlsProp, final Consumer<String> urlStrConsumer) {
final String atlasUrlsStr = atlasUrlsProp.evaluateAttributeExpressions().getValue();
if (atlasUrlsStr != null && !atlasUrlsStr.isEmpty()) {
Arrays.stream(atlasUrlsStr.split(","))
.map(String::trim)
.forEach(urlStrConsumer);
}
}
@Override
protected Collection<ValidationResult> customValidate(ValidationContext context) {
final Collection<ValidationResult> results = new ArrayList<>();
final boolean isSSLContextServiceSet = context.getProperty(SSL_CONTEXT_SERVICE).isSet();
final ValidationResult.Builder invalidSSLService = new ValidationResult.Builder()
.subject(SSL_CONTEXT_SERVICE.getDisplayName()).valid(false);
parseAtlasUrls(context.getProperty(ATLAS_URLS), input -> {
final ValidationResult.Builder builder = new ValidationResult.Builder().subject(ATLAS_URLS.getDisplayName()).input(input);
try {
final URL url = new URL(input);
if ("https".equalsIgnoreCase(url.getProtocol()) && !isSSLContextServiceSet) {
results.add(invalidSSLService.explanation("required by HTTPS Atlas access").build());
} else {
results.add(builder.explanation("Valid URI").valid(true).build());
}
} catch (Exception e) {
results.add(builder.explanation("Contains invalid URI: " + e).valid(false).build());
}
});
final String atlasAuthNMethod = context.getProperty(ATLAS_AUTHN_METHOD).getValue();
final AtlasAuthN atlasAuthN = getAtlasAuthN(atlasAuthNMethod);
results.addAll(atlasAuthN.validate(context));
clusterResolverLoader.forEach(resolver -> results.addAll(resolver.validate(context)));
if (context.getProperty(ATLAS_CONF_CREATE).asBoolean()) {
Stream.of(ATLAS_CONF_DIR, ATLAS_DEFAULT_CLUSTER_NAME, KAFKA_BOOTSTRAP_SERVERS)
.filter(p -> !context.getProperty(p).isSet())
.forEach(p -> results.add(new ValidationResult.Builder()
.subject(p.getDisplayName())
.explanation("required to create Atlas configuration file.")
.valid(false).build()));
validateKafkaProperties(context, results, isSSLContextServiceSet, invalidSSLService);
}
return results;
}
private void validateKafkaProperties(ValidationContext context, Collection<ValidationResult> results, boolean isSSLContextServiceSet, ValidationResult.Builder invalidSSLService) {
final String kafkaSecurityProtocol = context.getProperty(KAFKA_SECURITY_PROTOCOL).getValue();
if ((SEC_SSL.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol))
&& !isSSLContextServiceSet) {
results.add(invalidSSLService.explanation("required by SSL Kafka connection").build());
}
if (SEC_SASL_PLAINTEXT.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol)) {
Stream.of(NIFI_KERBEROS_PRINCIPAL, NIFI_KERBEROS_KEYTAB, KAFKA_KERBEROS_SERVICE_NAME)
.filter(p -> !context.getProperty(p).isSet())
.forEach(p -> results.add(new ValidationResult.Builder()
.subject(p.getDisplayName())
.explanation("required by Kafka SASL authentication.")
.valid(false).build()));
}
}
@OnScheduled
public void setup(ConfigurationContext context) throws IOException {
// initAtlasClient has to be done first as it loads the Atlas properties.
initAtlasClient(context);
initLineageStrategy(context);
initClusterResolvers(context);
}
private void initLineageStrategy(ConfigurationContext context) throws IOException {
nifiAtlasHook = new NiFiAtlasHook(atlasClient);
final String strategy = context.getProperty(NIFI_LINEAGE_STRATEGY).getValue();
if (LINEAGE_STRATEGY_SIMPLE_PATH.equals(strategy)) {
lineageStrategy = new SimpleFlowPathLineage();
} else if (LINEAGE_STRATEGY_COMPLETE_PATH.equals(strategy)) {
lineageStrategy = new CompleteFlowPathLineage();
}
lineageStrategy.setLineageContext(nifiAtlasHook);
initProvenanceConsumer(context);
}
private void initClusterResolvers(ConfigurationContext context) {
final Set<ClusterResolver> loadedClusterResolvers = new LinkedHashSet<>();
clusterResolverLoader.forEach(resolver -> {
resolver.configure(context);
loadedClusterResolvers.add(resolver);
});
clusterResolvers = new ClusterResolvers(Collections.unmodifiableSet(loadedClusterResolvers), defaultClusterName);
}
private void initAtlasClient(ConfigurationContext context) throws IOException {
List<String> urls = new ArrayList<>();
parseAtlasUrls(context.getProperty(ATLAS_URLS), urls::add);
final boolean isAtlasApiSecure = urls.stream().anyMatch(url -> url.toLowerCase().startsWith("https"));
final String atlasAuthNMethod = context.getProperty(ATLAS_AUTHN_METHOD).getValue();
final String confDirStr = context.getProperty(ATLAS_CONF_DIR).evaluateAttributeExpressions().getValue();
final File confDir = confDirStr != null && !confDirStr.isEmpty() ? new File(confDirStr) : null;
atlasProperties = new Properties();
final File atlasPropertiesFile = new File(confDir, ATLAS_PROPERTIES_FILENAME);
final Boolean createAtlasConf = context.getProperty(ATLAS_CONF_CREATE).asBoolean();
if (!createAtlasConf) {
// Load existing properties file.
if (atlasPropertiesFile.isFile()) {
getLogger().info("Loading {}", new Object[]{atlasPropertiesFile});
try (InputStream in = new FileInputStream(atlasPropertiesFile)) {
atlasProperties.load(in);
}
} else {
final String fileInClasspath = "/" + ATLAS_PROPERTIES_FILENAME;
try (InputStream in = ReportLineageToAtlas.class.getResourceAsStream(fileInClasspath)) {
getLogger().info("Loading {} from classpath", new Object[]{fileInClasspath});
if (in == null) {
throw new ProcessException(String.format("Could not find %s in classpath." +
" Please add it to classpath," +
" or specify %s a directory containing Atlas properties file," +
" or enable %s to generate it.",
fileInClasspath, ATLAS_CONF_DIR.getDisplayName(), ATLAS_CONF_CREATE.getDisplayName()));
}
atlasProperties.load(in);
}
}
}
// Resolve default cluster name.
defaultClusterName = context.getProperty(ATLAS_DEFAULT_CLUSTER_NAME).evaluateAttributeExpressions().getValue();
if (defaultClusterName == null || defaultClusterName.isEmpty()) {
// If the default cluster name is not specified in the reporting task configuration, then load it from the Atlas config.
defaultClusterName = atlasProperties.getProperty(ATLAS_PROPERTY_CLUSTER_NAME);
}
// If the default cluster name is still not defined, the reporting task should not be able to start.
if (defaultClusterName == null || defaultClusterName.isEmpty()) {
throw new ProcessException("Default cluster name is not defined.");
}
final AtlasAuthN atlasAuthN = getAtlasAuthN(atlasAuthNMethod);
atlasAuthN.configure(context);
// Create Atlas configuration file if necessary.
if (createAtlasConf) {
atlasProperties.put(ATLAS_PROPERTY_CLUSTER_NAME, defaultClusterName);
atlasProperties.put(ATLAS_PROPERTY_ENABLE_TLS, String.valueOf(isAtlasApiSecure));
setKafkaConfig(atlasProperties, context);
atlasAuthN.populateProperties(atlasProperties);
try (FileOutputStream fos = new FileOutputStream(atlasPropertiesFile)) {
String ts = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSX")
.withZone(ZoneOffset.UTC)
.format(Instant.now());
atlasProperties.store(fos, "Generated by Apache NiFi ReportLineageToAtlas ReportingTask at " + ts);
}
}
atlasClient = NiFiAtlasClient.getInstance();
try {
atlasClient.initialize(urls.toArray(new String[]{}), atlasAuthN, confDir);
} catch (final NullPointerException e) {
throw new ProcessException(String.format("Failed to initialize Atlas client due to %s." +
" Make sure 'atlas-application.properties' is in the directory specified with %s" +
" or under root classpath if not specified.", e, ATLAS_CONF_DIR.getDisplayName()), e);
}
}
private AtlasAuthN getAtlasAuthN(String atlasAuthNMethod) {
final AtlasAuthN atlasAuthN;
switch (atlasAuthNMethod) {
case "basic" :
atlasAuthN = new Basic();
break;
case "kerberos" :
atlasAuthN = new Kerberos();
break;
default:
throw new IllegalArgumentException(atlasAuthNMethod + " is not supported as an Atlas authentication method.");
}
return atlasAuthN;
}
private void initProvenanceConsumer(final ConfigurationContext context) throws IOException {
consumer = new ProvenanceEventConsumer();
consumer.setStartPositionValue(context.getProperty(PROVENANCE_START_POSITION).getValue());
consumer.setBatchSize(context.getProperty(PROVENANCE_BATCH_SIZE).asInteger());
consumer.addTargetEventType(lineageStrategy.getTargetEventTypes());
consumer.setLogger(getLogger());
consumer.setScheduled(true);
}
@OnUnscheduled
public void onUnscheduled() {
if (consumer != null) {
consumer.setScheduled(false);
}
if (nifiAtlasHook != null) {
nifiAtlasHook.close();
nifiAtlasHook = null;
}
}
@Override
public void onTrigger(ReportingContext context) {
final String clusterNodeId = context.getClusterNodeIdentifier();
final boolean isClustered = context.isClustered();
if (isClustered && isEmpty(clusterNodeId)) {
// Clustered, but this node's ID is unknown. Not ready for processing yet.
return;
}
// If standalone or the primary node in a NiFi cluster, this node is responsible for doing primary tasks.
final boolean isResponsibleForPrimaryTasks = !isClustered || getNodeTypeProvider().isPrimary();
// Create entity definitions in Atlas if they do not exist yet.
if (!isTypeDefCreated) {
try {
if (isResponsibleForPrimaryTasks) {
// Create NiFi type definitions in Atlas type system.
atlasClient.registerNiFiTypeDefs(false);
} else {
// Otherwise, just check existence of NiFi type definitions.
if (!atlasClient.isNiFiTypeDefsRegistered()) {
getLogger().debug("NiFi type definitions are not ready in Atlas type system yet.");
return;
}
}
isTypeDefCreated = true;
} catch (AtlasServiceException e) {
throw new RuntimeException("Failed to check and create NiFi flow type definitions in Atlas due to " + e, e);
}
}
// Regardless of whether this node is responsible for primary tasks, each node has to analyze the NiFi flow.
// Each node is assumed to have the same flow definition, which is guaranteed by the NiFi cluster management mechanism.
final NiFiFlow nifiFlow = createNiFiFlow(context);
if (isResponsibleForPrimaryTasks) {
try {
atlasClient.registerNiFiFlow(nifiFlow);
} catch (AtlasServiceException e) {
throw new RuntimeException("Failed to register NiFI flow. " + e, e);
}
}
// NOTE: There is a race condition between the primary node and other nodes.
// If a node notifies an event related to a NiFi component which is not yet created by NiFi primary node,
// then the notification message will fail due to having a reference to a non-existing entity.
consumeNiFiProvenanceEvents(context, nifiFlow);
}
private NiFiFlow createNiFiFlow(ReportingContext context) {
final ProcessGroupStatus rootProcessGroup = context.getEventAccess().getGroupStatus("root");
final String flowName = rootProcessGroup.getName();
final String nifiUrl = context.getProperty(ATLAS_NIFI_URL).evaluateAttributeExpressions().getValue();
final String clusterName;
try {
final String nifiHostName = new URL(nifiUrl).getHost();
clusterName = clusterResolvers.fromHostNames(nifiHostName);
} catch (MalformedURLException e) {
throw new IllegalArgumentException("Failed to parse NiFi URL, " + e.getMessage(), e);
}
NiFiFlow existingNiFiFlow = null;
try {
// Retrieve Existing NiFiFlow from Atlas.
existingNiFiFlow = atlasClient.fetchNiFiFlow(rootProcessGroup.getId(), clusterName);
} catch (AtlasServiceException e) {
if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())){
getLogger().debug("Existing flow was not found for {}@{}", new Object[]{rootProcessGroup.getId(), clusterName});
} else {
throw new RuntimeException("Failed to fetch existing NiFI flow. " + e, e);
}
}
final NiFiFlow nifiFlow = existingNiFiFlow != null ? existingNiFiFlow : new NiFiFlow(rootProcessGroup.getId());
nifiFlow.setFlowName(flowName);
nifiFlow.setUrl(nifiUrl);
nifiFlow.setClusterName(clusterName);
final NiFiFlowAnalyzer flowAnalyzer = new NiFiFlowAnalyzer();
flowAnalyzer.analyzeProcessGroup(nifiFlow, rootProcessGroup);
flowAnalyzer.analyzePaths(nifiFlow);
return nifiFlow;
}
private void consumeNiFiProvenanceEvents(ReportingContext context, NiFiFlow nifiFlow) {
final EventAccess eventAccess = context.getEventAccess();
final AnalysisContext analysisContext = new StandardAnalysisContext(nifiFlow, clusterResolvers,
// FIXME: This class cast shouldn't be necessary to query lineage. Possible refactor target in next major update.
(ProvenanceRepository)eventAccess.getProvenanceRepository());
consumer.consumeEvents(eventAccess, context.getStateManager(), events -> {
for (ProvenanceEventRecord event : events) {
try {
lineageStrategy.processEvent(analysisContext, nifiFlow, event);
} catch (Exception e) {
// If something went wrong, log it and continue with other records.
getLogger().error("Skipping failed analyzing event {} due to {}.", new Object[]{event, e, e});
}
}
nifiAtlasHook.commitMessages();
});
}
private void setKafkaConfig(Map<Object, Object> mapToPopulate, PropertyContext context) {
final String kafkaBootStrapServers = context.getProperty(KAFKA_BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
mapToPopulate.put(ATLAS_PROPERTY_KAFKA_BOOTSTRAP_SERVERS, kafkaBootStrapServers);
mapToPopulate.put(ATLAS_PROPERTY_KAFKA_CLIENT_ID, String.format("%s.%s", getName(), getIdentifier()));
final String kafkaSecurityProtocol = context.getProperty(KAFKA_SECURITY_PROTOCOL).getValue();
mapToPopulate.put(ATLAS_KAFKA_PREFIX + "security.protocol", kafkaSecurityProtocol);
// Translate SSLContext Service configuration into Kafka properties
final SSLContextService sslContextService = context.getProperty(SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
if (sslContextService != null && sslContextService.isKeyStoreConfigured()) {
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, sslContextService.getKeyStoreFile());
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG, sslContextService.getKeyStorePassword());
final String keyPass = sslContextService.getKeyPassword() == null ? sslContextService.getKeyStorePassword() : sslContextService.getKeyPassword();
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEY_PASSWORD_CONFIG, keyPass);
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, sslContextService.getKeyStoreType());
}
if (sslContextService != null && sslContextService.isTrustStoreConfigured()) {
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, sslContextService.getTrustStoreFile());
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, sslContextService.getTrustStorePassword());
mapToPopulate.put(ATLAS_KAFKA_PREFIX + SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, sslContextService.getTrustStoreType());
}
if (SEC_SASL_PLAINTEXT.equals(kafkaSecurityProtocol) || SEC_SASL_SSL.equals(kafkaSecurityProtocol)) {
setKafkaJaasConfig(mapToPopulate, context);
}
}
/**
* Populate Kafka JAAS properties for Atlas notification.
* Since Atlas 0.8.1 uses Kafka client 0.10.0.0, we cannot use the 'sasl.jaas.config' property,
* which is only available since Kafka 0.10.2 (implemented by KAFKA-4259).
* Instead, this method uses the old property names.
* @param mapToPopulate Map of configuration properties
* @param context Context
*/
private void setKafkaJaasConfig(Map<Object, Object> mapToPopulate, PropertyContext context) {
String keytab = context.getProperty(NIFI_KERBEROS_KEYTAB).evaluateAttributeExpressions().getValue();
String principal = context.getProperty(NIFI_KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue();
String serviceName = context.getProperty(KAFKA_KERBEROS_SERVICE_NAME).evaluateAttributeExpressions().getValue();
if(StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal) && StringUtils.isNotBlank(serviceName)) {
mapToPopulate.put("atlas.jaas.KafkaClient.loginModuleControlFlag", "required");
mapToPopulate.put("atlas.jaas.KafkaClient.loginModuleName", "com.sun.security.auth.module.Krb5LoginModule");
mapToPopulate.put("atlas.jaas.KafkaClient.option.keyTab", keytab);
mapToPopulate.put("atlas.jaas.KafkaClient.option.principal", principal);
mapToPopulate.put("atlas.jaas.KafkaClient.option.serviceName", serviceName);
mapToPopulate.put("atlas.jaas.KafkaClient.option.storeKey", "True");
mapToPopulate.put("atlas.jaas.KafkaClient.option.useKeyTab", "True");
mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.loginModuleControlFlag", "required");
mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.loginModuleName", "com.sun.security.auth.module.Krb5LoginModule");
mapToPopulate.put("atlas.jaas.ticketBased-KafkaClient.option.useTicketCache", "true");
mapToPopulate.put(ATLAS_KAFKA_PREFIX + "sasl.kerberos.service.name", serviceName);
}
}
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.resolver;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
public interface ClusterResolver {
default Collection<ValidationResult> validate(final ValidationContext validationContext) {
return Collections.emptySet();
}
PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName);
/**
* Implementations should clear previous configurations when this method is called again.
* @param context passed from ReportingTask
*/
void configure(PropertyContext context);
/**
* Resolve a cluster name from a list of host names or IP addresses.
* @param hostNames host names or IP addresses
* @return resolved cluster name or null
*/
default String fromHostNames(String ... hostNames) {
return null;
}
/**
* Resolve a cluster name from hints, such as Zookeeper Quorum, client port and znode path
* @param hints Contains variables to resolve a cluster name
* @return resolved cluster name or null
*/
default String fromHints(Map<String, String> hints) {
return null;
}
}

View File

@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.resolver;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
public class ClusterResolvers implements ClusterResolver {
private final Set<ClusterResolver> resolvers;
private final String defaultClusterName;
public ClusterResolvers(Set<ClusterResolver> resolvers, String defaultClusterName) {
this.resolvers = resolvers;
this.defaultClusterName = defaultClusterName;
}
@Override
public PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
for (ClusterResolver resolver : resolvers) {
final PropertyDescriptor descriptor = resolver.getSupportedDynamicPropertyDescriptor(propertyDescriptorName);
if (descriptor != null) {
return descriptor;
}
}
return null;
}
@Override
public Collection<ValidationResult> validate(ValidationContext validationContext) {
Collection<ValidationResult> results = new ArrayList<>();
for (ClusterResolver resolver : resolvers) {
results.addAll(resolver.validate(validationContext));
}
return results;
}
@Override
public void configure(PropertyContext context) {
for (ClusterResolver resolver : resolvers) {
resolver.configure(context);
}
}
@Override
public String fromHostNames(String ... hostNames) {
for (ClusterResolver resolver : resolvers) {
final String clusterName = resolver.fromHostNames(hostNames);
if (clusterName != null && !clusterName.isEmpty()) {
return clusterName;
}
}
return defaultClusterName;
}
@Override
public String fromHints(Map<String, String> hints) {
for (ClusterResolver resolver : resolvers) {
final String clusterName = resolver.fromHints(hints);
if (clusterName != null && !clusterName.isEmpty()) {
return clusterName;
}
}
return defaultClusterName;
}
}

View File

@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.resolver;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.processor.util.StandardValidators;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class RegexClusterResolver implements ClusterResolver {
public static final String PATTERN_PROPERTY_PREFIX = "hostnamePattern.";
public static final String PATTERN_PROPERTY_PREFIX_DESC = "White space delimited (including new line) Regular Expressions" +
" to resolve a 'Cluster Name' from a hostname or IP address of a transit URI of NiFi provenance record.";
private Map<String, Set<Pattern>> clusterNamePatterns;
@Override
public PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) {
if (propertyDescriptorName.startsWith(PATTERN_PROPERTY_PREFIX)) {
return new PropertyDescriptor
.Builder().name(propertyDescriptorName)
.description(PATTERN_PROPERTY_PREFIX_DESC)
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
.expressionLanguageSupported(true)
.dynamic(true)
.sensitive(false)
.build();
}
return null;
}
@Override
public Collection<ValidationResult> validate(ValidationContext validationContext) {
final List<ValidationResult> validationResults = new ArrayList<>();
consumeConfigurations(validationContext.getAllProperties(),
(clusterNamePatterns, patterns) -> {},
(entry, e) -> {
final ValidationResult result = new ValidationResult.Builder()
.subject(entry.getKey())
.input(entry.getValue())
.explanation(e.getMessage())
.valid(false)
.build();
validationResults.add(result);
});
return validationResults;
}
@Override
public void configure(PropertyContext context) {
clusterNamePatterns = new HashMap<>();
consumeConfigurations(context.getAllProperties(),
(clusterName, patterns) -> clusterNamePatterns.put(clusterName, patterns),
null);
}
private void consumeConfigurations(final Map<String, String> allProperties,
final BiConsumer<String, Set<Pattern>> consumer,
final BiConsumer<Map.Entry<String, String>, RuntimeException> errorHandler) {
allProperties.entrySet().stream()
.filter(entry -> entry.getKey().startsWith(PATTERN_PROPERTY_PREFIX))
.forEach(entry -> {
final String clusterName;
final Set<Pattern> patterns;
try {
clusterName = entry.getKey().substring(PATTERN_PROPERTY_PREFIX.length());
final String[] regexsArray = entry.getValue().split("\\s");
final List<String> regexs = Arrays.stream(regexsArray)
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
patterns = parseClusterNamePatterns(clusterName, regexs);
consumer.accept(clusterName, patterns);
} catch (RuntimeException e) {
if (errorHandler != null) {
errorHandler.accept(entry, e);
} else {
throw e;
}
}
});
}
private Set<Pattern> parseClusterNamePatterns(final String clusterName, List<String> regexs) {
if (clusterName == null || clusterName.isEmpty()) {
throw new IllegalArgumentException("Empty cluster name is not allowed.");
}
if (regexs.size() == 0) {
throw new IllegalArgumentException(
String.format("At least one cluster name pattern is required, [%s].", clusterName));
}
return regexs.stream().map(Pattern::compile).collect(Collectors.toSet());
}
@Override
public String fromHostNames(String ... hostNames) {
for (Map.Entry<String, Set<Pattern>> entry : clusterNamePatterns.entrySet()) {
for (Pattern pattern : entry.getValue()) {
for (String hostname : hostNames) {
if (pattern.matcher(hostname).matches()) {
return entry.getKey();
}
}
}
}
return null;
}
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.security;
import org.apache.atlas.AtlasClientV2;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import java.util.Collection;
import java.util.Optional;
import java.util.Properties;
public interface AtlasAuthN {
AtlasClientV2 createClient(final String[] baseUrls);
Collection<ValidationResult> validate(final ValidationContext context);
void configure(final PropertyContext context);
/**
* Populate required Atlas application properties.
* This method is called when Atlas reporting task generates atlas-application.properties.
*/
default void populateProperties(final Properties properties) {}
default Optional<ValidationResult> validateRequiredField(ValidationContext context, PropertyDescriptor prop) {
if (!context.getProperty(prop).isSet()) {
return Optional.of(new ValidationResult.Builder()
.subject(prop.getDisplayName())
.valid(false)
.explanation(String.format("required by '%s' auth.", this.getClass().getSimpleName()))
.build());
}
return Optional.empty();
}
}

View File

@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.security;
import org.apache.atlas.AtlasClientV2;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.util.StringUtils;
import java.util.Collection;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_PASSWORD;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_USER;
public class Basic implements AtlasAuthN {
private String user;
private String password;
@Override
public Collection<ValidationResult> validate(ValidationContext context) {
return Stream.of(
validateRequiredField(context, ATLAS_USER),
validateRequiredField(context, ATLAS_PASSWORD)
).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
}
@Override
public void configure(PropertyContext context) {
user = context.getProperty(ATLAS_USER).evaluateAttributeExpressions().getValue();
password = context.getProperty(ATLAS_PASSWORD).evaluateAttributeExpressions().getValue();
if (StringUtils.isEmpty(user)) {
throw new IllegalArgumentException("User is required for basic auth.");
}
if (StringUtils.isEmpty(password)){
throw new IllegalArgumentException("Password is required for basic auth.");
}
}
@Override
public AtlasClientV2 createClient(String[] baseUrls) {
return new AtlasClientV2(baseUrls, new String[]{user, password});
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.security;
import org.apache.atlas.AtlasClientV2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.util.StringUtils;
import java.io.IOException;
import java.util.Collection;
import java.util.Optional;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.NIFI_KERBEROS_KEYTAB;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.NIFI_KERBEROS_PRINCIPAL;
public class Kerberos implements AtlasAuthN {
private String principal;
private String keytab;
@Override
public Collection<ValidationResult> validate(ValidationContext context) {
return Stream.of(
validateRequiredField(context, NIFI_KERBEROS_PRINCIPAL),
validateRequiredField(context, NIFI_KERBEROS_KEYTAB)
).filter(Optional::isPresent).map(Optional::get).collect(Collectors.toList());
}
@Override
public void populateProperties(Properties properties) {
properties.put("atlas.authentication.method.kerberos", "true");
}
@Override
public void configure(PropertyContext context) {
principal = context.getProperty(NIFI_KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue();
keytab = context.getProperty(NIFI_KERBEROS_KEYTAB).evaluateAttributeExpressions().getValue();
if (StringUtils.isEmpty(principal)) {
throw new IllegalArgumentException("Principal is required for Kerberos auth.");
}
if (StringUtils.isEmpty(keytab)){
throw new IllegalArgumentException("Keytab is required for Kerberos auth.");
}
}
@Override
public AtlasClientV2 createClient(String[] baseUrls) {
final Configuration hadoopConf = new Configuration();
hadoopConf.set("hadoop.security.authentication", "kerberos");
UserGroupInformation.setConfiguration(hadoopConf);
final UserGroupInformation ugi;
try {
ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab);
} catch (IOException e) {
throw new RuntimeException("Failed to login with Kerberos due to: " + e, e);
}
return new AtlasClientV2(ugi, null, baseUrls);
}
}

View File

@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# By component type:
org.apache.nifi.atlas.provenance.analyzer.NiFiRemotePort
org.apache.nifi.atlas.provenance.analyzer.NiFiRootGroupPort
org.apache.nifi.atlas.provenance.analyzer.KafkaTopic
org.apache.nifi.atlas.provenance.analyzer.PutHiveStreaming
# By transit URI:
org.apache.nifi.atlas.provenance.analyzer.Hive2JDBC
org.apache.nifi.atlas.provenance.analyzer.HDFSPath
org.apache.nifi.atlas.provenance.analyzer.HBaseTable
org.apache.nifi.atlas.provenance.analyzer.FilePath
# By event type, if none of above analyzers matches
org.apache.nifi.atlas.provenance.analyzer.unknown.Create
org.apache.nifi.atlas.provenance.analyzer.unknown.Receive
org.apache.nifi.atlas.provenance.analyzer.unknown.Fetch
org.apache.nifi.atlas.provenance.analyzer.unknown.Send
org.apache.nifi.atlas.provenance.analyzer.unknown.RemoteInvocation

View File

@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.atlas.resolver.RegexClusterResolver

View File

@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.atlas.reporting.ReportLineageToAtlas

View File

@ -0,0 +1,538 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head>
<meta charset="utf-8" />
<title>ReportLineageToAtlas</title>
<link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css" />
</head>
<body>
<h2>ReportLineageToAtlas</h2>
Table of contents:
<ul>
<li><a href="#how-it-works">Information reported to Atlas</a></li>
<li><a href="#nifi-atlas-types">NiFi Atlas Types</a></li>
<li><a href="#cluster-name">Cluster Name Resolution</a></li>
<li><a href="#nifi-flow-structure">NiFi flow structure</a>
<ul>
<li><a href="#path-separation">Path Separation Logic</a></li>
</ul>
</li>
<li><a href="#nifi-data-lineage">NiFi data lineage</a>
<ul>
<li><a href="#lineage-strategy">NiFi Lineage Strategy</a></li>
<li><a href="#provenance-events">NiFi Provenance Event Analysis</a></li>
<li><a href="#datasets-and-processors">Supported DataSets and Processors</a></li>
</ul>
</li>
<li><a href="#runs-in-cluster">How it runs in NiFi cluster</a></li>
<li><a href="#limitations">Limitations</a></li>
<li><a href="#atlas-configs">Atlas Server Configurations</a></li>
<li><a href="#atlas-emulator">Atlas Server Emulator</a></li>
</ul>
<h3 id="how-it-works">Information reported to Atlas</h3>
<p>This reporting task stores two types of NiFi flow information, 'NiFi flow structure' and 'NiFi data lineage'.</p>
<p>'NiFi flow structure' tells what components are running within a NiFi flow and how these are connected. It is reported by analyzing current NiFi flow structure, specifically NiFi component relationships.</p>
<p>'NiFi data lineage' tells which parts of a NiFi flow interact with different DataSets such as HDFS files or Hive tables. It is reported by analyzing NiFi provenance events.</p>
<object data="nifi_atlas.svg" type="image/svg+xml" width="60%"></object>
<p>Technically, each type of information is sent using a different protocol: the Atlas REST API v2 and notification messages via a Kafka topic, as shown in the image above.</p>
<p>As both information types use the same <a href="#nifi-atlas-types">NiFi Atlas Types</a> and <a href="#cluster-name">Cluster Name Resolution</a> concepts, it is recommended to read those sections first.</p>
<h3 id="nifi-atlas-types">NiFi Atlas Types</h3>
<p>When it runs, this reporting task creates the following NiFi-specific types in the Atlas type system if these type definitions are not found.</p>
<p>Green boxes represent sub-types of DataSet and blue ones are sub-types of Process. Gray lines represent entity ownership.
Red lines represent lineage.</p>
<object data="nifi_types.svg" type="image/svg+xml" width="60%"></object>
<ul>
<li>nifi_flow
<p>Represents a NiFi data flow.</p>
<p>As shown in the above diagram, nifi_flow owns other nifi_component types.
This owning relationship is defined by Atlas 'owned' constraint so that when a 'nifi_flow' entity is removed, all owned NiFi component entities are removed in cascading manner.</p>
<p>When this reporting task runs, it analyzes and traverses the entire flow structure, and creates NiFi component entities in Atlas.
On later runs, it compares the current flow structure with the one stored in Atlas to figure out whether any changes have been made since the last time the flow was reported. The reporting task updates NiFi component entities in Atlas if needed.</p>
<p>NiFi components that are removed from a NiFi flow also get deleted from Atlas.
However those entities can still be seen in Atlas search results or lineage graphs since Atlas uses 'Soft Delete' by default.
See <a href="#delete-handler">Atlas Delete Handler</a> for further detail.</p>
</li>
Attributes:
<ul>
<li>qualifiedName: Root ProcessGroup ID@clusterName (e.g. 86420a14-2fab-3e1e-4331-fb6ab42f58e0@cl1)</li>
<li>name: Name of the Root ProcessGroup.</li>
<li>url: URL of the NiFi instance. This can be specified via reporting task 'NiFi URL for Atlas' property.</li>
</ul>
</ul>
<ul>
<li>nifi_flow_path <p>Part of a NiFi data flow containing one or more processing NiFi components such as Processors and RemoteGroupPorts. The reporting task divides a NiFi flow into multiple flow paths. See <a href="#path-separation">Path Separation Logic</a> for details.</p></li>
Attributes:
<ul>
<li>qualifiedName: The first NiFi component Id in a path@clusterName (e.g. 529e6722-9b49-3b66-9c94-00da9863ca2d@cl1)</li>
<li>name: NiFi component names within a path are concatenated (e.g. GenerateFlowFile, PutFile, LogAttribute)</li>
<li>url: A deep link to the first NiFi component in the corresponding NiFi UI</li>
</ul>
</ul>
<ul>
<li>nifi_input/output_port <p>Represents a RootGroupPort which can be accessed by RemoteProcessGroup via Site-to-Site protocol.</p></li>
Attributes:
<ul>
<li>qualifiedName: Port ID@clusterName (e.g. 3f6d405e-6e3d-38c9-c5af-ce158f8e593d@cl1)</li>
<li>name: Name of the Port.</li>
</ul>
</ul>
<ul>
<li>nifi_data <p>Represents <a href="#unknown-datasets">Unknown DataSets</a> created by CREATE/SEND/RECEIVE NiFi provenance events that do not have a particular provenance event analyzer.</p></li>
Attributes:
<ul>
<li>qualifiedName: ID of a Processor which generated the provenance event@clusterName (e.g. db8bb12c-5cd3-3011-c971-579f460ebedf@cl1)</li>
<li>name: Name of the Processor.</li>
</ul>
</ul>
<ul>
<li>nifi_queue <p>An internal DataSet of NiFi flows which connects nifi_flow_paths. An Atlas lineage graph requires a DataSet in between Process entities.</p></li>
Attributes:
<ul>
<li>qualifiedName: ID of the first Processor in the destination nifi_flow_path.</li>
<li>name: Name of the Processor.</li>
</ul>
</ul>
<h3 id="cluster-name">Cluster Name Resolution</h3>
<p>An entity in Atlas can be identified either by its GUID for any existing object, or by its type name and a unique attribute if the GUID is not known. The qualified name is commonly used as the unique attribute.</p>
<p>Since one Atlas instance can be used to manage multiple environments, i.e. clusters, Atlas has to manage objects in different clusters that may have the same name. For example, a Hive table 'request_logs' may exist in both 'cluster-A' and 'cluster-B'. In such cases, the cluster name embedded in qualified names is crucial.</p>
<p>For these requirements, a qualified name has the 'componentId@clusterName' format. For example, a Hive table qualified name would be dbName.tableName@clusterName (default.request_logs@cluster-A).</p>
<p>From this NiFi reporting task's standpoint, a cluster name needs to be resolved in the following situations:
<ul>
<li>To register NiFi component entities. Which cluster name should be used to represent the current NiFi cluster?</li>
<li>To create lineages from NiFi components to other DataSets. Which cluster does the DataSet reside in?</li>
</ul>
</p>
<p>To answer such questions, the ReportLineageToAtlas reporting task provides a way to define mappings from IP addresses or hostnames to a cluster name.
The mappings are defined by Dynamic Properties with names in the 'hostnamePattern.ClusterName' format, whose values are sets of regular expression patterns that match IP addresses or host names to a particular cluster name.</p>
<p>As an example, the following mapping definition would resolve the cluster name 'cluster-A' for an IP address such as '192.168.30.123' or a hostname such as 'namenode1.a.example.com', and 'cluster-B' for '192.168.40.223' or 'nifi3.b.example.com'.</p>
<pre>
# Dynamic Property Name for cluster-A
hostnamePattern.cluster-A
# Value can have multiple Regular Expression patterns separated by new line
192\.168\.30\.\d+
[^\.]+\.a\.example\.com
# Dynamic Property Name for cluster-B
hostnamePattern.cluster-B
# Values
192\.168\.40\.\d+
[^\.]+\.b\.example\.com
</pre>
<p>If none of the cluster name mappings match, then the name defined at 'Atlas Default Cluster Name' is used.</p>
<h3 id="nifi-flow-structure">NiFi flow structure</h3>
This section describes how the structure of a NiFi flow is reported to Atlas.
<h4 id="path-separation">Path Separation Logic</h4>
<p>To provide a meaningful lineage granularity in Atlas, this reporting task divides a NiFi flow into paths.
The logic is based on the following concepts:</p>
<ul>
<li>
<p>Focuses only on Processors and RootGroupPorts. Input / Output ports in child Process Groups, the Process Group hierarchy, and Funnels do not contribute to path separation.</p>
<p>For example, the following two flows are identical in path separation logic:</p>
<ul>
<li>
<pre>Root group Input port -> Processor 0 -> Funnel -> Processor 1 -> Input port of a child Process Group -> Processor 2</pre>
</li>
<li>
<pre>Root group Input port -> Processor 0 -> Processor 1 -> Processor 2</pre>
</li>
</ul>
<p>Both flows will be treated as a single path that consists of Root group Input port, Processor 0, 1 and 2.</p>
</li>
<li>
<p>Any Processor with multiple incoming relationships from other Processors is treated like a 'Common
route' or 'Functional route', and is managed as a separate path.</p>
<p>For example, the following flow:</p>
<pre>Processor 0 -> Processor 1 -> Processor 2
Processor 3 -> Processor 2</pre>
<p>will produce the following paths as a result:</p>
<pre>Processor 0, 1
Processor 2
Processor 3</pre>
</li>
<li><p>Self cyclic relationships are ignored.</p></li>
</ul>
<p>Based on these concepts, path separation is done by following steps:</p>
<ol>
<li>Select starting components (Processors and RootGroup InputPorts) that do not have any incoming relationship from other Processors.</li>
<li>For each starting component, create a 'nifi_flow_path'. The same path may already exist if another path arrived here before.</li>
<li>Traverse outgoing relationships.</li>
<li>If any Processor with more than 1 incoming Processor relationship is found, then split the component off as a new 'nifi_flow_path'. When starting a new path, a 'nifi_queue' is created. The queue is added to the current path's outputs and the new path's inputs, as illustrated in the example below. Back to step 2.</li>
<li>Traverse outgoing paths as long as there is one.</li>
</ol>
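<p>As a worked illustration, applying these steps to the two-branch example flow shown earlier yields the following (the exact queue layout is simplified here):</p>
<pre>
# The two-branch flow above
Processor 0 -> Processor 1 -> Processor 2
Processor 3 -> Processor 2

# Resulting nifi_flow_path entities
Processor 0, 1
Processor 3
Processor 2

# A nifi_queue connects each preceding path to the 'Processor 2' path, e.g.
(Processor 0, 1) -> nifi_queue -> (Processor 2)
</pre>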
<h3 id="nifi-data-lineage">NiFi data lineage</h3>
This section describes how NiFi data lineage is reported to Atlas.
<h4 id="lineage-strategy">NiFi Lineage Strategy</h4>
<p>To meet different use-cases, this reporting task provides the 'NiFi Lineage Strategy' property to control how the DataSet and Process lineage tracked by a NiFi flow is reported to Atlas.</p>
<p><em>NOTE:</em> It is recommended to try the possible options to see which strategy meets your use-case before running the reporting task in a production environment. Different strategies create entities differently, and if multiple strategies are used (or switched from one to another), the Atlas lineage graph will be noisy. Since many entities will be created by this reporting task over time, it might be troublesome to clean up entities in order to change the strategy afterward, especially as Atlas manages data reported by systems other than NiFi.</p>
<p>In order to test or debug how this reporting task behaves, <a href="#atlas-emulator">Atlas Server Emulator</a> may be useful, instead of sending data to a real Atlas.</p>
<ul>
<li>Simple Path
<p>Maps data I/O provenance events such as SEND/RECEIVE to 'nifi_flow_path' created by <a href="#nifi-flow-structure">NiFi flow structure</a> analysis.</p>
<p>It tracks DataSet lineage at the 'nifi_flow_path' process level, instead of the event level, to report a simple data lineage graph in Atlas. If different DataSets go through the same 'nifi_flow_path', all of those input DataSets are shown as if they impact every output DataSet. For example, suppose A.txt and B.txt are processed by the same GetFile processor and are eventually ingested to HDFS path-A and path-B respectively by PutHDFS, which uses NiFi Expression Language to decide where to store FlowFiles. Then the Atlas lineage graph will show as if both A.txt and B.txt are ingested into HDFS path-A when you pick path-A to see which DataSets are ingested into it, because both A.txt and B.txt went through the same GetFile and PutHDFS processors.</p>
<p>This strategy generates the least amount of data in Atlas. It might be useful when you prefer a big picture in Atlas that summarizes how DataSets and Processes are connected among NiFi and other software. NiFi provenance events can be used to investigate more details if needed, as they store complete event (FlowFile) level lineage.</p>
</li>
<li>Complete Path
<p>Focuses on the DROP provenance event type, because it represents the end of a particular FlowFile's lifecycle. By traversing provenance events backward from a DROP event, the entire lineage can be reported for a given FlowFile, including where it is created and where it goes.
</p>
<p>However, reporting the complete flow path for every single FlowFile would produce too many entities in Atlas. Also, it may not be the best approach for Atlas, as it is designed to manage DataSet level lineage rather than event level lineage as of today. In order to keep the amount of data at a minimum, this strategy calculates a hash from the input and output DataSets of a lineage path, so that identical complete path routes become the same Atlas entity.</p>
<p>If different FlowFiles went through the exact same route, those provenance data only create a single 'nifi_flow_path' Atlas entity. On the other hand, a single part of a NiFi flow can generate different FlowFile lineage paths, which will be reported as different 'nifi_flow_path' entities, typically when NiFi Expression Language is used in NiFi Processor configurations to connect DataSets (see the illustration after this list).</p>
<p><em>NOTE:</em> While the Simple Path strategy can report lineage by looking at each individual NiFi provenance event record, the Complete Path strategy has to query parent events. It needs more computing resources (CPU and I/O) when NiFi provenance event queries are performed.</p>
</li>
</ul>
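<p>As an illustration of the difference, the GetFile/PutHDFS example described under 'Simple Path' would roughly produce the following entities under each strategy (names and paths are illustrative, not actual qualified names):</p>
<pre>
# Simple Path: one statically created nifi_flow_path for the route
nifi_flow_path 'GetFile, PutHDFS'
    inputs:  A.txt, B.txt
    outputs: HDFS path-A, HDFS path-B   (A.txt appears to impact path-B as well)

# Complete Path: one nifi_flow_path per distinct input/output DataSet combination
nifi_flow_path 'GetFile, PutHDFS' (route A)
    inputs:  A.txt
    outputs: HDFS path-A
nifi_flow_path 'GetFile, PutHDFS' (route B)
    inputs:  B.txt
    outputs: HDFS path-B
</pre>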
<p>To illustrate the difference between lineage strategies, let's look at a sample NiFi flow as shown in the screenshots below.</p>
<img src="sample-flow-path.png" />
<p>With 'Simple Path', Atlas lineage is reported like below when '/tmp/input/A1.csv' is selected. Since 'Simple Path' simply maps I/O events to a 'nifi_flow_path', '/tmp/output/B1.csv' is shown in the lineage graph because that file is written by the 'GetFile, PutFile...' process.</p>
<img src="sample-flow-path-simple.png" />
<p>With 'Complete Path', Atlas lineage is reported like below. This time, the 'GetFile, PutFile...' process is not linked to '/tmp/output/B1.csv', because the 'Complete Path' strategy created two different 'nifi_flow_path' entities, one for '/tmp/input/A1.csv -> /tmp/output/A1.csv' and another for '/tmp/input/B1.csv -> /tmp/output/B1.csv'.</p>
<p>However, once the data records ingested from A.csv and B.csv get into a bigger DataSet, the 'nifi-test' Kafka topic in this example (or any other DataSet such as a database table or a concatenated file), record level lineage telling where each record came from can no longer be tracked. So the resulting '/tmp/consumed/B_2..' is shown in the same lineage graph, although the file does not contain any data that came from '/tmp/input/A1.csv'.</p>
<img src="sample-flow-path-complete.png" />
<h3 id="provenance-events">NiFi Provenance Event Analysis</h3>
<p>To create lineage describing which NiFi component interacts with what DataSets, DataSet entities and a Process entity need to be created in Atlas. Specifically, at least 3 entities are required to draw a lineage graph on the Atlas UI: a Process entity, a DataSet referred to by the Process 'inputs' attribute, and a DataSet referred to by its 'outputs' attribute. For example:</p>
<pre>
# With following entities
guid: 1
typeName: fs_path (extends DataSet)
qualifiedName: /data/A1.csv@BranchOffice1
guid: 2
typeName: nifi_flow_path (extends Process)
name: GetFile, PutHDFS
qualifiedName: 529e6722-9b49-3b66-9c94-00da9863ca2d@BranchOffice1
inputs: refer guid(1)
outputs: refer guid(3)
guid: 3
typeName: hdfs_path (extends DataSet)
qualifiedName: /data/input/A1.csv@Analytics
# Atlas draws lineage graph
/data/A1.csv -> GetFile, PutHDFS -> /data/input/A1.csv
</pre>
<p>To identify such Process and DataSet Atlas entities, this reporting task uses NiFi provenance events. At a minimum, the reporting task needs to derive the following information from a NiFi provenance event record:
<ul>
<li>typeName (e.g. fs_path, hive_table)</li>
<li>qualifiedName in uniqueId@clusterName (e.g. /data/A1.csv@BranchOffice1)</li>
</ul>
</p>
<p>The 'clusterName' in the 'qualifiedName' attribute is resolved by mapping the IP address or hostname available in the NiFi provenance event 'transitUri' to a cluster name. See <a href="#cluster-name">Cluster Name Resolution</a> for details.</p>
<p>For 'typeName' and 'qualifiedName', different analysis rules are needed for different DataSets. ReportLineageToAtlas provides an extension point called 'NiFiProvenanceEventAnalyzer' to implement such analysis logic for particular DataSets.</p>
<p>When a provenance event is analyzed, registered NiFiProvenanceEventAnalyzer implementations are searched in the following order to find the best matching analyzer implementation (a simplified sketch follows the list below):
<ol>
<li>By component type (e.g. KafkaTopic)</li>
<li>By transit URI protocol (e.g. HDFSPath)</li>
<li>By event type, if none of above analyzers matches (e.g. Create)</li>
</ol>
</p>
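<p>The following is a simplified, self-contained sketch of the kind of derivation an analyzer performs for an HDFS transit URI. It is for illustration only and does not show the actual NiFiProvenanceEventAnalyzer interface; the real analyzers also use the AnalysisContext and the cluster name resolution described above.</p>
<pre>
import java.net.URI;

public class HdfsPathAnalyzerSketch {
    public static void main(String[] args) {
        // Hypothetical transit URI taken from a provenance event.
        final String transitUri = "hdfs://nn.example.com:8020/user/nifi/A1.csv";
        final URI uri = URI.create(transitUri);

        // In the reporting task the cluster name is resolved from uri.getHost()
        // via the ClusterResolver mappings; it is hard-coded here for illustration.
        final String clusterName = "cluster-A";

        final String typeName = "hdfs_path";
        final String qualifiedName = uri.getPath() + "@" + clusterName;

        // Prints: hdfs_path /user/nifi/A1.csv@cluster-A
        System.out.println(typeName + " " + qualifiedName);
    }
}
</pre>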
<h4 id="datasets-and-processors">Supported DataSets and Processors</h4>
<p>
Currently, the following NiFi components are supported by this reporting task:
</p>
<table>
<tr>
<th>Analyzer</th>
<th colspan="3">covered NiFi components</th>
<th colspan="2">Atlas DataSet</th>
<th>Description</th>
</tr>
<tr>
<th></th>
<th>name</th>
<th>eventType</th>
<th>transitUri example</th>
<th>typeName</th>
<th>qualifiedName</th>
<th></th>
</tr>
<tr>
<td>NiFiRemotePort</td>
<td>
Remote Input Port<br/>
Remote Output Port
</td>
<td>
SEND<br/>
RECEIVE<br/>
</td>
<td>
<ul>
<li>http://nifi1.example.com:8080/nifi-api/data-transfer/input-ports/35dbc0ab-015e-1000-144c-a8d71255027d/transactions/89335043-f105-4de7-a0ef-46f2ef0c7c51/flow-files</li>
<li>nifi://nifi1.example.com:8081/cb729f05-b2ee-4488-909d-c6696cc34588</li>
</ul>
</td>
<td>
nifi_input_port<br/>
nifi_output_port
</td>
<td>remotePortGUID@clusterName<br/>(e.g. 35dbc0ab-015e-1000-144c-a8d71255027d@cl1)</td>
<td><strong>NOTE:</strong> Only the HTTP Site-to-Site (S2S) protocol is supported. RAW support may be added in the future as it needs a NiFi code modification. See <a href="https://issues.apache.org/jira/browse/NIFI-4654">NIFI-4654</a> for details.</td>
</tr>
<tr>
<td>NiFiRootGroupPort</td>
<td>
Root group Input Port<br/>
Root group Output Port
</td>
<td>
RECEIVE<br/>
SEND<br/>
</td>
<td>(Same as Remote Input/Output Port)</td>
<td>(Same as above)</td>
<td>(Same as above)</td>
<td></td>
</tr>
<tr>
<td>KafkaTopic</td>
<td>
PublishKafka<br/>
ConsumeKafka<br/>
PublishKafka_0_10<br/>
ConsumeKafka_0_10<br/>
PublishKafkaRecord_0_10<br/>
ConsumeKafkaRecord_0_10<br/>
</td>
<td>
SEND<br/>
RECEIVE<br/>
SEND<br/>
RECEIVE<br/>
SEND<br/>
RECEIVE<br/>
</td>
<td>
PLAINTEXT://kafka1.example.com:9092/sample-topic<br/>
(Protocol can be either PLAINTEXT, SSL, SASL_PLAINTEXT or SASL_SSL)
</td>
<td>kafka_topic</td>
<td>topicName@clusterName<br/>(e.g. testTopic@cl1)</td>
<td><strong>NOTE:</strong> With Atlas 0.8.2, the same topic name in different clusters cannot be created using the pre-built 'kafka_topic' type. See <a href="https://issues.apache.org/jira/browse/ATLAS-2286">ATLAS-2286</a>.</td>
</tr>
<tr>
<td>PutHiveStreaming</td>
<td>PutHiveStreaming</td>
<td>SEND</td>
<td>thrift://hive.example.com:9083</td>
<td>hive_table</td>
<td>tableName@clusterName<br/>(e.g. myTable@cl1)</td>
<td></td>
</tr>
<tr>
<td>Hive2JDBC</td>
<td>
PutHiveQL<br/>
SelectHiveQL
</td>
<td>
SEND<br/>
RECEIVE<br/>
</td>
<td>jdbc:hive2://hive.example.com:10000/default</td>
<td>hive_table</td>
<td>tableName@clusterName<br/>(e.g. myTable@cl1)</td>
<td>The corresponding Processors parse Hive QL to set the 'query.input.tables' and 'query.output.tables' FlowFile attributes. These attribute values are used to create the qualified name.</td>
</tr>
<tr>
<td>HDFSPath</td>
<td>
DeleteHDFS<br/>
FetchHDFS<br/>
FetchParquet<br/>
GetHDFS<br/>
GetHDFSSequenceFile<br/>
PutHDFS<br/>
PutParquet<br/>
</td>
<td>
REMOTE_INVOCATION<br/>
FETCH<br/>
FETCH<br/>
RECEIVE<br/>
RECEIVE<br/>
SEND<br/>
SEND<br/>
</td>
<td>hdfs://nn.example.com:8020/user/nifi/5262553828219</td>
<td>hdfs_path</td>
<td>/path/fileName@clusterName<br/>(e.g. /app/warehouse/hive/db/default@cl1)</td>
<td></td>
</tr>
<tr>
<td>HBaseTable</td>
<td>
FetchHBaseRow<br/>
GetHBase<br/>
PutHBaseCell<br/>
PutHBaseJSON<br/>
PutHBaseRecord<br/>
</td>
<td>
FETCH<br/>
RECEIVE<br/>
SEND<br/>
SEND<br/>
SEND<br/>
</td>
<td>hbase://hmaster.example.com:16000/tableA/rowX</td>
<td>hbase_table</td>
<td>tableName@clusterName<br/>(e.g. myTable@cl1)</td>
<td></td>
</tr>
<tr>
<td>FilePath</td>
<td>
PutFile<br/>
GetFile<br/>
... etc
</td>
<td>
SEND<br/>
RECEIVE<br/>
... etc
</td>
<td>file:///tmp/a.txt</td>
<td>fs_path</td>
<td>/path/fileName@hostname<br/>(e.g. /tmp/dir/filename.txt@host.example.com)</td>
<td></td>
</tr>
<tr id="unknown-datasets">
<td>unknown.Create<br/>Receive, Fetch<br/>Send, RemoteInvocation</td>
<td>Other Processors that generate the listed event types</td>
<td>
CREATE<br/>
RECEIVE<br/>
FETCH<br/>
SEND<br/>
REMOTE_INVOCATION
</td>
<td></td>
<td>nifi_data</td>
<td>processorGuid@clusterName<br/>(e.g. db8bb12c-5cd3-3011-c971-579f460ebedf@cl1)</td>
<td></td>
</tr>
</table>
<h3 id="runs-in-cluster">How it runs in NiFi cluster</h3>
When this reporting task runs in a NiFi cluster, the following tasks are executed only by the primary node:
<ul>
<li>Create <a href="#nifi-atlas-types">NiFi Atlas Types</a> in Atlas type system</li>
<li>Maintain the NiFi flow structure and metadata in Atlas, which consist of NiFi component entities such as 'nifi_flow', 'nifi_flow_path' and 'nifi_input(output)_port'.</li>
</ul>
Meanwhile, every node (including the primary node) performs the following:
<ul>
<li>Analyze NiFi provenance events stored in its local provenance event repository, to create lineage between 'nifi_flow_path' entities and other DataSets (e.g. Hive tables or HDFS paths).</li>
</ul>
<h3 id="limitations">Limitations</h3>
<ul>
<li>
<em>Requires Atlas 0.8-incubating or later</em>:
<p>This reporting task requires Atlas REST API version 2, which was introduced in Atlas 0.8-incubating.
Older versions of Atlas are not supported.</p>
</li>
<li>
<em>Limited DataSets and Processors support</em>:
<p>In order to report lineage to Atlas, this reporting task must know what a given processor does with a certain DataSet, and it then creates an 'Atlas Object Id' for that DataSet which uniquely identifies an entity in Atlas. An Atlas Object Id holds a map of unique properties; in most cases 'qualifiedName' is set in that map to identify an entity. The format of a qualifiedName depends on the DataSet type (a minimal sketch is shown after this list).</p>
<p>To create this Atlas Object Id, Processor-specific code that analyzes the configured properties has to be implemented.
See <a href="#datasets-and-processors">Supported DataSets and Processors</a> for details.</p>
</li>
<li>
<em>Restarting NiFi is required to update some ReportingTask properties</em>
<p>Because the underlying Atlas client library caches configurations when it first runs, some properties of this reporting task cannot be updated by stopping, reconfiguring and restarting the reporting task.</p>
<p>The NiFi process needs to be restarted in such cases.</p>
</li>
</ul>
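<p>As a minimal illustration of the Atlas Object Id mentioned above (using the Atlas client model class; the type name and value are only examples), a Hive table DataSet could be identified like this:</p>
<pre>
import org.apache.atlas.model.instance.AtlasObjectId;

// Identify an existing 'hive_table' entity by its unique 'qualifiedName' attribute.
// The qualifiedName format (here tableName@clusterName) depends on the DataSet type.
AtlasObjectId hiveTableId = new AtlasObjectId("hive_table", "qualifiedName", "myTable@cl1");
</pre>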
<h3 id="atlas-configs">Atlas Server Configurations</h3>
<ul>
<li id="delete-handler">
<em>Delete Handler</em>:
<p>Atlas uses 'SoftDeleteHandler' by default, which marks relationships as deleted but leaves them visible in the Atlas UI. The soft delete model is useful if you would like to capture every lineage ever defined,
but if you prefer seeing only the current state of a NiFi flow, hard delete would be more appropriate.</p>
<p>To change this behavior, set the following in 'atlas-application.properties' on the Atlas server, then restart Atlas.
HardDeleteHandlerV1 physically removes lineage:</p>
<pre>atlas.DeleteHandlerV1.impl=org.apache.atlas.repository.store.graph.v1.HardDeleteHandlerV1</pre>
</li>
</ul>
<h3 id="atlas-emulator">Atlas Server Emulator</h3>
<p>If you have the Apache NiFi project source code on your local machine, you can run the Atlas Server Emulator that is included in the 'nifi-atlas-reporting-task' test module. By default, the emulator listens on port 21000 for Atlas REST API v2 and on port 9092 for Kafka. A running NiFi instance can point this reporting task at the emulator. It can be helpful when you need to debug how the reporting task works, or to try out different reporting strategies.</p>
<p>See <a href="https://github.com/apache/nifi/tree/master/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/test/java/org/apache/nifi/atlas/emulator/README.md">Apache Atlas Server Emulator</a> readme file for further details.</p>
</body>
</html>

File diff suppressed because one or more lines are too long


File diff suppressed because one or more lines are too long


View File

@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.nifi.atlas.reporting.ReportLineageToAtlas;
import org.apache.nifi.atlas.security.AtlasAuthN;
import org.apache.nifi.atlas.security.Basic;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.util.MockPropertyValue;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.nifi.atlas.NiFiTypes.NIFI_TYPES;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
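/**
 * Manual integration test that requires a running Atlas server.
 * The target server address and credentials are configured in {@link #setup()}.
 */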
public class ITNiFiAtlasClient {
private static final Logger logger = LoggerFactory.getLogger(ITNiFiAtlasClient.class);
private NiFiAtlasClient atlasClient;
@Before
public void setup() {
atlasClient = NiFiAtlasClient.getInstance();
// Add your atlas server ip address into /etc/hosts as atlas.example.com
PropertyContext propertyContext = mock(PropertyContext.class);
when(propertyContext.getProperty(ReportLineageToAtlas.ATLAS_USER)).thenReturn(new MockPropertyValue("admin"));
when(propertyContext.getProperty(ReportLineageToAtlas.ATLAS_PASSWORD)).thenReturn(new MockPropertyValue("admin"));
final AtlasAuthN atlasAuthN = new Basic();
atlasAuthN.configure(propertyContext);
atlasClient.initialize(new String[]{"http://atlas.example.com:21000/"}, atlasAuthN, null);
}
@Test
public void testFetchNiFiFlow() throws Exception {
final NiFiFlow nifiFlow = atlasClient.fetchNiFiFlow("1fc2e0a6-0160-1000-2660-72a0db49f37c", "DEBUG");
}
@Test
public void testDeleteTypeDefs() throws Exception {
atlasClient.deleteTypeDefs(NIFI_TYPES);
}
@Test
public void testRegisterNiFiTypeDefs() throws Exception {
atlasClient.registerNiFiTypeDefs(true);
}
@Test
public void testSearch() throws Exception {
final AtlasObjectId atlasObjectId = new AtlasObjectId("kafka_topic", "topic", "nifi-test");
final AtlasEntity.AtlasEntityWithExtInfo entityDef = atlasClient.searchEntityDef(atlasObjectId);
logger.info("entityDef={}", entityDef);
}
}

View File

@ -0,0 +1,275 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.nifi.atlas.reporting.ITReportLineageToAtlas;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.controller.status.PortStatus;
import org.apache.nifi.controller.status.ProcessGroupStatus;
import org.apache.nifi.controller.status.ProcessorStatus;
import org.apache.nifi.util.Tuple;
import org.junit.Before;
import org.junit.Test;
import java.util.Map;
import java.util.function.Function;
import static org.apache.nifi.atlas.AtlasUtils.toQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE;
import static org.junit.Assert.assertEquals;
/**
* Test {@link NiFiFlowAnalyzer} with simple mock code.
* More complex and detailed tests are available in {@link ITReportLineageToAtlas}.
*/
public class TestNiFiFlowAnalyzer {
private int componentId = 0;
@Before
public void before() throws Exception {
componentId = 0;
}
private ProcessGroupStatus createEmptyProcessGroupStatus() {
final ProcessGroupStatus processGroupStatus = new ProcessGroupStatus();
processGroupStatus.setId(nextComponentId());
processGroupStatus.setName("Flow name");
return processGroupStatus;
}
@Test
public void testEmptyFlow() throws Exception {
ProcessGroupStatus rootPG = createEmptyProcessGroupStatus();
final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
final NiFiFlow nifiFlow = new NiFiFlow(rootPG.getId());
nifiFlow.setClusterName("cluster1");
analyzer.analyzeProcessGroup(nifiFlow, rootPG);
assertEquals("1234-5678-0000-0000@cluster1", nifiFlow.getQualifiedName());
}
private ProcessorStatus createProcessor(ProcessGroupStatus pgStatus, String type) {
final ProcessorStatus processor = new ProcessorStatus();
processor.setName(type);
processor.setId(nextComponentId());
processor.setGroupId(pgStatus.getId());
pgStatus.getProcessorStatus().add(processor);
return processor;
}
private String nextComponentId() {
return String.format("1234-5678-0000-%04d", componentId++);
}
private void connect(ProcessGroupStatus pg0, Object o0, Object o1) {
Function<Object, Tuple<String, String>> toTuple = o -> {
Tuple<String, String> comp;
if (o instanceof ProcessorStatus) {
ProcessorStatus p = (ProcessorStatus) o;
comp = new Tuple<>(p.getId(), p.getName());
} else if (o instanceof PortStatus) {
PortStatus p = (PortStatus) o;
comp = new Tuple<>(p.getId(), p.getName());
} else {
throw new IllegalArgumentException("Not supported");
}
return comp;
};
connect(pg0, toTuple.apply(o0), toTuple.apply(o1));
}
private void connect(ProcessGroupStatus pg0, Tuple<String, String> comp0, Tuple<String, String> comp1) {
ConnectionStatus conn = new ConnectionStatus();
conn.setId(nextComponentId());
conn.setGroupId(pg0.getId());
conn.setSourceId(comp0.getKey());
conn.setSourceName(comp0.getValue());
conn.setDestinationId(comp1.getKey());
conn.setDestinationName(comp1.getValue());
pg0.getConnectionStatus().add(conn);
}
@Test
public void testSingleProcessor() throws Exception {
ProcessGroupStatus rootPG = createEmptyProcessGroupStatus();
final ProcessorStatus pr0 = createProcessor(rootPG, "GenerateFlowFile");
final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
final NiFiFlow nifiFlow = new NiFiFlow(rootPG.getId());
analyzer.analyzeProcessGroup(nifiFlow, rootPG);
assertEquals(1, nifiFlow.getProcessors().size());
analyzer.analyzePaths(nifiFlow);
final Map<String, NiFiFlowPath> paths = nifiFlow.getFlowPaths();
assertEquals(1, paths.size());
// first path
final NiFiFlowPath path0 = paths.get(pr0.getId());
assertEquals(path0.getId(), path0.getProcessComponentIds().get(0));
assertEquals(rootPG.getId(), path0.getGroupId());
// Should be able to find a path from a given processor GUID.
final NiFiFlowPath pathForPr0 = nifiFlow.findPath(pr0.getId());
assertEquals(path0, pathForPr0);
}
@Test
public void testProcessorsWithinSinglePath() throws Exception {
ProcessGroupStatus rootPG = createEmptyProcessGroupStatus();
final ProcessorStatus pr0 = createProcessor(rootPG, "GenerateFlowFile");
final ProcessorStatus pr1 = createProcessor(rootPG, "UpdateAttribute");
connect(rootPG, pr0, pr1);
final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
final NiFiFlow nifiFlow = new NiFiFlow(rootPG.getId());
analyzer.analyzeProcessGroup(nifiFlow, rootPG);
assertEquals(2, nifiFlow.getProcessors().size());
analyzer.analyzePaths(nifiFlow);
final Map<String, NiFiFlowPath> paths = nifiFlow.getFlowPaths();
assertEquals(1, paths.size());
// Should be able to find a path from a given processor GUID.
final NiFiFlowPath pathForPr0 = nifiFlow.findPath(pr0.getId());
final NiFiFlowPath pathForPr1 = nifiFlow.findPath(pr1.getId());
final NiFiFlowPath path0 = paths.get(pr0.getId());
assertEquals(path0, pathForPr0);
assertEquals(path0, pathForPr1);
}
@Test
public void testMultiPaths() throws Exception {
ProcessGroupStatus rootPG = createEmptyProcessGroupStatus();
final ProcessorStatus pr0 = createProcessor(rootPG, "GenerateFlowFile");
final ProcessorStatus pr1 = createProcessor(rootPG, "UpdateAttribute");
final ProcessorStatus pr2 = createProcessor(rootPG, "ListenTCP");
final ProcessorStatus pr3 = createProcessor(rootPG, "LogAttribute");
connect(rootPG, pr0, pr1);
connect(rootPG, pr2, pr3);
final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
final NiFiFlow nifiFlow = new NiFiFlow(rootPG.getId());
analyzer.analyzeProcessGroup(nifiFlow, rootPG);
assertEquals(4, nifiFlow.getProcessors().size());
analyzer.analyzePaths(nifiFlow);
final Map<String, NiFiFlowPath> paths = nifiFlow.getFlowPaths();
assertEquals(2, paths.size());
// Order is not guaranteed
final NiFiFlowPath pathA = paths.get(pr0.getId());
final NiFiFlowPath pathB = paths.get(pr2.getId());
assertEquals(2, pathA.getProcessComponentIds().size());
assertEquals(2, pathB.getProcessComponentIds().size());
// Should be able to find a path from a given processor GUID.
final NiFiFlowPath pathForPr0 = nifiFlow.findPath(pr0.getId());
final NiFiFlowPath pathForPr1 = nifiFlow.findPath(pr1.getId());
final NiFiFlowPath pathForPr2 = nifiFlow.findPath(pr2.getId());
final NiFiFlowPath pathForPr3 = nifiFlow.findPath(pr3.getId());
assertEquals(pathA, pathForPr0);
assertEquals(pathA, pathForPr1);
assertEquals(pathB, pathForPr2);
assertEquals(pathB, pathForPr3);
}
@Test
public void testMultiPathsJoint() throws Exception {
ProcessGroupStatus rootPG = createEmptyProcessGroupStatus();
final ProcessorStatus pr0 = createProcessor(rootPG, "org.apache.nifi.processors.standard.GenerateFlowFile");
final ProcessorStatus pr1 = createProcessor(rootPG, "org.apache.nifi.processors.standard.UpdateAttribute");
final ProcessorStatus pr2 = createProcessor(rootPG, "org.apache.nifi.processors.standard.ListenTCP");
final ProcessorStatus pr3 = createProcessor(rootPG, "org.apache.nifi.processors.standard.LogAttribute");
// Result should be as follows:
// pathA = 0 -> 1 (-> 3)
// pathB = 2 (-> 3)
// pathC = 3
connect(rootPG, pr0, pr1);
connect(rootPG, pr1, pr3);
connect(rootPG, pr2, pr3);
final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
final NiFiFlow nifiFlow = new NiFiFlow(rootPG.getId());
nifiFlow.setClusterName("cluster1");
analyzer.analyzeProcessGroup(nifiFlow, rootPG);
assertEquals(4, nifiFlow.getProcessors().size());
analyzer.analyzePaths(nifiFlow);
final Map<String, NiFiFlowPath> paths = nifiFlow.getFlowPaths();
assertEquals(3, paths.size());
// Order is not guaranteed
final NiFiFlowPath pathA = paths.get(pr0.getId());
final NiFiFlowPath pathB = paths.get(pr2.getId());
final NiFiFlowPath pathC = paths.get(pr3.getId());
assertEquals(2, pathA.getProcessComponentIds().size());
assertEquals(1, pathB.getProcessComponentIds().size());
assertEquals(1, pathC.getProcessComponentIds().size());
// A queue is added as input for the joint point.
assertEquals(1, pathC.getInputs().size());
final AtlasObjectId queue = pathC.getInputs().iterator().next();
assertEquals(TYPE_NIFI_QUEUE, queue.getTypeName());
assertEquals(toQualifiedName("cluster1", pathC.getId()), queue.getUniqueAttributes().get(ATTR_QUALIFIED_NAME));
// Should be able to find a path from a given processor GUID.
final NiFiFlowPath pathForPr0 = nifiFlow.findPath(pr0.getId());
final NiFiFlowPath pathForPr1 = nifiFlow.findPath(pr1.getId());
final NiFiFlowPath pathForPr2 = nifiFlow.findPath(pr2.getId());
final NiFiFlowPath pathForPr3 = nifiFlow.findPath(pr3.getId());
assertEquals(pathA, pathForPr0);
assertEquals(pathA, pathForPr1);
assertEquals(pathB, pathForPr2);
assertEquals(pathC, pathForPr3);
}
}

View File

@ -0,0 +1,609 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
import org.apache.atlas.model.discovery.AtlasSearchResult;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.typedef.AtlasEntityDef;
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.notification.hook.HookNotification;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.AtlasUtils;
import org.apache.nifi.atlas.NiFiTypes;
import org.codehaus.jackson.map.ObjectMapper;
import org.eclipse.jetty.server.Connector;
import org.eclipse.jetty.server.Handler;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.ServerConnector;
import org.eclipse.jetty.server.handler.ContextHandlerCollection;
import org.eclipse.jetty.server.handler.ResourceHandler;
import org.eclipse.jetty.servlet.ServletContextHandler;
import org.eclipse.jetty.servlet.ServletHandler;
import org.eclipse.jetty.util.resource.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.servlet.ServletException;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.nifi.atlas.AtlasUtils.isGuidAssigned;
import static org.apache.nifi.atlas.AtlasUtils.toStr;
import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_GUID;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME;
import static org.apache.nifi.util.StringUtils.isEmpty;
/**
* Emulate Atlas API v2 server for NiFi implementation testing.
*/
public class AtlasAPIV2ServerEmulator {
private static final Logger logger = LoggerFactory.getLogger(AtlasAPIV2ServerEmulator.class);
private Server server;
private ServerConnector httpConnector;
private AtlasNotificationServerEmulator notificationServerEmulator;
private EmbeddedKafka embeddedKafka;
public static void main(String[] args) throws Exception {
final AtlasAPIV2ServerEmulator emulator = new AtlasAPIV2ServerEmulator();
emulator.start();
}
public void start() throws Exception {
if (server == null) {
createServer();
}
server.start();
logger.info("Starting {} on port {}", AtlasAPIV2ServerEmulator.class.getSimpleName(), httpConnector.getLocalPort());
embeddedKafka = new EmbeddedKafka(false);
embeddedKafka.start();
notificationServerEmulator.consume(m -> {
if (m instanceof HookNotification.EntityCreateRequest) {
HookNotification.EntityCreateRequest em = (HookNotification.EntityCreateRequest) m;
for (Referenceable ref : em.getEntities()) {
final AtlasEntity entity = toEntity(ref);
createEntityByNotification(entity);
}
} else if (m instanceof HookNotification.EntityPartialUpdateRequest) {
HookNotification.EntityPartialUpdateRequest em
= (HookNotification.EntityPartialUpdateRequest) m;
final AtlasEntity entity = toEntity(em.getEntity());
entity.setAttribute(em.getAttribute(), em.getAttributeValue());
updateEntityByNotification(entity);
}
});
}
@SuppressWarnings("unchecked")
private void createEntityByNotification(AtlasEntity entity) {
final String key = toTypedQname(entity);
final AtlasEntity exEntity = atlasEntitiesByTypedQname.get(key);
if (exEntity != null) {
convertReferenceableToObjectId(entity.getAttributes()).forEach((k, v) -> {
Object r = v;
final Object exAttr = exEntity.getAttribute(k);
if (exAttr != null && exAttr instanceof Collection) {
((Collection) exAttr).addAll((Collection) v);
r = exAttr;
}
exEntity.setAttribute(k, r);
});
} else {
String guid = String.valueOf(guidSeq.getAndIncrement());
entity.setGuid(guid);
atlasEntitiesByTypedQname.put(key, entity);
atlasEntitiesByGuid.put(guid, entity);
}
}
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> resolveIOReference(Object _refs) {
if (_refs == null) {
return Collections.emptyList();
}
final Collection<Map<String, Object>> refs = (Collection<Map<String, Object>>) _refs;
return refs.stream().map(ref -> {
final String typeName = toStr(ref.get(ATTR_TYPENAME));
final String qualifiedName = toStr(((Map<String, Object>) ref.get("uniqueAttributes")).get(ATTR_QUALIFIED_NAME));
final String guid = toStr(ref.get(ATTR_GUID));
if (isEmpty(guid)) {
final String typedQname = toTypedQualifiedName(typeName, qualifiedName);
final AtlasEntity referredEntity = atlasEntitiesByTypedQname.get(typedQname);
if (referredEntity == null) {
throw new RuntimeException("Entity does not exist for " + typedQname);
}
ref.put(ATTR_GUID, referredEntity.getGuid());
}
return ref;
}).collect(Collectors.toList());
}
private void updateEntityByNotification(AtlasEntity entity) {
final String inputGuid = entity.getGuid();
final String inputTypedQname = toTypedQname(entity);
final AtlasEntity exEntity = isGuidAssigned(inputGuid)
? atlasEntitiesByGuid.get(inputGuid)
: atlasEntitiesByTypedQname.get(inputTypedQname);
if (exEntity != null) {
convertReferenceableToObjectId(entity.getAttributes()).forEach((k, v) -> {
final Object r;
switch (k) {
case "inputs":
case "outputs":
{
// If a reference doesn't have guid, then find it.
r = resolveIOReference(v);
}
break;
default:
r = v;
}
exEntity.setAttribute(k, r);
});
} else {
throw new RuntimeException("Existing entity to be updated was not found for, " + entity);
}
}
private void createServer() throws Exception {
server = new Server();
final ContextHandlerCollection handlerCollection = new ContextHandlerCollection();
final ServletContextHandler staticContext = new ServletContextHandler();
staticContext.setContextPath("/");
final ServletContextHandler atlasApiV2Context = new ServletContextHandler();
atlasApiV2Context.setContextPath("/api/atlas/v2/");
handlerCollection.setHandlers(new Handler[]{staticContext, atlasApiV2Context});
server.setHandler(handlerCollection);
final ResourceHandler resourceHandler = new ResourceHandler();
resourceHandler.setBaseResource(Resource.newClassPathResource("public", false, false));
staticContext.setHandler(resourceHandler);
final ServletHandler servletHandler = new ServletHandler();
atlasApiV2Context.insertHandler(servletHandler);
httpConnector = new ServerConnector(server);
httpConnector.setPort(21000);
server.setConnectors(new Connector[] {httpConnector});
servletHandler.addServletWithMapping(TypeDefsServlet.class, "/types/typedefs/");
servletHandler.addServletWithMapping(EntityBulkServlet.class, "/entity/bulk/");
servletHandler.addServletWithMapping(EntityGuidServlet.class, "/entity/guid/*");
servletHandler.addServletWithMapping(SearchByUniqueAttributeServlet.class, "/entity/uniqueAttribute/type/*");
servletHandler.addServletWithMapping(SearchBasicServlet.class, "/search/basic/");
servletHandler.addServletWithMapping(LineageServlet.class, "/debug/lineage/");
notificationServerEmulator = new AtlasNotificationServerEmulator();
}
public void stop() throws Exception {
notificationServerEmulator.stop();
embeddedKafka.stop();
server.stop();
}
private static void respondWithJson(HttpServletResponse resp, Object entity) throws IOException {
respondWithJson(resp, entity, HttpServletResponse.SC_OK);
}
private static void respondWithJson(HttpServletResponse resp, Object entity, int statusCode) throws IOException {
resp.setContentType("application/json");
resp.setStatus(statusCode);
final ServletOutputStream out = resp.getOutputStream();
new ObjectMapper().writer().writeValue(out, entity);
out.flush();
}
private static <T> T readInputJSON(HttpServletRequest req, Class<? extends T> clazz) throws IOException {
return new ObjectMapper().reader().withType(clazz).readValue(req.getInputStream());
}
private static final AtlasTypesDef atlasTypesDef = new AtlasTypesDef();
// key = type::qualifiedName
private static final Map<String, AtlasEntity> atlasEntitiesByTypedQname = new HashMap<>();
private static final Map<String, AtlasEntity> atlasEntitiesByGuid = new HashMap<>();
public static class TypeDefsServlet extends HttpServlet {
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final String name = req.getParameter("name");
AtlasTypesDef result = atlasTypesDef;
if (name != null && !name.isEmpty()) {
result = new AtlasTypesDef();
final Optional<AtlasEntityDef> entityDef = atlasTypesDef.getEntityDefs().stream().filter(en -> en.getName().equals(name)).findFirst();
if (entityDef.isPresent()) {
result.getEntityDefs().add(entityDef.get());
}
}
respondWithJson(resp, result);
}
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final AtlasTypesDef newTypes = readInputJSON(req, AtlasTypesDef.class);
final Map<String, AtlasEntityDef> defs = new HashMap<>();
for (AtlasEntityDef existingDef : atlasTypesDef.getEntityDefs()) {
defs.put(existingDef.getName(), existingDef);
}
for (AtlasEntityDef entityDef : newTypes.getEntityDefs()) {
defs.put(entityDef.getName(), entityDef);
}
atlasTypesDef.setEntityDefs(defs.values().stream().collect(Collectors.toList()));
respondWithJson(resp, atlasTypesDef);
}
@Override
protected void doPut(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
super.doPut(req, resp);
}
}
private static final AtomicInteger guidSeq = new AtomicInteger(0);
public static class EntityBulkServlet extends HttpServlet {
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final AtlasEntity.AtlasEntitiesWithExtInfo withExtInfo = readInputJSON(req, AtlasEntity.AtlasEntitiesWithExtInfo.class);
final Map<String, String> guidAssignments = new HashMap<>();
withExtInfo.getEntities().forEach(entity -> {
atlasEntitiesByTypedQname.put(toTypedQname(entity), entity);
String guid = entity.getGuid();
if (!AtlasUtils.isGuidAssigned(guid)) {
final String _guid = String.valueOf(guidSeq.getAndIncrement());
guidAssignments.put(guid, _guid);
entity.setGuid(_guid);
guid = _guid;
}
atlasEntitiesByGuid.put(guid, entity);
});
final EntityMutationResponse mutationResponse = new EntityMutationResponse();
mutationResponse.setGuidAssignments(guidAssignments);
respondWithJson(resp, mutationResponse);
}
@Override
protected void doDelete(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
atlasEntitiesByTypedQname.clear();
atlasEntitiesByGuid.clear();
resp.setStatus(200);
}
}
public static class SearchBasicServlet extends HttpServlet {
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final AtlasSearchResult result = new AtlasSearchResult();
result.setEntities(atlasEntitiesByTypedQname.values().stream()
.map(entity -> new AtlasEntityHeader(entity.getTypeName(), entity.getAttributes())).collect(Collectors.toList()));
respondWithJson(resp, result);
}
}
public static class EntityGuidServlet extends HttpServlet {
private static Pattern URL_PATTERN = Pattern.compile(".+/guid/([^/]+)");
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final Matcher matcher = URL_PATTERN.matcher(req.getRequestURI());
if (matcher.matches()) {
final String guid = matcher.group(1);
final AtlasEntity entity = atlasEntitiesByGuid.get(guid);
if (entity != null) {
respondWithJson(resp, createSearchResult(entity));
return;
}
}
resp.setStatus(404);
}
}
private static AtlasEntity.AtlasEntityWithExtInfo createSearchResult(AtlasEntity entity) {
entity.setAttribute(ATTR_INPUTS, resolveIOReference(entity.getAttribute(ATTR_INPUTS)));
entity.setAttribute(ATTR_OUTPUTS, resolveIOReference(entity.getAttribute(ATTR_OUTPUTS)));
return new AtlasEntity.AtlasEntityWithExtInfo(entity);
}
public static class SearchByUniqueAttributeServlet extends HttpServlet {
private static Pattern URL_PATTERN = Pattern.compile(".+/uniqueAttribute/type/([^/]+)");
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
// http://localhost:21000/api/atlas/v2/entity/uniqueAttribute/type/nifi_flow_path?attr:qualifiedName=2e9a2852-228f-379b-0000-000000000000@example
final Matcher matcher = URL_PATTERN.matcher(req.getRequestURI());
if (matcher.matches()) {
final String typeName = matcher.group(1);
final String qualifiedName = req.getParameter("attr:qualifiedName");
final AtlasEntity entity = atlasEntitiesByTypedQname.get(toTypedQualifiedName(typeName, qualifiedName));
if (entity != null) {
respondWithJson(resp, createSearchResult(entity));
return;
}
}
resp.setStatus(404);
}
}
private static AtlasEntity toEntity(Referenceable ref) {
return new AtlasEntity(ref.getTypeName(), convertReferenceableToObjectId(ref.getValuesMap()));
}
private static Map<String, Object> convertReferenceableToObjectId(Map<String, Object> values) {
final Map<String, Object> result = new HashMap<>();
for (String k : values.keySet()) {
Object v = values.get(k);
result.put(k, toV2(v));
}
return result;
}
@SuppressWarnings("unchecked")
private static Object toV2(Object v) {
Object r = v;
if (v instanceof Referenceable) {
r = toMap((Referenceable) v);
} else if (v instanceof Map) {
r = convertReferenceableToObjectId((Map<String, Object>) v);
} else if (v instanceof Collection) {
r = ((Collection) v).stream().map(AtlasAPIV2ServerEmulator::toV2).collect(Collectors.toList());
}
return r;
}
private static Map<String, Object> toMap(Referenceable ref) {
final HashMap<String, Object> result = new HashMap<>();
result.put(ATTR_TYPENAME, ref.getTypeName());
final HashMap<String, String> uniqueAttrs = new HashMap<>();
uniqueAttrs.put(ATTR_QUALIFIED_NAME, (String) ref.getValuesMap().get(ATTR_QUALIFIED_NAME));
result.put("uniqueAttributes", uniqueAttrs);
result.put(ATTR_GUID, ref.getId() != null ? ref.getId()._getId() : null);
return result;
}
private static String toTypedQname(AtlasEntity entity) {
return toTypedQualifiedName(entity.getTypeName(), (String) entity.getAttribute("qualifiedName"));
}
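// Serves a node/link view of the entities registered in this emulator,
// backing the /debug/lineage endpoint used by the emulator's debug graph page.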
public static class LineageServlet extends HttpServlet {
private Node toNode(AtlasEntity entity) {
Node node = new Node();
node.setName(entity.getAttribute(NiFiTypes.ATTR_NAME).toString());
node.setQualifiedName(entity.getAttribute(ATTR_QUALIFIED_NAME).toString());
node.setType(entity.getTypeName());
return node;
}
private Link toLink(AtlasEntity s, AtlasEntity t, Map<String, Integer> nodeIndices) {
final Integer sid = nodeIndices.get(toTypedQname(s));
final Integer tid = nodeIndices.get(toTypedQname(t));
return new Link(sid, tid);
}
private String toLinkKey(Integer s, Integer t) {
return s + "::" + t;
}
@SuppressWarnings("unchecked")
private AtlasEntity getReferredEntity(Map<String, Object> reference) {
final String guid = toStr(reference.get(ATTR_GUID));
final String qname = ((Map<String, String>) reference.get("uniqueAttributes")).get(ATTR_QUALIFIED_NAME);
return isGuidAssigned(guid)
? atlasEntitiesByGuid.get(guid)
: atlasEntitiesByTypedQname.get(toTypedQualifiedName((String) reference.get(ATTR_TYPENAME), qname));
}
@SuppressWarnings("unchecked")
private void traverse(Set<AtlasEntity> seen, AtlasEntity s, List<Link> links, Map<String, Integer> nodeIndices, Map<String, List<AtlasEntity>> outgoingEntities) {
// To avoid cyclic links.
if (seen.contains(s)) {
return;
}
seen.add(s);
// Traverse entities those are updated by this entity.
final Object outputs = s.getAttribute(ATTR_OUTPUTS);
if (outputs != null) {
for (Map<String, Object> output : ((List<Map<String, Object>>) outputs)) {
final AtlasEntity t = getReferredEntity(output);
if (t != null) {
links.add(toLink(s, t, nodeIndices));
traverse(seen, t, links, nodeIndices, outgoingEntities);
}
}
}
// Add link to the input objects for this entity.
final Object inputs = s.getAttribute(NiFiTypes.ATTR_INPUTS);
if (inputs != null) {
for (Map<String, Object> input : ((List<Map<String, Object>>) inputs)) {
final AtlasEntity t = getReferredEntity(input);
if (t != null) {
links.add(toLink(t, s, nodeIndices));
}
}
}
// Traverse entities those consume this entity as their input.
final List<AtlasEntity> outGoings = Stream.of(outgoingEntities.getOrDefault(toTypedQname(s), Collections.emptyList()),
outgoingEntities.getOrDefault(s.getGuid(), Collections.emptyList())).flatMap(List::stream).collect(Collectors.toList());
outGoings.forEach(o -> {
links.add(toLink(s, o, nodeIndices));
traverse(seen, o, links, nodeIndices, outgoingEntities);
});
}
@SuppressWarnings("unchecked")
@Override
protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
final Lineage result = new Lineage();
final List<Node> nodes = new ArrayList<>();
final List<Link> links = new ArrayList<>();
final Map<String, Integer> nodeIndices = new HashMap<>();
// DataSet to outgoing Processes, either by guid or typed qname.
final Map<String, List<AtlasEntity>> outgoingEntities = new HashMap<>();
result.setNodes(nodes);
// Add all nodes.
atlasEntitiesByTypedQname.entrySet().forEach(entry -> {
nodeIndices.put(entry.getKey(), nodes.size());
final AtlasEntity entity = entry.getValue();
nodes.add(toNode(entity));
// Capture inputs
final Object inputs = entity.getAttribute(NiFiTypes.ATTR_INPUTS);
if (inputs != null) {
for (Map<String, Object> input : ((List<Map<String, Object>>) inputs)) {
final AtlasEntity t = getReferredEntity(input);
if (t != null) {
final String typedQname = toTypedQualifiedName(t.getTypeName(), toStr(t.getAttribute(ATTR_QUALIFIED_NAME)));
final String guid = t.getGuid();
outgoingEntities.computeIfAbsent(typedQname, k -> new ArrayList<>()).add(entity);
outgoingEntities.computeIfAbsent(guid, k -> new ArrayList<>()).add(entity);
}
}
}
});
// Correct all flow_path
final Map<String, List<AtlasEntity>> entities = atlasEntitiesByTypedQname.values().stream()
.collect(Collectors.groupingBy(AtlasEntity::getTypeName));
final HashSet<AtlasEntity> seen = new HashSet<>();
// Add nifi_flow
if (entities.containsKey(NiFiTypes.TYPE_NIFI_FLOW)) {
if (entities.containsKey(NiFiTypes.TYPE_NIFI_FLOW_PATH)) {
final List<AtlasEntity> flowPaths = entities.get(NiFiTypes.TYPE_NIFI_FLOW_PATH);
// Find the starting flow_paths
final List<AtlasEntity> heads = flowPaths.stream()
.filter(p -> {
Object inputs = p.getAttribute(NiFiTypes.ATTR_INPUTS);
return inputs == null || ((Collection) inputs).isEmpty()
// This condition matches the head processor but has some inputs those are created by notification.
|| ((Collection<Map<String, Object>>) inputs).stream().anyMatch(m -> !"nifi_queue".equals(m.get("typeName")));
})
.collect(Collectors.toList());
final List<AtlasEntity> inputPorts = entities.get(NiFiTypes.TYPE_NIFI_INPUT_PORT);
if (inputPorts != null) {
heads.addAll(inputPorts);
}
heads.forEach(s -> {
// Link it to parent NiFi Flow.
final Object nifiFlowRef = s.getAttribute("nifiFlow");
if (nifiFlowRef != null) {
final AtlasEntity nifiFlow = getReferredEntity((Map<String, Object>) nifiFlowRef);
if (nifiFlow != null) {
links.add(toLink(nifiFlow, s, nodeIndices));
}
}
// Traverse recursively
traverse(seen, s, links, nodeIndices, outgoingEntities);
});
}
}
final List<Link> uniqueLinks = new ArrayList<>();
final Set<String> linkKeys = new HashSet<>();
for (Link link : links) {
final String linkKey = toLinkKey(link.getSource(), link.getTarget());
if (!linkKeys.contains(linkKey)) {
uniqueLinks.add(link);
linkKeys.add(linkKey);
}
}
// Links from nifi_flow to nifi_flow_path/nifi_input_port should be narrow (0.1).
// Others will share 1.0.
final Map<Boolean, List<Link>> flowToPathOrElse = uniqueLinks.stream().collect(Collectors.groupingBy(l -> {
final String stype = nodes.get(l.getSource()).getType();
final String ttype = nodes.get(l.getTarget()).getType();
return "nifi_flow".equals(stype) && ("nifi_flow_path".equals(ttype) || ("nifi_input_port".equals(ttype)));
}));
flowToPathOrElse.forEach((f2p, ls) -> {
if (f2p) {
ls.forEach(l -> l.setValue(0.1));
} else {
// Group links by its target, and configure each weight value.
// E.g. 1 -> 3 and 2 -> 3, then 1 (0.5) -> 3 and 2 (0.5) -> 3.
ls.stream().collect(Collectors.groupingBy(Link::getTarget))
.forEach((t, ls2SameTgt) -> ls2SameTgt.forEach(l -> l.setValue(1.0 / (double) ls2SameTgt.size())));
}
});
result.setLinks(uniqueLinks);
respondWithJson(resp, result);
}
}
}

View File

@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
import org.apache.atlas.notification.MessageDeserializer;
import org.apache.atlas.notification.NotificationInterface;
import org.apache.atlas.notification.hook.HookNotification;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.Arrays;
import java.util.Properties;
import java.util.function.Consumer;
public class AtlasNotificationServerEmulator {
// EntityPartialUpdateRequest
// EntityCreateRequest
// NotificationInterface
private volatile boolean isStopped;
public void consume(Consumer<HookNotification.HookNotificationMessage> c) {
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092");
props.put("group.id", "test");
props.put("enable.auto.commit", "true");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList("ATLAS_HOOK"));
isStopped = false;
while (!isStopped) {
ConsumerRecords<String, String> records = consumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
final MessageDeserializer deserializer = NotificationInterface.NotificationType.HOOK.getDeserializer();
final HookNotification.HookNotificationMessage m
= (HookNotification.HookNotificationMessage) deserializer.deserialize(record.value());
c.accept(m);
}
}
consumer.close();
}
public void stop() {
isStopped = true;
}
}

View File

@ -0,0 +1,232 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
import kafka.server.KafkaConfig;
import kafka.server.KafkaServerStartable;
import org.apache.commons.io.FileUtils;
import org.apache.zookeeper.server.ServerCnxnFactory;
import org.apache.zookeeper.server.ServerConfig;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.Properties;
/**
* Embedded Kafka server, primarily to be used for testing.
*/
public class EmbeddedKafka {
private final KafkaServerStartable kafkaServer;
private final Properties zookeeperConfig;
private final Properties kafkaConfig;
private final ZooKeeperServer zkServer;
private final Logger logger = LoggerFactory.getLogger(EmbeddedKafka.class);
private final int kafkaPort;
private final int zookeeperPort;
private boolean started;
/**
* Will create instance of the embedded Kafka server. Kafka and Zookeeper
* configuration properties will be loaded from 'server.properties' and
* 'zookeeper.properties' located at the root of the classpath.
*/
public EmbeddedKafka(boolean useRandomPort) {
this(loadPropertiesFromClasspath("/server.properties"), loadPropertiesFromClasspath("/zookeeper.properties"), useRandomPort);
}
/**
* Will create instance of the embedded Kafka server.
*
* @param kafkaConfig
* Kafka configuration properties
* @param zookeeperConfig
* Zookeeper configuration properties
*/
public EmbeddedKafka(Properties kafkaConfig, Properties zookeeperConfig, boolean useRandomPort) {
this.cleanupKafkaWorkDir();
this.zookeeperConfig = zookeeperConfig;
this.kafkaConfig = kafkaConfig;
if (useRandomPort) {
this.kafkaPort = this.availablePort();
this.zookeeperPort = this.availablePort();
this.kafkaConfig.setProperty("port", String.valueOf(this.kafkaPort));
this.kafkaConfig.setProperty("zookeeper.connect", "localhost:" + this.zookeeperPort);
this.zookeeperConfig.setProperty("clientPort", String.valueOf(this.zookeeperPort));
} else {
this.kafkaPort = Integer.parseInt(kafkaConfig.getProperty("port"));
this.zookeeperPort = Integer.parseInt(zookeeperConfig.getProperty("clientPort"));
}
this.zkServer = new ZooKeeperServer();
this.kafkaServer = new KafkaServerStartable(new KafkaConfig(kafkaConfig));
}
/**
*
* @return port for Kafka server
*/
public int getKafkaPort() {
if (!this.started) {
throw new IllegalStateException("Kafka server is not started. Kafka port can't be determined.");
}
return this.kafkaPort;
}
/**
*
* @return port for Zookeeper server
*/
public int getZookeeperPort() {
if (!this.started) {
throw new IllegalStateException("Kafka server is not started. Zookeeper port can't be determined.");
}
return this.zookeeperPort;
}
/**
* Will start embedded Kafka server. Its data directories will be created
* at 'kafka-tmp' directory relative to the working directory of the current
* runtime. The data directories will be deleted upon JVM exit.
*
*/
public void start() {
if (!this.started) {
logger.info("Starting Zookeeper server");
this.startZookeeper();
logger.info("Starting Kafka server");
this.kafkaServer.startup();
logger.info("Embedded Kafka is started at localhost:" + this.kafkaServer.serverConfig().port()
+ ". Zookeeper connection string: " + this.kafkaConfig.getProperty("zookeeper.connect"));
this.started = true;
}
}
/**
* Will stop embedded Kafka server, cleaning up all working directories.
*/
public void stop() {
if (this.started) {
logger.info("Shutting down Kafka server");
this.kafkaServer.shutdown();
this.kafkaServer.awaitShutdown();
logger.info("Shutting down Zookeeper server");
this.shutdownZookeeper();
logger.info("Embedded Kafka is shut down.");
this.cleanupKafkaWorkDir();
this.started = false;
}
}
/**
*
*/
private void cleanupKafkaWorkDir() {
File kafkaTmp = new File("target/kafka-tmp");
try {
FileUtils.deleteDirectory(kafkaTmp);
} catch (Exception e) {
logger.warn("Failed to delete " + kafkaTmp.getAbsolutePath());
}
}
/**
* Will start Zookeeper server via {@link ServerCnxnFactory}
*/
private void startZookeeper() {
QuorumPeerConfig quorumConfiguration = new QuorumPeerConfig();
try {
quorumConfiguration.parseProperties(this.zookeeperConfig);
ServerConfig configuration = new ServerConfig();
configuration.readFrom(quorumConfiguration);
FileTxnSnapLog txnLog = new FileTxnSnapLog(new File(configuration.getDataLogDir()), new File(configuration.getDataDir()));
zkServer.setTxnLogFactory(txnLog);
zkServer.setTickTime(configuration.getTickTime());
zkServer.setMinSessionTimeout(configuration.getMinSessionTimeout());
zkServer.setMaxSessionTimeout(configuration.getMaxSessionTimeout());
ServerCnxnFactory zookeeperConnectionFactory = ServerCnxnFactory.createFactory();
zookeeperConnectionFactory.configure(configuration.getClientPortAddress(),
configuration.getMaxClientCnxns());
zookeeperConnectionFactory.startup(zkServer);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
} catch (Exception e) {
throw new IllegalStateException("Failed to start Zookeeper server", e);
}
}
/**
* Will shut down Zookeeper server.
*/
private void shutdownZookeeper() {
zkServer.shutdown();
}
/**
* Will load {@link Properties} from properties file discovered at the
* provided path relative to the root of the classpath.
*/
private static Properties loadPropertiesFromClasspath(String path) {
try {
Properties kafkaProperties = new Properties();
kafkaProperties.load(Class.class.getResourceAsStream(path));
return kafkaProperties;
} catch (Exception e) {
throw new IllegalStateException(e);
}
}
/**
* Will determine the available port used by Kafka/Zookeeper servers.
*/
private int availablePort() {
ServerSocket s = null;
try {
s = new ServerSocket(0);
s.setReuseAddress(true);
return s.getLocalPort();
} catch (Exception e) {
throw new IllegalStateException("Failed to discover available port.", e);
} finally {
try {
s.close();
} catch (IOException e) {
// ignore
}
}
}
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
import org.junit.Assert;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.IntStream;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
public class Lineage {
private List<Node> nodes;
private List<Link> links;
public List<Node> getNodes() {
return nodes;
}
public void setNodes(List<Node> nodes) {
this.nodes = nodes;
}
public List<Link> getLinks() {
return links;
}
public void setLinks(List<Link> links) {
this.links = links;
}
private String toFullQname(String type, String _qname) {
return type.startsWith("nifi_") && _qname.matches("[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}")
&& !_qname.endsWith("-0000-000000000000")
? _qname + "-0000-000000000000@example" : _qname;
}
public Node findNode(String type, String _qname) {
final String qname = toFullQname(type, _qname);
return nodes.stream().filter(n -> type.equals(n.getType()) && qname.equals(n.getQualifiedName()))
.findFirst().orElseGet(() -> {
Assert.fail(String.format("Node was not found for %s::%s", type, qname));
return null;
});
}
public Node findNode(String type, String name, String qname) {
final Node node = findNode(type, qname);
assertEquals(name, node.getName());
return node;
}
public int getNodeIndex(String type, String _qname) {
final String qname = toFullQname(type, _qname);
for (int i = 0; i < nodes.size(); i++) {
Node n = nodes.get(i);
if (type.equals(n.getType()) && qname.equals(n.getQualifiedName())) {
return i;
}
}
return -1;
}
public int[] getFlowPathVariationIndices(String _qname) {
final String qname = toFullQname(TYPE_NIFI_FLOW_PATH, _qname);
return IntStream.range(0, nodes.size()).filter(i -> {
Node n = nodes.get(i);
return TYPE_NIFI_FLOW_PATH.equals(n.getType()) && n.getQualifiedName().startsWith(qname);
}).toArray();
}
public void assertLink(Node s, Node t) {
assertLink(s.getType(), s.getName(), s.getQualifiedName(), t.getType(), t.getName(), t.getQualifiedName());
}
public void assertLink(String sType, String sName, String sQname, String tType, String tName, String tQname) {
int si = getNodeIndex(sType, sQname);
assertTrue(String.format("Source node was not found for %s::%s", sType, sQname), si > -1);
int ti = getNodeIndex(tType, tQname);
assertTrue(String.format("Target node was not found for %s::%s", tType, tQname), ti > -1);
assertNotNull(findNode(sType, sName, sQname));
assertNotNull(findNode(tType, tName, tQname));
final Callable<Boolean> exactMatch = () -> links.stream().anyMatch(l -> l.getSource() == si && l.getTarget() == ti);
final Callable<Boolean> variationMatch = () -> {
int[] sis = TYPE_NIFI_FLOW_PATH.equals(sType) ? getFlowPathVariationIndices(sQname) : new int[]{si};
int[] tis = TYPE_NIFI_FLOW_PATH.equals(tType) ? getFlowPathVariationIndices(tQname) : new int[]{ti};
return links.stream().anyMatch(
l -> Arrays.stream(sis).anyMatch(s -> l.getSource() == s)
&& Arrays.stream(tis).anyMatch(t -> l.getTarget() == t));
};
final String msg = String.format("Link from %s::%s to %s::%s was not found", sType, sQname, tType, tQname);
try {
assertTrue(msg, exactMatch.call() || variationMatch.call());
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
public class Link {
private int source;
private int target;
private double value;
public Link() {
}
public Link(int source, int target) {
this.source = source;
this.target = target;
}
public int getSource() {
return source;
}
public void setSource(int source) {
this.source = source;
}
public int getTarget() {
return target;
}
public void setTarget(int target) {
this.target = target;
}
public double getValue() {
return value;
}
public void setValue(double value) {
this.value = value;
}
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.emulator;
public class Node {
private String name;
private String type;
private String qualifiedName;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public String getQualifiedName() {
return qualifiedName;
}
public void setQualifiedName(String qualifiedName) {
this.qualifiedName = qualifiedName;
}
}

View File

@ -0,0 +1,52 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
# Apache Atlas Server Emulator
Since [Apache Atlas](http://atlas.apache.org/) uses multiple data stores such as Apache HBase, Apache Kafka and Apache Solr, it can be time consuming to set up a test environment, or to reset those stores in order to load other test data.
This Atlas Server Emulator was created so that Atlas client-side testing and debugging can be done more rapidly.
It can be run as a Java process from your IDE, and lets the Apache NiFi ReportLineageToAtlas reporting task call Atlas V2 REST APIs and send notification messages. The emulator runs a Jetty-based REST API server and an embedded Kafka broker.
![](architecture.svg)
The emulator keeps created Atlas entities in memory, so you can reset its state simply by restarting the emulator process.
## How to start
Run org.apache.nifi.atlas.emulator.AtlasAPIV2ServerEmulator.
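For example, a tiny launcher class like the following could be used; this is a minimal sketch that assumes the emulator class exposes a standard `main` entry point:

```java
// A minimal sketch, assuming AtlasAPIV2ServerEmulator exposes a standard main() entry point.
public class StartAtlasEmulator {
    public static void main(String[] args) throws Exception {
        org.apache.nifi.atlas.emulator.AtlasAPIV2ServerEmulator.main(args);
    }
}
```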
## How to debug reported entities
Configure ReportLineageToAtlas reporting task as follows:
- Atlas URLs: http://localhost:21000
- Create Atlas Configuration File: true
- Kafka Bootstrap Servers: localhost:9092
- Fill required properties and leave other configurations default
Once the reporting task runs, you can visit http://localhost:21000/graph.html to see a lineage graph that looks like this:
![](graph-example.png)
## ITReportLineageToAtlas
ITReportLineageToAtlas is an automated test class that uses multiple NiFi flow templates and the Atlas Server Emulator to test the entire ReportLineageToAtlas reporting task behavior, including registered NiFiProvenanceEventAnalyzer implementations.
This test class is not executed as a normal unit test because it takes longer to run. However, it is recommended to run it whenever the reporting task code is modified, to confirm that existing situations are still handled as expected.
You can see the NiFi flows used by the test class by importing the NiFi flow template files in test/resources/flow-templates into your NiFi instance.
Similarly, you can create new test methods by creating a NiFi flow template and loading it from the method. NiFi provenance events can be generated programmatically. After the ReportLineageToAtlas reporting task runs, the test method can verify the created Atlas entities and lineages, as shown in the sketch below.
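For instance, a minimal sketch of generating such an event with the SimpleProvenanceRecord helper (also part of this commit) might look like the following; the component ID and transit URI are illustrative values only, and how the event is fed into the reporting task and verified depends on the helpers in ITReportLineageToAtlas:

```java
import org.apache.nifi.atlas.reporting.SimpleProvenanceRecord;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;

public class ProvenanceEventSketch {
    // Build a SEND event as if PutHDFS had written a file to HDFS.
    // "processor-guid" and the transit URI are illustrative values only.
    public static ProvenanceEventRecord hdfsSendEvent() {
        return SimpleProvenanceRecord.pr(
                "processor-guid", "PutHDFS", ProvenanceEventType.SEND,
                "hdfs://0.example.com:8020/user/nifi/fileA");
    }
}
```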

File diff suppressed because one or more lines are too long


Width:  |  Height:  |  Size: 76 KiB

View File

@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestHBaseTable {
@Test
public void testHBaseTable() {
final String processorName = "FetchHBaseRow";
final String transitUri = "hbase://0.example.com/tableA/rowB";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.FETCH);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals("hbase_table", ref.getTypeName());
assertEquals("tableA", ref.get(ATTR_NAME));
assertEquals("tableA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testHBaseTableWithMultipleZkHosts() {
final String processorName = "FetchHBaseRow";
final String transitUri = "hbase://zk0.example.com,zk2.example.com,zk3.example.com/tableA/rowB";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.FETCH);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(
matches("zk0.example.com"),
matches("zk2.example.com"),
matches("zk3.example.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals("hbase_table", ref.getTypeName());
assertEquals("tableA", ref.get(ATTR_NAME));
assertEquals("tableA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestHDFSPath {
@Test
public void testHDFSPath() {
final String processorName = "PutHDFS";
// TODO: what about an HA NameNode?
final String transitUri = "hdfs://0.example.com:8020/user/nifi/fileA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("hdfs_path", ref.getTypeName());
assertEquals("/user/nifi/fileA", ref.get(ATTR_NAME));
assertEquals("/user/nifi/fileA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import java.util.HashMap;
import java.util.Map;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_INPUT_TABLES;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_OUTPUT_TABLES;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestHive2JDBC {
/**
* If a provenance event does not have table name attributes,
* then a database lineage should be created.
*/
@Test
public void testDatabaseLineage() {
final String processorName = "PutHiveQL";
final String transitUri = "jdbc:hive2://0.example.com:10000/databaseA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("hive_db", ref.getTypeName());
assertEquals("databaseA", ref.get(ATTR_NAME));
assertEquals("databaseA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
/**
* If a provenance event has table name attributes,
* then table lineages can be created.
*/
@Test
public void testTableLineage() {
final String processorName = "PutHiveQL";
final String transitUri = "jdbc:hive2://0.example.com:10000/databaseA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
// E.g. insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
when(record.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(2, refs.getInputs().size());
// QualifiedName : Name
final Map<String, String> expectedInputRefs = new HashMap<>();
expectedInputRefs.put("databaseA.tableA1@cluster1", "tableA1");
expectedInputRefs.put("databaseA.tableA2@cluster1", "tableA2");
for (Referenceable ref : refs.getInputs()) {
final String qName = (String) ref.get(ATTR_QUALIFIED_NAME);
assertTrue(expectedInputRefs.containsKey(qName));
assertEquals(expectedInputRefs.get(qName), ref.get(ATTR_NAME));
}
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("hive_table", ref.getTypeName());
assertEquals("tableB1", ref.get(ATTR_NAME));
assertEquals("databaseB.tableB1@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
/**
* If a provenance event has table name attributes, then table lineages can be created.
* In this case, if its transit URI does not contain database name, use 'default'.
*/
@Test
public void testTableLineageWithDefaultTableName() {
final String processorName = "PutHiveQL";
final String transitUri = "jdbc:hive2://0.example.com:10000";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
// E.g. insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
when(record.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(2, refs.getInputs().size());
// QualifiedName : Name
final Map<String, String> expectedInputRefs = new HashMap<>();
expectedInputRefs.put("default.tableA1@cluster1", "tableA1");
expectedInputRefs.put("default.tableA2@cluster1", "tableA2");
for (Referenceable ref : refs.getInputs()) {
final String qName = (String) ref.get(ATTR_QUALIFIED_NAME);
assertTrue(expectedInputRefs.containsKey(qName));
assertEquals(expectedInputRefs.get(qName), ref.get(ATTR_NAME));
}
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("hive_table", ref.getTypeName());
assertEquals("tableB1", ref.get(ATTR_NAME));
assertEquals("databaseB.tableB1@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,149 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestKafkaTopic {
@Test
public void testPublishKafka() {
final String processorName = "PublishKafka";
final String transitUri = "PLAINTEXT://0.example.com:6667/topicA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("topicA", ref.get(ATTR_NAME));
assertEquals("topicA", ref.get("topic"));
assertEquals("topicA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testPublishKafkaMultipleBrokers() {
final String processorName = "PublishKafka";
final String transitUri = "PLAINTEXT://0.example.com:6667,1.example.com:6667/topicA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("topicA", ref.get(ATTR_NAME));
assertEquals("topicA", ref.get("topic"));
assertEquals("topicA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testConsumeKafka() {
final String processorName = "ConsumeKafka";
final String transitUri = "PLAINTEXT://0.example.com:6667/topicA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.RECEIVE);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals("kafka_topic", ref.getTypeName());
assertEquals("topicA", ref.get(ATTR_NAME));
assertEquals("topicA", ref.get("topic"));
assertEquals("topicA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testConsumeKafkaRecord_0_10() {
final String processorName = "ConsumeKafkaRecord_0_10";
final String transitUri = "PLAINTEXT://0.example.com:6667/topicA";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.RECEIVE);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals("kafka_topic", ref.getTypeName());
assertEquals("topicA", ref.get(ATTR_NAME));
assertEquals("topicA", ref.get("topic"));
assertEquals("topicA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.reporting.ITReportLineageToAtlas;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import java.util.ArrayList;
import java.util.List;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT;
import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
/**
* Tests for RemotePorts.
* More complex and detailed tests are available at {@link ITReportLineageToAtlas}.
*/
public class TestNiFiRemotePort {
@Test
public void testRemoteInputPort() {
final String componentType = "Remote Input Port";
final String transitUri = "http://0.example.com:8080/nifi-api/data-transfer/input-ports/port-guid/transactions/tx-guid/flow-files";
final ProvenanceEventRecord sendEvent = Mockito.mock(ProvenanceEventRecord.class);
when(sendEvent.getEventId()).thenReturn(123L);
when(sendEvent.getComponentId()).thenReturn("port-guid");
when(sendEvent.getComponentType()).thenReturn(componentType);
when(sendEvent.getTransitUri()).thenReturn(transitUri);
when(sendEvent.getEventType()).thenReturn(ProvenanceEventType.SEND);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final List<ConnectionStatus> connections = new ArrayList<>();
final ConnectionStatus connection = new ConnectionStatus();
connection.setDestinationId("port-guid");
connection.setDestinationName("inputPortA");
connections.add(connection);
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
when(context.findConnectionTo(matches("port-guid"))).thenReturn(connections);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, transitUri, sendEvent.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, sendEvent);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
assertEquals(1, refs.getComponentIds().size());
// Should report connected componentId.
assertTrue(refs.getComponentIds().contains("port-guid"));
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals(TYPE_NIFI_INPUT_PORT, ref.getTypeName());
assertEquals("inputPortA", ref.get(ATTR_NAME));
assertEquals("port-guid@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testRemoteOutputPort() {
final String componentType = "Remote Output Port";
final String transitUri = "http://0.example.com:8080/nifi-api/data-transfer/output-ports/port-guid/transactions/tx-guid/flow-files";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentId()).thenReturn("port-guid");
when(record.getComponentType()).thenReturn(componentType);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.RECEIVE);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final List<ConnectionStatus> connections = new ArrayList<>();
final ConnectionStatus connection = new ConnectionStatus();
connection.setSourceId("port-guid");
connection.setSourceName("outputPortA");
connections.add(connection);
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
when(context.findConnectionFrom(matches("port-guid"))).thenReturn(connections);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals(TYPE_NIFI_OUTPUT_PORT, ref.getTypeName());
assertEquals("outputPortA", ref.get(ATTR_NAME));
assertEquals("port-guid@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.apache.nifi.atlas.provenance.analyzer.DatabaseAnalyzerUtil.ATTR_OUTPUT_TABLES;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestPutHiveStreaming {
@Test
public void testTableLineage() {
final String processorName = "PutHiveStreaming";
final String transitUri = "thrift://0.example.com:9083";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getTransitUri()).thenReturn(transitUri);
when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseA.tableA");
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
assertEquals("hive_table", ref.getTypeName());
assertEquals("tableA", ref.get(ATTR_NAME));
assertEquals("databaseA.tableA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
}

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.provenance.analyzer;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.nifi.atlas.provenance.AnalysisContext;
import org.apache.nifi.atlas.provenance.DataSetRefs;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer;
import org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzerFactory;
import org.apache.nifi.atlas.resolver.ClusterResolvers;
import org.apache.nifi.controller.status.ConnectionStatus;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.junit.Test;
import org.mockito.Mockito;
import java.util.ArrayList;
import java.util.List;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_NAME;
import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.mockito.Matchers.matches;
import static org.mockito.Mockito.when;
public class TestUnknownDataSet {
@Test
public void testGenerateFlowFile() {
final String processorName = "GenerateFlowFile";
final String processorId = "processor-1234";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getComponentId()).thenReturn(processorId);
when(record.getEventType()).thenReturn(ProvenanceEventType.CREATE);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final List<ConnectionStatus> connections = new ArrayList<>();
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
when(context.findConnectionTo(processorId)).thenReturn(connections);
when(context.getNiFiClusterName()).thenReturn("nifi-cluster");
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, null, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertEquals(1, refs.getInputs().size());
assertEquals(0, refs.getOutputs().size());
Referenceable ref = refs.getInputs().iterator().next();
assertEquals("nifi_data", ref.getTypeName());
assertEquals("GenerateFlowFile", ref.get(ATTR_NAME));
assertEquals("processor-1234@nifi-cluster", ref.get(ATTR_QUALIFIED_NAME));
}
@Test
public void testSomethingHavingIncomingConnection() {
final String processorName = "SomeProcessor";
final String processorId = "processor-1234";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
when(record.getComponentType()).thenReturn(processorName);
when(record.getComponentId()).thenReturn(processorId);
when(record.getEventType()).thenReturn(ProvenanceEventType.CREATE);
final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
final List<ConnectionStatus> connections = new ArrayList<>();
// The content of the connection is not important; just create an empty status.
connections.add(new ConnectionStatus());
final AnalysisContext context = Mockito.mock(AnalysisContext.class);
when(context.getClusterResolver()).thenReturn(clusterResolvers);
when(context.findConnectionTo(processorId)).thenReturn(connections);
final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, null, record.getEventType());
assertNotNull(analyzer);
final DataSetRefs refs = analyzer.analyze(context, record);
assertNull("If the processor has incoming connections, no refs should be created", refs);
}
}

View File

@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.reporting;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventType;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class SimpleProvenanceRecord implements ProvenanceEventRecord {
private long eventId;
private String componentId;
private String componentType;
private String transitUri;
private String flowFileUUID;
private ProvenanceEventType eventType;
private Map<String, String> attributes = new HashMap<>();
public static SimpleProvenanceRecord pr(String componentId, String componentType, ProvenanceEventType eventType) {
return pr(componentId, componentType, eventType, null, null);
}
public static SimpleProvenanceRecord pr(String componentId, String componentType, ProvenanceEventType eventType, String transitUri) {
return pr(componentId, componentType, eventType, transitUri, null);
}
public static SimpleProvenanceRecord pr(String componentId, String componentType, ProvenanceEventType eventType, String transitUri, String flowFileUUID) {
final SimpleProvenanceRecord pr = new SimpleProvenanceRecord();
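// If only the first half (18 characters) of a component UUID is given, pad it to a full 36-character NiFi UUID.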
pr.componentId = componentId.length() == 18 ? componentId + "-0000-000000000000" : componentId;
pr.componentType = componentType;
pr.transitUri = transitUri;
pr.eventType = eventType;
pr.flowFileUUID = flowFileUUID;
return pr;
}
public void setEventId(long eventId) {
this.eventId = eventId;
}
@Override
public String getComponentId() {
return componentId;
}
@Override
public String getComponentType() {
return componentType;
}
@Override
public String getTransitUri() {
return transitUri;
}
@Override
public ProvenanceEventType getEventType() {
return eventType;
}
@Override
public Map<String, String> getAttributes() {
return attributes;
}
@Override
public long getEventId() {
return eventId;
}
@Override
public long getEventTime() {
return 0;
}
@Override
public long getFlowFileEntryDate() {
return 0;
}
@Override
public long getLineageStartDate() {
return 0;
}
@Override
public long getFileSize() {
return 0;
}
@Override
public Long getPreviousFileSize() {
return null;
}
@Override
public long getEventDuration() {
return 0;
}
@Override
public Map<String, String> getPreviousAttributes() {
return null;
}
@Override
public Map<String, String> getUpdatedAttributes() {
return null;
}
@Override
public String getSourceSystemFlowFileIdentifier() {
return null;
}
@Override
public String getFlowFileUuid() {
return flowFileUUID;
}
@Override
public List<String> getParentUuids() {
return null;
}
@Override
public List<String> getChildUuids() {
return null;
}
@Override
public String getAlternateIdentifierUri() {
return null;
}
@Override
public String getDetails() {
return null;
}
@Override
public String getRelationship() {
return null;
}
@Override
public String getSourceQueueIdentifier() {
return null;
}
@Override
public String getContentClaimSection() {
return null;
}
@Override
public String getPreviousContentClaimSection() {
return null;
}
@Override
public String getContentClaimContainer() {
return null;
}
@Override
public String getPreviousContentClaimContainer() {
return null;
}
@Override
public String getContentClaimIdentifier() {
return null;
}
@Override
public String getPreviousContentClaimIdentifier() {
return null;
}
@Override
public Long getContentClaimOffset() {
return null;
}
@Override
public Long getPreviousContentClaimOffset() {
return null;
}
@Override
public String getBestEventIdentifier() {
return null;
}
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.reporting;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.util.MockProcessContext;
import org.apache.nifi.util.MockValidationContext;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_NIFI_URL;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_PASSWORD;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_URLS;
import static org.apache.nifi.atlas.reporting.ReportLineageToAtlas.ATLAS_USER;
import static org.junit.Assert.assertTrue;
public class TestReportLineageToAtlas {
private final Logger logger = LoggerFactory.getLogger(TestReportLineageToAtlas.class);
@Test
public void validateAtlasUrls() throws Exception {
final ReportLineageToAtlas reportingTask = new ReportLineageToAtlas();
final MockProcessContext processContext = new MockProcessContext(reportingTask);
final MockValidationContext validationContext = new MockValidationContext(processContext);
processContext.setProperty(ATLAS_NIFI_URL, "http://nifi.example.com:8080/nifi");
processContext.setProperty(ATLAS_USER, "admin");
processContext.setProperty(ATLAS_PASSWORD, "admin");
BiConsumer<Collection<ValidationResult>, Consumer<ValidationResult>> assertResults = (rs, a) -> {
assertTrue(rs.iterator().hasNext());
for (ValidationResult r : rs) {
logger.info("{}", r);
final String subject = r.getSubject();
if (ATLAS_URLS.getDisplayName().equals(subject)) {
a.accept(r);
}
}
};
// Default setting.
assertResults.accept(reportingTask.validate(validationContext),
r -> assertTrue("Atlas URLs is required", !r.isValid()));
// Invalid URL.
processContext.setProperty(ATLAS_URLS, "invalid");
assertResults.accept(reportingTask.validate(validationContext),
r -> assertTrue("Atlas URLs is invalid", !r.isValid()));
// Valid URL
processContext.setProperty(ATLAS_URLS, "http://atlas.example.com:21000");
assertTrue(processContext.isValid());
// Valid URL with Expression
processContext.setProperty(ATLAS_URLS, "http://atlas.example.com:${literal(21000)}");
assertTrue(processContext.isValid());
// Valid URLs
processContext.setProperty(ATLAS_URLS, "http://atlas1.example.com:21000, http://atlas2.example.com:21000");
assertTrue(processContext.isValid());
// Invalid and Valid URLs
processContext.setProperty(ATLAS_URLS, "invalid, http://atlas2.example.com:21000");
assertResults.accept(reportingTask.validate(validationContext),
r -> assertTrue("Atlas URLs is invalid", !r.isValid()));
}
}

View File

@ -0,0 +1,158 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.atlas.resolver;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.context.PropertyContext;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import static org.mockito.Mockito.when;
public class TestRegexClusterResolver {
private PropertyContext context;
private ValidationContext validationContext;
public void setupMock(Map<String, String> properties) {
context = Mockito.mock(PropertyContext.class);
validationContext = Mockito.mock(ValidationContext.class);
when(validationContext.getAllProperties()).thenReturn(properties);
when(context.getAllProperties()).thenReturn(properties);
}
@Test
public void testEmptySettings() {
setupMock(Collections.EMPTY_MAP);
final RegexClusterResolver resolver = new RegexClusterResolver();
// It should be valid
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(0, validationResults.size());
resolver.configure(context);
Assert.assertNull(resolver.fromHostNames("example.com"));
}
@Test
public void testInvalidClusterName() {
final Map<String, String> properties = new HashMap<>();
properties.put(RegexClusterResolver.PATTERN_PROPERTY_PREFIX, ".*\\.example.com");
setupMock(properties);
final RegexClusterResolver resolver = new RegexClusterResolver();
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(1, validationResults.size());
final ValidationResult validationResult = validationResults.iterator().next();
Assert.assertEquals(RegexClusterResolver.PATTERN_PROPERTY_PREFIX, validationResult.getSubject());
try {
resolver.configure(context);
Assert.fail("Configure method should fail, too");
} catch (IllegalArgumentException e) {
}
}
@Test
public void testEmptyPattern() {
final Map<String, String> properties = new HashMap<>();
final String propertyName = RegexClusterResolver.PATTERN_PROPERTY_PREFIX + "Cluster1";
properties.put(propertyName, "");
setupMock(properties);
final RegexClusterResolver resolver = new RegexClusterResolver();
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(1, validationResults.size());
final ValidationResult validationResult = validationResults.iterator().next();
Assert.assertEquals(propertyName, validationResult.getSubject());
try {
resolver.configure(context);
Assert.fail("Configure method should fail, too");
} catch (IllegalArgumentException e) {
}
}
@Test
public void testSinglePattern() {
final Map<String, String> properties = new HashMap<>();
final String propertyName = RegexClusterResolver.PATTERN_PROPERTY_PREFIX + "Cluster1";
properties.put(propertyName, "^.*\\.example.com$");
setupMock(properties);
final RegexClusterResolver resolver = new RegexClusterResolver();
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(0, validationResults.size());
resolver.configure(context);
Assert.assertEquals("Cluster1", resolver.fromHostNames("host1.example.com"));
}
@Test
public void testMultiplePatterns() {
final Map<String, String> properties = new HashMap<>();
final String propertyName = RegexClusterResolver.PATTERN_PROPERTY_PREFIX + "Cluster1";
// Hostname or local IP address patterns, delimited by whitespace (a newline here)
properties.put(propertyName, "^.*\\.example.com$\n^192.168.1.[\\d]+$");
setupMock(properties);
final RegexClusterResolver resolver = new RegexClusterResolver();
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(0, validationResults.size());
resolver.configure(context);
Assert.assertEquals("Cluster1", resolver.fromHostNames("host1.example.com"));
Assert.assertEquals("Cluster1", resolver.fromHostNames("192.168.1.10"));
Assert.assertEquals("Cluster1", resolver.fromHostNames("192.168.1.22"));
Assert.assertNull(resolver.fromHostNames("192.168.2.30"));
}
@Test
public void testMultipleClusters() {
final Map<String, String> properties = new HashMap<>();
final String c1PropertyName = RegexClusterResolver.PATTERN_PROPERTY_PREFIX + "Cluster1";
final String c2PropertyName = RegexClusterResolver.PATTERN_PROPERTY_PREFIX + "Cluster2";
// Hostname or local IP address
properties.put(c1PropertyName, "^.*\\.c1\\.example.com$ ^192.168.1.[\\d]+$");
properties.put(c2PropertyName, "^.*\\.c2\\.example.com$ ^192.168.2.[\\d]+$");
setupMock(properties);
final RegexClusterResolver resolver = new RegexClusterResolver();
final Collection<ValidationResult> validationResults = resolver.validate(validationContext);
Assert.assertEquals(0, validationResults.size());
resolver.configure(context);
Assert.assertEquals("Cluster1", resolver.fromHostNames("host1.c1.example.com"));
Assert.assertEquals("Cluster1", resolver.fromHostNames("192.168.1.10"));
Assert.assertEquals("Cluster1", resolver.fromHostNames("192.168.1.22"));
Assert.assertEquals("Cluster2", resolver.fromHostNames("host2.c2.example.com"));
Assert.assertEquals("Cluster2", resolver.fromHostNames("192.168.2.10"));
Assert.assertEquals("Cluster2", resolver.fromHostNames("192.168.2.22"));
Assert.assertNull(resolver.fromHostNames("192.168.3.30"));
}
}

View File

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
atlas.cluster.name=AtlasCluster
# atlas.kafka.bootstrap.servers=atlas.example.com:6667
atlas.kafka.bootstrap.servers=localhost:9092

View File

@ -0,0 +1,781 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>d744e793-015f-1000-2e5c-32baa63b68a5</groupId>
<name>MultiInAndOuts</name>
<snippet>
<connections>
<id>891b4148-7d48-3971-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>3250aeb6-4026-3969-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>5609cb4f-8a95-3b7a-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>8be920c7-dd0f-316b-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>830.1815185546875</x>
<y>617.63232421875</y>
</bends>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>64769759-fd7d-314a-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>3250aeb6-4026-3969-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>f9ab76f0-a990-3e60-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>5609cb4f-8a95-3b7a-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>a4bfe4ec-570b-3126-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>fb2277b0-1f16-3330-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>853.1815185546875</x>
<y>831.63232421875</y>
</bends>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>457c7d76-e26e-387d-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>64769759-fd7d-314a-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>3e6c4539-8498-3883-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>531.1815185546875</x>
<y>192.63232421875</y>
</bends>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>970b343c-a719-35b4-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>a4bfe4ec-570b-3126-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>45225bda-9d53-3b9b-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>3250aeb6-4026-3969-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>6f88b3d9-5723-356a-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>6cb08137-3eef-39dc-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>6f88b3d9-5723-356a-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>970b343c-a719-35b4-0000-000000000000</id>
<type>FUNNEL</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>7c4f4692-9315-388c-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>823.1815185546875</x>
<y>196.63232421875</y>
</bends>
<destination>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>970b343c-a719-35b4-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>0fbd5a15-7fc2-3e41-0000-000000000000</groupId>
<id>894218d5-dfe9-3ee5-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<funnels>
<id>970b343c-a719-35b4-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>646.1815185546875</x>
<y>170.63232421875</y>
</position>
</funnels>
<processors>
<id>894218d5-dfe9-3ee5-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>645.20361328125</x>
<y>10.17828369140625</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>test</value>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Gen2</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>a4bfe4ec-570b-3126-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>test</value>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Gen1</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>3250aeb6-4026-3969-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>266.1815185546875</x>
<y>454.63232421875</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UA3</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>457c7d76-e26e-387d-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>266.1815185546875</x>
<y>751.63232421875</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>
<name>attributes-to-log-regex</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
<value>
<name>attributes-to-ignore-regex</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>false</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>.*</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
<id>5609cb4f-8a95-3b7a-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>4.1815185546875</x>
<y>253.13232421875</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UA1</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>64769759-fd7d-314a-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>266.1815185546875</x>
<y>601.63232421875</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UA4</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>6f88b3d9-5723-356a-0000-000000000000</id>
<parentGroupId>0fbd5a15-7fc2-3e41-0000-000000000000</parentGroupId>
<position>
<x>548.1815185546875</x>
<y>253.13232421875</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UA2</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
</snippet>
<timestamp>11/20/2017 14:27:14 JST</timestamp>
</template>

View File

@ -0,0 +1,678 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>MultipleProcessGroups</name>
<snippet>
<processGroups>
<id>ce6cfba6-8e06-3976-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>2b39c0a2-8583-354d-0000-000000000000</id>
<parentGroupId>ce6cfba6-8e06-3976-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>80f8d788-9133-3f04-0000-000000000000</groupId>
<id>4cb7f9be-51ae-3330-0000-000000000000</id>
<type>INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>3285c37a-3476-32e4-0000-000000000000</id>
<type>OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>74d9fcda-11af-3a89-0000-000000000000</id>
<parentGroupId>ce6cfba6-8e06-3976-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>e1f4615f-5707-31ba-0000-000000000000</id>
<type>INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>c0442ed1-7d55-3325-0000-000000000000</groupId>
<id>093e778e-7022-3bf1-0000-000000000000</id>
<type>OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<processGroups>
<id>80f8d788-9133-3f04-0000-000000000000</id>
<parentGroupId>ce6cfba6-8e06-3976-0000-000000000000</parentGroupId>
<position>
<x>885.6999633789063</x>
<y>340.49998168945314</y>
</position>
<comments></comments>
<contents>
<connections>
<id>42775c61-b785-3182-0000-000000000000</id>
<parentGroupId>80f8d788-9133-3f04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>80f8d788-9133-3f04-0000-000000000000</groupId>
<id>767c7bd6-75e3-3f32-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>80f8d788-9133-3f04-0000-000000000000</groupId>
<id>4cb7f9be-51ae-3330-0000-000000000000</id>
<type>INPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<inputPorts>
<id>4cb7f9be-51ae-3330-0000-000000000000</id>
<parentGroupId>80f8d788-9133-3f04-0000-000000000000</parentGroupId>
<position>
<x>422.0000061035157</x>
<y>153.0</y>
</position>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>persist</name>
<state>STOPPED</state>
<type>INPUT_PORT</type>
</inputPorts>
<processors>
<id>767c7bd6-75e3-3f32-0000-000000000000</id>
<parentGroupId>80f8d788-9133-3f04-0000-000000000000</parentGroupId>
<position>
<x>370.0000061035157</x>
<y>382.0</y>
</position>
<bundle>
<artifact>nifi-hadoop-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Hadoop Configuration Resources</key>
<value>
<name>Hadoop Configuration Resources</name>
</value>
</entry>
<entry>
<key>Kerberos Principal</key>
<value>
<name>Kerberos Principal</name>
</value>
</entry>
<entry>
<key>Kerberos Keytab</key>
<value>
<name>Kerberos Keytab</name>
</value>
</entry>
<entry>
<key>Kerberos Relogin Period</key>
<value>
<name>Kerberos Relogin Period</name>
</value>
</entry>
<entry>
<key>Additional Classpath Resources</key>
<value>
<name>Additional Classpath Resources</name>
</value>
</entry>
<entry>
<key>Directory</key>
<value>
<name>Directory</name>
</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>
<name>Conflict Resolution Strategy</name>
</value>
</entry>
<entry>
<key>Block Size</key>
<value>
<name>Block Size</name>
</value>
</entry>
<entry>
<key>IO Buffer Size</key>
<value>
<name>IO Buffer Size</name>
</value>
</entry>
<entry>
<key>Replication</key>
<value>
<name>Replication</name>
</value>
</entry>
<entry>
<key>Permissions umask</key>
<value>
<name>Permissions umask</name>
</value>
</entry>
<entry>
<key>Remote Owner</key>
<value>
<name>Remote Owner</name>
</value>
</entry>
<entry>
<key>Remote Group</key>
<value>
<name>Remote Group</name>
</value>
</entry>
<entry>
<key>Compression codec</key>
<value>
<name>Compression codec</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Hadoop Configuration Resources</key>
</entry>
<entry>
<key>Kerberos Principal</key>
</entry>
<entry>
<key>Kerberos Keytab</key>
</entry>
<entry>
<key>Kerberos Relogin Period</key>
<value>4 hours</value>
</entry>
<entry>
<key>Additional Classpath Resources</key>
</entry>
<entry>
<key>Directory</key>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>fail</value>
</entry>
<entry>
<key>Block Size</key>
</entry>
<entry>
<key>IO Buffer Size</key>
</entry>
<entry>
<key>Replication</key>
</entry>
<entry>
<key>Permissions umask</key>
</entry>
<entry>
<key>Remote Owner</key>
</entry>
<entry>
<key>Remote Group</key>
</entry>
<entry>
<key>Compression codec</key>
<value>NONE</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>PutHDFS</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.hadoop.PutHDFS</type>
</processors>
</contents>
<name>Persist</name>
</processGroups>
<processGroups>
<id>b30ce5da-ddf4-33d3-0000-000000000000</id>
<parentGroupId>ce6cfba6-8e06-3976-0000-000000000000</parentGroupId>
<position>
<x>182.50003662109384</x>
<y>309.3000183105469</y>
</position>
<comments></comments>
<contents>
<connections>
<id>3daef46d-660d-3c4e-0000-000000000000</id>
<parentGroupId>b30ce5da-ddf4-33d3-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>3285c37a-3476-32e4-0000-000000000000</id>
<type>OUTPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>4f460ded-82b9-3775-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>4b147b10-c04d-3bb6-0000-000000000000</id>
<parentGroupId>b30ce5da-ddf4-33d3-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>4f460ded-82b9-3775-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>b30ce5da-ddf4-33d3-0000-000000000000</groupId>
<id>e1f4615f-5707-31ba-0000-000000000000</id>
<type>INPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<inputPorts>
<id>e1f4615f-5707-31ba-0000-000000000000</id>
<parentGroupId>b30ce5da-ddf4-33d3-0000-000000000000</parentGroupId>
<position>
<x>409.0000061035157</x>
<y>33.0</y>
</position>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>enrich</name>
<state>STOPPED</state>
<type>INPUT_PORT</type>
</inputPorts>
<outputPorts>
<id>3285c37a-3476-32e4-0000-000000000000</id>
<parentGroupId>b30ce5da-ddf4-33d3-0000-000000000000</parentGroupId>
<position>
<x>405.0000061035157</x>
<y>470.0</y>
</position>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>enriched</name>
<state>STOPPED</state>
<type>OUTPUT_PORT</type>
</outputPorts>
<processors>
<id>4f460ded-82b9-3775-0000-000000000000</id>
<parentGroupId>b30ce5da-ddf4-33d3-0000-000000000000</parentGroupId>
<position>
<x>364.0000061035157</x>
<y>198.0</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
</contents>
<name>Enrich</name>
</processGroups>
<processGroups>
<id>c0442ed1-7d55-3325-0000-000000000000</id>
<parentGroupId>ce6cfba6-8e06-3976-0000-000000000000</parentGroupId>
<position>
<x>402.0</x>
<y>3.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>11b451d9-699d-3eca-0000-000000000000</id>
<parentGroupId>c0442ed1-7d55-3325-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>c0442ed1-7d55-3325-0000-000000000000</groupId>
<id>093e778e-7022-3bf1-0000-000000000000</id>
<type>OUTPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>c0442ed1-7d55-3325-0000-000000000000</groupId>
<id>989dabb7-54b9-3c78-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<outputPorts>
<id>093e778e-7022-3bf1-0000-000000000000</id>
<parentGroupId>c0442ed1-7d55-3325-0000-000000000000</parentGroupId>
<position>
<x>528.0000061035157</x>
<y>360.0</y>
</position>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>raw-input</name>
<state>STOPPED</state>
<type>OUTPUT_PORT</type>
</outputPorts>
<processors>
<id>989dabb7-54b9-3c78-0000-000000000000</id>
<parentGroupId>c0442ed1-7d55-3325-0000-000000000000</parentGroupId>
<position>
<x>470.0000061035157</x>
<y>115.0</y>
</position>
<bundle>
<artifact>nifi-kafka-0-11-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>bootstrap.servers</key>
<value>
<name>bootstrap.servers</name>
</value>
</entry>
<entry>
<key>security.protocol</key>
<value>
<name>security.protocol</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
<value>
<name>sasl.kerberos.service.name</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
<value>
<name>sasl.kerberos.principal</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
<value>
<name>sasl.kerberos.keytab</name>
</value>
</entry>
<entry>
<key>ssl.context.service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>ssl.context.service</name>
</value>
</entry>
<entry>
<key>topic</key>
<value>
<name>topic</name>
</value>
</entry>
<entry>
<key>topic_type</key>
<value>
<name>topic_type</name>
</value>
</entry>
<entry>
<key>honor-transactions</key>
<value>
<name>honor-transactions</name>
</value>
</entry>
<entry>
<key>group.id</key>
<value>
<name>group.id</name>
</value>
</entry>
<entry>
<key>auto.offset.reset</key>
<value>
<name>auto.offset.reset</name>
</value>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>
<name>key-attribute-encoding</name>
</value>
</entry>
<entry>
<key>message-demarcator</key>
<value>
<name>message-demarcator</name>
</value>
</entry>
<entry>
<key>message-header-encoding</key>
<value>
<name>message-header-encoding</name>
</value>
</entry>
<entry>
<key>header-name-regex</key>
<value>
<name>header-name-regex</name>
</value>
</entry>
<entry>
<key>max.poll.records</key>
<value>
<name>max.poll.records</name>
</value>
</entry>
<entry>
<key>max-uncommit-offset-wait</key>
<value>
<name>max-uncommit-offset-wait</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>bootstrap.servers</key>
<value>localhost:9092</value>
</entry>
<entry>
<key>security.protocol</key>
<value>PLAINTEXT</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
</entry>
<entry>
<key>ssl.context.service</key>
</entry>
<entry>
<key>topic</key>
</entry>
<entry>
<key>topic_type</key>
<value>names</value>
</entry>
<entry>
<key>honor-transactions</key>
<value>true</value>
</entry>
<entry>
<key>group.id</key>
</entry>
<entry>
<key>auto.offset.reset</key>
<value>latest</value>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>utf-8</value>
</entry>
<entry>
<key>message-demarcator</key>
</entry>
<entry>
<key>message-header-encoding</key>
<value>UTF-8</value>
</entry>
<entry>
<key>header-name-regex</key>
</entry>
<entry>
<key>max.poll.records</key>
<value>10000</value>
</entry>
<entry>
<key>max-uncommit-offset-wait</key>
<value>1 secs</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>ConsumeKafka_0_11</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_11</type>
</processors>
</contents>
<name>Ingest</name>
</processGroups>
</contents>
<name>MultipleProcessGroups</name>
</processGroups>
</snippet>
<timestamp>10/17/2017 09:52:22 JST</timestamp>
</template>

View File

@ -0,0 +1,253 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>RemoteInvocation</name>
<snippet>
<processGroups>
<id>5e21f448-4886-3f1f-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>dc9982bb-2627-3c7e-0000-000000000000</id>
<parentGroupId>5e21f448-4886-3f1f-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5e21f448-4886-3f1f-0000-000000000000</groupId>
<id>7c6fb325-b566-3967-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>5e21f448-4886-3f1f-0000-000000000000</groupId>
<id>2607ed95-c6ef-3636-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>780287c8-76ab-3221-0000-000000000000</id>
<parentGroupId>5e21f448-4886-3f1f-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>260.0</x>
<y>338.0</y>
</bends>
<destination>
<groupId>5e21f448-4886-3f1f-0000-000000000000</groupId>
<id>7c6fb325-b566-3967-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>failure</selectedRelationships>
<source>
<groupId>5e21f448-4886-3f1f-0000-000000000000</groupId>
<id>2607ed95-c6ef-3636-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<processors>
<id>2607ed95-c6ef-3636-0000-000000000000</id>
<parentGroupId>5e21f448-4886-3f1f-0000-000000000000</parentGroupId>
<position>
<x>315.9999988991932</x>
<y>155.99999368798018</y>
</position>
<bundle>
<artifact>nifi-hadoop-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Hadoop Configuration Resources</key>
<value>
<name>Hadoop Configuration Resources</name>
</value>
</entry>
<entry>
<key>Kerberos Principal</key>
<value>
<name>Kerberos Principal</name>
</value>
</entry>
<entry>
<key>Kerberos Keytab</key>
<value>
<name>Kerberos Keytab</name>
</value>
</entry>
<entry>
<key>Kerberos Relogin Period</key>
<value>
<name>Kerberos Relogin Period</name>
</value>
</entry>
<entry>
<key>Additional Classpath Resources</key>
<value>
<name>Additional Classpath Resources</name>
</value>
</entry>
<entry>
<key>file_or_directory</key>
<value>
<name>file_or_directory</name>
</value>
</entry>
<entry>
<key>recursive</key>
<value>
<name>recursive</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Hadoop Configuration Resources</key>
</entry>
<entry>
<key>Kerberos Principal</key>
</entry>
<entry>
<key>Kerberos Keytab</key>
</entry>
<entry>
<key>Kerberos Relogin Period</key>
<value>4 hours</value>
</entry>
<entry>
<key>Additional Classpath Resources</key>
</entry>
<entry>
<key>file_or_directory</key>
<value>/test/2017-10-*</value>
</entry>
<entry>
<key>recursive</key>
<value>true</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>DeleteHDFS</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.hadoop.DeleteHDFS</type>
</processors>
<processors>
<id>7c6fb325-b566-3967-0000-000000000000</id>
<parentGroupId>5e21f448-4886-3f1f-0000-000000000000</parentGroupId>
<position>
<x>320.9999988991932</x>
<y>398.9999936879802</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
</contents>
<name>RemoteInvocation</name>
</processGroups>
</snippet>
<timestamp>10/25/2017 11:07:48 JST</timestamp>
</template>

View File

@ -0,0 +1,133 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>S2SDirect</name>
<snippet>
<processGroups>
<id>b497792c-a93f-3e43-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>1a83b948-b7c4-362d-0000-000000000000</id>
<parentGroupId>b497792c-a93f-3e43-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>5f6971b4-9209-3005-0000-000000000000</groupId>
<id>a4f14247-89aa-4e6c-0000-000000000000</id>
<type>REMOTE_INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>b7c5498a-de8b-38e5-0000-000000000000</groupId>
<id>d73d9115-b987-4ffc-0000-000000000000</id>
<type>REMOTE_OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<remoteProcessGroups>
<id>b7c5498a-de8b-38e5-0000-000000000000</id>
<parentGroupId>b497792c-a93f-3e43-0000-000000000000</parentGroupId>
<position>
<x>334.9999869194452</x>
<y>181.99999943536397</y>
</position>
<communicationsTimeout>30 sec</communicationsTimeout>
<contents>
<inputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>false</connected>
<exists>true</exists>
<id>6909277f-e8e6-497b-0000-000000000000</id>
<targetId>015f101e-dcd7-17bd-8899-1a723733521a</targetId>
<name>input</name>
<targetRunning>true</targetRunning>
<transmitting>false</transmitting>
<useCompression>false</useCompression>
</inputPorts>
<outputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>true</connected>
<exists>true</exists>
<id>d73d9115-b987-4ffc-0000-000000000000</id>
<targetId>015f1040-dcd7-17bd-5c1f-e31afe0a09a4</targetId>
<name>output</name>
<targetRunning>true</targetRunning>
<transmitting>true</transmitting>
<useCompression>false</useCompression>
</outputPorts>
</contents>
<proxyHost></proxyHost>
<proxyUser></proxyUser>
<targetUri>http://localhost:8080/nifi</targetUri>
<targetUris>http://localhost:8080/nifi</targetUris>
<transportProtocol>HTTP</transportProtocol>
<yieldDuration>10 sec</yieldDuration>
</remoteProcessGroups>
<remoteProcessGroups>
<id>5f6971b4-9209-3005-0000-000000000000</id>
<parentGroupId>b497792c-a93f-3e43-0000-000000000000</parentGroupId>
<position>
<x>339.0000174370233</x>
<y>458.999999435364</y>
</position>
<communicationsTimeout>30 sec</communicationsTimeout>
<contents>
<inputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>true</connected>
<exists>true</exists>
<id>a4f14247-89aa-4e6c-0000-000000000000</id>
<targetId>015f101e-dcd7-17bd-8899-1a723733521a</targetId>
<name>input</name>
<targetRunning>true</targetRunning>
<transmitting>true</transmitting>
<useCompression>false</useCompression>
</inputPorts>
<outputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>false</connected>
<exists>true</exists>
<id>1ed6d4f5-b91e-4d46-0000-000000000000</id>
<targetId>015f1040-dcd7-17bd-5c1f-e31afe0a09a4</targetId>
<name>output</name>
<targetRunning>true</targetRunning>
<transmitting>false</transmitting>
<useCompression>false</useCompression>
</outputPorts>
</contents>
<proxyHost></proxyHost>
<proxyUser></proxyUser>
<targetUri>http://localhost:8080/nifi</targetUri>
<targetUris>http://localhost:8080/nifi</targetUris>
<transportProtocol>HTTP</transportProtocol>
<yieldDuration>10 sec</yieldDuration>
</remoteProcessGroups>
</contents>
<name>S2SDirect</name>
</processGroups>
</snippet>
<timestamp>10/23/2017 15:06:47 JST</timestamp>
</template>

View File

@ -0,0 +1,443 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>S2SGet</name>
<snippet>
<processGroups>
<id>9fc65d0a-ff54-3c07-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>8812c40b-5c71-369f-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>9fc65d0a-ff54-3c07-0000-000000000000</groupId>
<id>bb530e58-ee14-3cac-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>c6acb687-616a-3d36-0000-000000000000</groupId>
<id>7375f8f6-4604-468d-0000-000000000000</id>
<type>REMOTE_OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>9df33c4b-8c26-33e5-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>9fc65d0a-ff54-3c07-0000-000000000000</groupId>
<id>97cc5b27-22f3-3c3b-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>c6acb687-616a-3d36-0000-000000000000</groupId>
<id>7375f8f6-4604-468d-0000-000000000000</id>
<type>REMOTE_OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>1ddd5163-7815-3117-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>9fc65d0a-ff54-3c07-0000-000000000000</groupId>
<id>4f3bfa4c-6427-3aac-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>c6acb687-616a-3d36-0000-000000000000</groupId>
<id>7375f8f6-4604-468d-0000-000000000000</id>
<type>REMOTE_OUTPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<labels>
<id>12073df1-f38b-3cad-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<position>
<x>872.9999891005355</x>
<y>296.0000048267144</y>
</position>
<height>68.00000762939453</height>
<label>A FlowFile is passed to every downstream process path.</label>
<style>
<entry>
<key>font-size</key>
<value>12px</value>
</entry>
</style>
<width>338.0</width>
</labels>
<processors>
<id>97cc5b27-22f3-3c3b-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<position>
<x>470.9999891005356</x>
<y>679.0000048267144</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>
<name>attributes-to-log-regex</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
<value>
<name>attributes-to-ignore-regex</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>false</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>.*</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
<id>bb530e58-ee14-3cac-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<position>
<x>150.99998910053557</x>
<y>514.0000048267144</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>4f3bfa4c-6427-3aac-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<position>
<x>810.9999891005356</x>
<y>524.0000048267144</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Directory</key>
<value>
<name>Directory</name>
</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>
<name>Conflict Resolution Strategy</name>
</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>
<name>Create Missing Directories</name>
</value>
</entry>
<entry>
<key>Maximum File Count</key>
<value>
<name>Maximum File Count</name>
</value>
</entry>
<entry>
<key>Last Modified Time</key>
<value>
<name>Last Modified Time</name>
</value>
</entry>
<entry>
<key>Permissions</key>
<value>
<name>Permissions</name>
</value>
</entry>
<entry>
<key>Owner</key>
<value>
<name>Owner</name>
</value>
</entry>
<entry>
<key>Group</key>
<value>
<name>Group</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Directory</key>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>fail</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>true</value>
</entry>
<entry>
<key>Maximum File Count</key>
</entry>
<entry>
<key>Last Modified Time</key>
</entry>
<entry>
<key>Permissions</key>
</entry>
<entry>
<key>Owner</key>
</entry>
<entry>
<key>Group</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>PutFile</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.PutFile</type>
</processors>
<remoteProcessGroups>
<id>c6acb687-616a-3d36-0000-000000000000</id>
<parentGroupId>9fc65d0a-ff54-3c07-0000-000000000000</parentGroupId>
<position>
<x>451.4000360293711</x>
<y>231.5999721410119</y>
</position>
<communicationsTimeout>30 sec</communicationsTimeout>
<contents>
<inputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>false</connected>
<exists>true</exists>
<id>015f101e-dcd7-17bd-8899-1a723733521a</id>
<name>input</name>
<targetRunning>true</targetRunning>
<transmitting>false</transmitting>
<useCompression>false</useCompression>
</inputPorts>
<outputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>true</connected>
<exists>true</exists>
<id>7375f8f6-4604-468d-0000-000000000000</id>
<targetId>392e7343-3950-329b-0000-000000000000</targetId>
<name>output</name>
<targetRunning>true</targetRunning>
<transmitting>true</transmitting>
<useCompression>false</useCompression>
</outputPorts>
</contents>
<proxyHost></proxyHost>
<proxyUser></proxyUser>
<targetUri>http://localhost:8080/nifi</targetUri>
<targetUris>http://localhost:8080/nifi</targetUris>
<transportProtocol>HTTP</transportProtocol>
<yieldDuration>10 sec</yieldDuration>
</remoteProcessGroups>
</contents>
<name>S2SGet</name>
</processGroups>
</snippet>
<timestamp>10/20/2017 13:03:49 JST</timestamp>
</template>

View File

@ -0,0 +1,121 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>S2SReceive</name>
<snippet>
<connections>
<id>3f94c106-18a6-309e-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>c81f8a46-4aa3-313e-0000-000000000000</groupId>
<id>67834454-5a13-3872-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>c81f8a46-4aa3-313e-0000-000000000000</groupId>
<id>77919f59-533e-35a3-0000-000000000000</id>
<type>INPUT_PORT</type>
</source>
<zIndex>0</zIndex>
</connections>
<inputPorts>
<id>77919f59-533e-35a3-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>55.494756089446724</x>
<y>0.0</y>
</position>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>input</name>
<state>RUNNING</state>
<transmitting>false</transmitting>
<type>INPUT_PORT</type>
</inputPorts>
<processors>
<id>67834454-5a13-3872-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>159.72339625877993</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
</snippet>
<timestamp>10/20/2017 17:49:39 JST</timestamp>
</template>


@ -0,0 +1,822 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>S2SSend</name>
<snippet>
<processGroups>
<id>b3a9430b-3e40-3a04-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>af5cc030-abbd-31bb-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>1504e817-9715-35fb-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>c5392447-e9f1-33ad-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>aff3eea5-0826-338d-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>18474287-6326-311a-0000-000000000000</groupId>
<id>f31a6b53-3077-4c59-0000-000000000000</id>
<type>REMOTE_INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>7033f311-ac68-3cab-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>071e7d2a-0680-3d9a-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>c5392447-e9f1-33ad-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>c439cdca-e989-3491-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>308a0785-e948-3151-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>18474287-6326-311a-0000-000000000000</groupId>
<id>f31a6b53-3077-4c59-0000-000000000000</id>
<type>REMOTE_INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>b775b657-5a5b-3708-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>470d0e2a-6281-3d3b-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>18474287-6326-311a-0000-000000000000</groupId>
<id>f31a6b53-3077-4c59-0000-000000000000</id>
<type>REMOTE_INPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>1504e817-9715-35fb-0000-000000000000</id>
<type>FUNNEL</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>4dacb06c-5bb8-3318-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>c5392447-e9f1-33ad-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>333255b6-eb02-3056-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>7521c9ce-7e0c-34ed-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>1504e817-9715-35fb-0000-000000000000</id>
<type>FUNNEL</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>b3a9430b-3e40-3a04-0000-000000000000</groupId>
<id>ca71e4d9-2a4f-3970-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<funnels>
<id>1504e817-9715-35fb-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>353.013200790822</x>
<y>106.10680357116347</y>
</position>
</funnels>
<labels>
<id>9dbb0346-5ba8-3208-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>615.6071466250744</x>
<y>-91.46566086985234</y>
</position>
<height>82.47064208984375</height>
<label>If multiple paths are sending data through the same remote port,
then we need to fetch the previous provenance event from
the actual SEND event to report which path passed the data.</label>
<style>
<entry>
<key>font-size</key>
<value>12px</value>
</entry>
</style>
<width>467.25079345703125</width>
</labels>
<labels>
<id>4414cff6-9e3d-311f-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>1124.8022214005925</x>
<y>87.1594194315544</y>
</position>
<height>69.47367095947266</height>
<label>If a processor has never generated any data,
then NiFi will not report any lineage to Atlas, because no provenance event has occurred.</label>
<style>
<entry>
<key>font-size</key>
<value>12px</value>
</entry>
</style>
<width>416.84210205078125</width>
</labels>
<processors>
<id>b775b657-5a5b-3708-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>711.1157953067149</x>
<y>1.67367972157831</y>
</position>
<bundle>
<artifact>nifi-social-media-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Twitter Endpoint</key>
<value>
<name>Twitter Endpoint</name>
</value>
</entry>
<entry>
<key>Consumer Key</key>
<value>
<name>Consumer Key</name>
</value>
</entry>
<entry>
<key>Consumer Secret</key>
<value>
<name>Consumer Secret</name>
</value>
</entry>
<entry>
<key>Access Token</key>
<value>
<name>Access Token</name>
</value>
</entry>
<entry>
<key>Access Token Secret</key>
<value>
<name>Access Token Secret</name>
</value>
</entry>
<entry>
<key>Languages</key>
<value>
<name>Languages</name>
</value>
</entry>
<entry>
<key>Terms to Filter On</key>
<value>
<name>Terms to Filter On</name>
</value>
</entry>
<entry>
<key>IDs to Follow</key>
<value>
<name>IDs to Follow</name>
</value>
</entry>
<entry>
<key>Locations to Filter On</key>
<value>
<name>Locations to Filter On</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Twitter Endpoint</key>
<value>Sample Endpoint</value>
</entry>
<entry>
<key>Consumer Key</key>
</entry>
<entry>
<key>Consumer Secret</key>
</entry>
<entry>
<key>Access Token</key>
</entry>
<entry>
<key>Access Token Secret</key>
</entry>
<entry>
<key>Languages</key>
</entry>
<entry>
<key>Terms to Filter On</key>
</entry>
<entry>
<key>IDs to Follow</key>
</entry>
<entry>
<key>Locations to Filter On</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>GetTwitter</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.twitter.GetTwitter</type>
</processors>
<processors>
<id>c439cdca-e989-3491-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>674.1998712344492</x>
<y>-474.2524572489417</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Generate C</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>c5392447-e9f1-33ad-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>350.48686507975015</x>
<y>-234.94570552127266</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>ca71e4d9-2a4f-3970-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>-73.58932859465233</x>
<y>-130.67362652994757</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Generate A</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>333255b6-eb02-3056-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>167.93687074616798</x>
<y>-480.30510007120733</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Generate B</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>7033f311-ac68-3cab-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>1147.8839837832775</x>
<y>171.22101629994745</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>InactiveProcessor</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<remoteProcessGroups>
<id>18474287-6326-311a-0000-000000000000</id>
<parentGroupId>b3a9430b-3e40-3a04-0000-000000000000</parentGroupId>
<position>
<x>534.4000360293711</x>
<y>319.5999721410119</y>
</position>
<communicationsTimeout>30 sec</communicationsTimeout>
<contents>
<inputPorts>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<connected>true</connected>
<exists>true</exists>
<id>f31a6b53-3077-4c59-0000-000000000000</id>
<targetId>77919f59-533e-35a3-0000-000000000000</targetId>
<name>input</name>
<targetRunning>true</targetRunning>
<transmitting>true</transmitting>
<useCompression>false</useCompression>
</inputPorts>
</contents>
<proxyHost></proxyHost>
<proxyUser></proxyUser>
<targetUri>http://localhost:8080/nifi</targetUri>
<targetUris>http://localhost:8080/nifi</targetUris>
<transportProtocol>HTTP</transportProtocol>
<yieldDuration>10 sec</yieldDuration>
</remoteProcessGroups>
</contents>
<name>S2SSend</name>
</processGroups>
</snippet>
<timestamp>10/18/2017 16:02:17 JST</timestamp>
</template>


@ -0,0 +1,153 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>S2STransfer</name>
<snippet>
<connections>
<id>157c6a56-3753-304d-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>c81f8a46-4aa3-313e-0000-000000000000</groupId>
<id>392e7343-3950-329b-0000-000000000000</id>
<type>OUTPUT_PORT</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>c81f8a46-4aa3-313e-0000-000000000000</groupId>
<id>1b9f81db-a0fd-389a-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<outputPorts>
<id>392e7343-3950-329b-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>53.43126270299064</x>
<y>267.91819788476437</y>
</position>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<name>output</name>
<state>RUNNING</state>
<transmitting>false</transmitting>
<type>OUTPUT_PORT</type>
</outputPorts>
<processors>
<id>1b9f81db-a0fd-389a-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>GenerateFlowFile</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
</snippet>
<timestamp>10/20/2017 17:48:34 JST</timestamp>
</template>


@ -0,0 +1,541 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>702990fc-015f-1000-9812-e01e5450a1a4</groupId>
<name>SimpleEventLevel</name>
<snippet>
<processGroups>
<id>86420a14-2fab-3e1e-0000-000000000000</id>
<parentGroupId>5e97d31e-6852-3d6b-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>9688f73d-6793-30c0-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>bfc30bc3-48cf-332a-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>eaf013c1-aec5-39b0-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>44a9c917-cb55-38a4-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>eaf013c1-aec5-39b0-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>d84b9bdc-5e42-3b3b-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>552d1f14-7093-3777-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>eaf013c1-aec5-39b0-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>86420a14-2fab-3e1e-0000-000000000000</groupId>
<id>d9257f7e-b78c-349a-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<processors>
<id>bfc30bc3-48cf-332a-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<position>
<x>506.60000000000014</x>
<y>723.5999511718751</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>
<name>attributes-to-log-regex</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
<value>
<name>attributes-to-ignore-regex</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>false</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>.*</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
<id>d84b9bdc-5e42-3b3b-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<position>
<x>747.7999633789062</x>
<y>117.59999084472656</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
<entry>
<key>filename</key>
<value>
<name>filename</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>contents of B</value>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
<entry>
<key>filename</key>
<value>b.txt</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Generate B</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>d9257f7e-b78c-349a-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<position>
<x>283.0</x>
<y>119.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
<entry>
<key>filename</key>
<value>
<name>filename</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>contents of A</value>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
<entry>
<key>filename</key>
<value>a.txt</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1d</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>Generate A</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>eaf013c1-aec5-39b0-0000-000000000000</id>
<parentGroupId>86420a14-2fab-3e1e-0000-000000000000</parentGroupId>
<position>
<x>506.60001220703134</x>
<y>438.0000244140625</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Directory</key>
<value>
<name>Directory</name>
</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>
<name>Conflict Resolution Strategy</name>
</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>
<name>Create Missing Directories</name>
</value>
</entry>
<entry>
<key>Maximum File Count</key>
<value>
<name>Maximum File Count</name>
</value>
</entry>
<entry>
<key>Last Modified Time</key>
<value>
<name>Last Modified Time</name>
</value>
</entry>
<entry>
<key>Permissions</key>
<value>
<name>Permissions</name>
</value>
</entry>
<entry>
<key>Owner</key>
<value>
<name>Owner</name>
</value>
</entry>
<entry>
<key>Group</key>
<value>
<name>Group</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Directory</key>
<value>/tmp/nifi</value>
</entry>
<entry>
<key>Conflict Resolution Strategy</key>
<value>fail</value>
</entry>
<entry>
<key>Create Missing Directories</key>
<value>true</value>
</entry>
<entry>
<key>Maximum File Count</key>
</entry>
<entry>
<key>Last Modified Time</key>
</entry>
<entry>
<key>Permissions</key>
</entry>
<entry>
<key>Owner</key>
</entry>
<entry>
<key>Group</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>PutFile</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>RUNNING</state>
<style></style>
<type>org.apache.nifi.processors.standard.PutFile</type>
</processors>
</contents>
<name>SimpleEventLevel</name>
</processGroups>
</snippet>
<timestamp>10/31/2017 11:10:24 JST</timestamp>
</template>


@ -0,0 +1,265 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>SimplestFlowPath</name>
<snippet>
<processGroups>
<id>aa3d184c-952b-38b1-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>dcb1705e-928f-34df-0000-000000000000</id>
<parentGroupId>aa3d184c-952b-38b1-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>aa3d184c-952b-38b1-0000-000000000000</groupId>
<id>7cc44055-b9fd-38ec-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>aa3d184c-952b-38b1-0000-000000000000</groupId>
<id>d270e6f0-c5e0-38b9-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<processors>
<id>d270e6f0-c5e0-38b9-0000-000000000000</id>
<parentGroupId>aa3d184c-952b-38b1-0000-000000000000</parentGroupId>
<position>
<x>343.0</x>
<y>88.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>GenerateFlowFile</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
<processors>
<id>7cc44055-b9fd-38ec-0000-000000000000</id>
<parentGroupId>aa3d184c-952b-38b1-0000-000000000000</parentGroupId>
<position>
<x>348.0</x>
<y>381.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>
<name>attributes-to-log-regex</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
<value>
<name>attributes-to-ignore-regex</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>false</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>.*</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
</contents>
<name>SimplestFlowPath</name>
</processGroups>
</snippet>
<timestamp>10/17/2017 10:32:02 JST</timestamp>
</template>


@ -0,0 +1,943 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<template encoding-version="1.1">
<description></description>
<groupId>27b7b6b8-015f-1000-0d31-197ae42bab34</groupId>
<name>SingleFlowPath</name>
<snippet>
<processGroups>
<id>32b1e09c-fc1d-3e0d-0000-000000000000</id>
<parentGroupId>c81f8a46-4aa3-313e-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>0.0</y>
</position>
<comments></comments>
<contents>
<connections>
<id>91d5691e-0e72-301d-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>332.7599792480469</x>
<y>689.639892578125</y>
</bends>
<destination>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>5a56149a-d82a-3242-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>215239c3-dbfb-3b16-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>94548863-a2f6-3c29-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>900.0800659179688</x>
<y>532.359990234375</y>
</bends>
<bends>
<x>900.0800659179688</x>
<y>582.359990234375</y>
</bends>
<destination>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>215239c3-dbfb-3b16-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>retry</selectedRelationships>
<source>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>215239c3-dbfb-3b16-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>aee2b8c0-4c25-341f-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>322.6799621582031</x>
<y>244.67996215820312</y>
</bends>
<destination>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>387ef1b0-adf8-3f90-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>2e9a2852-228f-379b-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>dbc0e770-483c-36fb-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>319.7999572753906</x>
<y>516.8399047851562</y>
</bends>
<destination>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>215239c3-dbfb-3b16-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>sql</selectedRelationships>
<source>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>2bdf4bbe-106f-30e5-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<connections>
<id>f62185a1-ba26-3bce-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<bends>
<x>321.2399597167969</x>
<y>377.1599426269531</y>
</bends>
<destination>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>2bdf4bbe-106f-30e5-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>0</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>32b1e09c-fc1d-3e0d-0000-000000000000</groupId>
<id>387ef1b0-adf8-3f90-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<labels>
<id>84a100ef-a4aa-3fbc-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>437.8799963378908</x>
<y>-13.080005035400433</y>
</position>
<height>46.07999038696289</height>
<label>ConsumeKafka will generate a RECEIVE provenance event
PLAINTEXT://0.kafka.example.com:6667/nifi-test</label>
<style>
<entry>
<key>font-size</key>
<value>12px</value>
</entry>
</style>
<width>364.3199157714844</width>
</labels>
<labels>
<id>42c2fce3-0622-31c8-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>442.7199621582033</x>
<y>789.5198828125</y>
</position>
<height>46.07999038696289</height>
<label>PublishKafka will generate a SEND provenance event
PLAINTEXT://0.kafka.example.com:6667/nifi-test</label>
<style>
<entry>
<key>font-size</key>
<value>12px</value>
</entry>
</style>
<width>364.3199157714844</width>
</labels>
<processors>
<id>215239c3-dbfb-3b16-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>445.0800659179688</x>
<y>492.359990234375</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>JDBC Connection Pool</key>
<value>
<identifiesControllerService>org.apache.nifi.dbcp.DBCPService</identifiesControllerService>
<name>JDBC Connection Pool</name>
</value>
</entry>
<entry>
<key>Support Fragmented Transactions</key>
<value>
<name>Support Fragmented Transactions</name>
</value>
</entry>
<entry>
<key>Transaction Timeout</key>
<value>
<name>Transaction Timeout</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Obtain Generated Keys</key>
<value>
<name>Obtain Generated Keys</name>
</value>
</entry>
<entry>
<key>rollback-on-failure</key>
<value>
<name>rollback-on-failure</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>JDBC Connection Pool</key>
</entry>
<entry>
<key>Support Fragmented Transactions</key>
<value>true</value>
</entry>
<entry>
<key>Transaction Timeout</key>
</entry>
<entry>
<key>Batch Size</key>
<value>100</value>
</entry>
<entry>
<key>Obtain Generated Keys</key>
<value>false</value>
</entry>
<entry>
<key>rollback-on-failure</key>
<value>false</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>PutSQL</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>retry</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.PutSQL</type>
</processors>
<processors>
<id>2bdf4bbe-106f-30e5-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>445.08000732421874</x>
<y>342.60004638671876</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>JDBC Connection Pool</key>
<value>
<identifiesControllerService>org.apache.nifi.dbcp.DBCPService</identifiesControllerService>
<name>JDBC Connection Pool</name>
</value>
</entry>
<entry>
<key>Statement Type</key>
<value>
<name>Statement Type</name>
</value>
</entry>
<entry>
<key>Table Name</key>
<value>
<name>Table Name</name>
</value>
</entry>
<entry>
<key>Catalog Name</key>
<value>
<name>Catalog Name</name>
</value>
</entry>
<entry>
<key>Schema Name</key>
<value>
<name>Schema Name</name>
</value>
</entry>
<entry>
<key>Translate Field Names</key>
<value>
<name>Translate Field Names</name>
</value>
</entry>
<entry>
<key>Unmatched Field Behavior</key>
<value>
<name>Unmatched Field Behavior</name>
</value>
</entry>
<entry>
<key>Unmatched Column Behavior</key>
<value>
<name>Unmatched Column Behavior</name>
</value>
</entry>
<entry>
<key>Update Keys</key>
<value>
<name>Update Keys</name>
</value>
</entry>
<entry>
<key>jts-quoted-identifiers</key>
<value>
<name>jts-quoted-identifiers</name>
</value>
</entry>
<entry>
<key>jts-quoted-table-identifiers</key>
<value>
<name>jts-quoted-table-identifiers</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>JDBC Connection Pool</key>
</entry>
<entry>
<key>Statement Type</key>
</entry>
<entry>
<key>Table Name</key>
</entry>
<entry>
<key>Catalog Name</key>
</entry>
<entry>
<key>Schema Name</key>
</entry>
<entry>
<key>Translate Field Names</key>
<value>true</value>
</entry>
<entry>
<key>Unmatched Field Behavior</key>
<value>Ignore Unmatched Fields</value>
</entry>
<entry>
<key>Unmatched Column Behavior</key>
<value>Fail on Unmatched Columns</value>
</entry>
<entry>
<key>Update Keys</key>
</entry>
<entry>
<key>jts-quoted-identifiers</key>
<value>false</value>
</entry>
<entry>
<key>jts-quoted-table-identifiers</key>
<value>false</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>ConvertJSONToSQL</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>true</autoTerminate>
<name>original</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>sql</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.ConvertJSONToSQL</type>
</processors>
<processors>
<id>2e9a2852-228f-379b-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>445.08000976562505</x>
<y>47.400016784668</y>
</position>
<bundle>
<artifact>nifi-kafka-0-11-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>bootstrap.servers</key>
<value>
<name>bootstrap.servers</name>
</value>
</entry>
<entry>
<key>security.protocol</key>
<value>
<name>security.protocol</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
<value>
<name>sasl.kerberos.service.name</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
<value>
<name>sasl.kerberos.principal</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
<value>
<name>sasl.kerberos.keytab</name>
</value>
</entry>
<entry>
<key>ssl.context.service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>ssl.context.service</name>
</value>
</entry>
<entry>
<key>topic</key>
<value>
<name>topic</name>
</value>
</entry>
<entry>
<key>topic_type</key>
<value>
<name>topic_type</name>
</value>
</entry>
<entry>
<key>honor-transactions</key>
<value>
<name>honor-transactions</name>
</value>
</entry>
<entry>
<key>group.id</key>
<value>
<name>group.id</name>
</value>
</entry>
<entry>
<key>auto.offset.reset</key>
<value>
<name>auto.offset.reset</name>
</value>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>
<name>key-attribute-encoding</name>
</value>
</entry>
<entry>
<key>message-demarcator</key>
<value>
<name>message-demarcator</name>
</value>
</entry>
<entry>
<key>message-header-encoding</key>
<value>
<name>message-header-encoding</name>
</value>
</entry>
<entry>
<key>header-name-regex</key>
<value>
<name>header-name-regex</name>
</value>
</entry>
<entry>
<key>max.poll.records</key>
<value>
<name>max.poll.records</name>
</value>
</entry>
<entry>
<key>max-uncommit-offset-wait</key>
<value>
<name>max-uncommit-offset-wait</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>bootstrap.servers</key>
<value>localhost:9092</value>
</entry>
<entry>
<key>security.protocol</key>
<value>PLAINTEXT</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
</entry>
<entry>
<key>ssl.context.service</key>
</entry>
<entry>
<key>topic</key>
</entry>
<entry>
<key>topic_type</key>
<value>names</value>
</entry>
<entry>
<key>honor-transactions</key>
<value>true</value>
</entry>
<entry>
<key>group.id</key>
</entry>
<entry>
<key>auto.offset.reset</key>
<value>latest</value>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>utf-8</value>
</entry>
<entry>
<key>message-demarcator</key>
</entry>
<entry>
<key>message-header-encoding</key>
<value>UTF-8</value>
</entry>
<entry>
<key>header-name-regex</key>
</entry>
<entry>
<key>max.poll.records</key>
<value>10000</value>
</entry>
<entry>
<key>max-uncommit-offset-wait</key>
<value>1 secs</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>ConsumeKafka_0_11</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.kafka.pubsub.ConsumeKafka_0_11</type>
</processors>
<processors>
<id>387ef1b0-adf8-3f90-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>445.08012207031254</x>
<y>192.8400793457031</y>
</position>
<bundle>
<artifact>nifi-update-attribute-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Delete Attributes Expression</key>
<value>
<name>Delete Attributes Expression</name>
</value>
</entry>
<entry>
<key>Store State</key>
<value>
<name>Store State</name>
</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
<value>
<name>Stateful Variables Initial Value</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Delete Attributes Expression</key>
</entry>
<entry>
<key>Store State</key>
<value>Do not store state</value>
</entry>
<entry>
<key>Stateful Variables Initial Value</key>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>UpdateAttribute</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.attributes.UpdateAttribute</type>
</processors>
<processors>
<id>5a56149a-d82a-3242-0000-000000000000</id>
<parentGroupId>32b1e09c-fc1d-3e0d-0000-000000000000</parentGroupId>
<position>
<x>450.8399475097658</x>
<y>643.5599682617187</y>
</position>
<bundle>
<artifact>nifi-kafka-0-11-nar</artifact>
<group>org.apache.nifi</group>
<version>1.5.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>bootstrap.servers</key>
<value>
<name>bootstrap.servers</name>
</value>
</entry>
<entry>
<key>security.protocol</key>
<value>
<name>security.protocol</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
<value>
<name>sasl.kerberos.service.name</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
<value>
<name>sasl.kerberos.principal</name>
</value>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
<value>
<name>sasl.kerberos.keytab</name>
</value>
</entry>
<entry>
<key>ssl.context.service</key>
<value>
<identifiesControllerService>org.apache.nifi.ssl.SSLContextService</identifiesControllerService>
<name>ssl.context.service</name>
</value>
</entry>
<entry>
<key>topic</key>
<value>
<name>topic</name>
</value>
</entry>
<entry>
<key>acks</key>
<value>
<name>acks</name>
</value>
</entry>
<entry>
<key>use-transactions</key>
<value>
<name>use-transactions</name>
</value>
</entry>
<entry>
<key>attribute-name-regex</key>
<value>
<name>attribute-name-regex</name>
</value>
</entry>
<entry>
<key>message-header-encoding</key>
<value>
<name>message-header-encoding</name>
</value>
</entry>
<entry>
<key>kafka-key</key>
<value>
<name>kafka-key</name>
</value>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>
<name>key-attribute-encoding</name>
</value>
</entry>
<entry>
<key>message-demarcator</key>
<value>
<name>message-demarcator</name>
</value>
</entry>
<entry>
<key>max.request.size</key>
<value>
<name>max.request.size</name>
</value>
</entry>
<entry>
<key>ack.wait.time</key>
<value>
<name>ack.wait.time</name>
</value>
</entry>
<entry>
<key>max.block.ms</key>
<value>
<name>max.block.ms</name>
</value>
</entry>
<entry>
<key>partitioner.class</key>
<value>
<name>partitioner.class</name>
</value>
</entry>
<entry>
<key>compression.type</key>
<value>
<name>compression.type</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>bootstrap.servers</key>
<value>localhost:9092</value>
</entry>
<entry>
<key>security.protocol</key>
<value>PLAINTEXT</value>
</entry>
<entry>
<key>sasl.kerberos.service.name</key>
</entry>
<entry>
<key>sasl.kerberos.principal</key>
</entry>
<entry>
<key>sasl.kerberos.keytab</key>
</entry>
<entry>
<key>ssl.context.service</key>
</entry>
<entry>
<key>topic</key>
</entry>
<entry>
<key>acks</key>
<value>0</value>
</entry>
<entry>
<key>use-transactions</key>
<value>true</value>
</entry>
<entry>
<key>attribute-name-regex</key>
</entry>
<entry>
<key>message-header-encoding</key>
<value>UTF-8</value>
</entry>
<entry>
<key>kafka-key</key>
</entry>
<entry>
<key>key-attribute-encoding</key>
<value>utf-8</value>
</entry>
<entry>
<key>message-demarcator</key>
</entry>
<entry>
<key>max.request.size</key>
<value>1 MB</value>
</entry>
<entry>
<key>ack.wait.time</key>
<value>5 secs</value>
</entry>
<entry>
<key>max.block.ms</key>
<value>5 sec</value>
</entry>
<entry>
<key>partitioner.class</key>
<value>org.apache.kafka.clients.producer.internals.DefaultPartitioner</value>
</entry>
<entry>
<key>compression.type</key>
<value>none</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>PublishKafka_0_11</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>failure</name>
</relationships>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.kafka.pubsub.PublishKafka_0_11</type>
</processors>
</contents>
<name>SingleFlowPath</name>
</processGroups>
</snippet>
<timestamp>10/18/2017 11:40:09 JST</timestamp>
</template>
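The template above ("SingleFlowPath") is a test fixture: a small flow built from ConsumeKafka_0_11, UpdateAttribute, ConvertJSONToSQL and PublishKafka_0_11 processors whose lineage the reporting task is expected to push to Atlas. Not part of this commit, but as a rough illustration of how the result could be verified, the following Java sketch queries Atlas' DSL search endpoint (assumed to run at the default http://localhost:21000 with admin/admin credentials) for entities of the flow-path type this bundle is expected to register; the type name nifi_flow_path and the class name are assumptions for illustration only.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Base64;

// Hypothetical verification helper, not part of this commit.
public class AtlasLineageCheck {
    public static void main(String[] args) throws Exception {
        // DSL query for all entities of the (assumed) NiFi flow path type.
        String query = URLEncoder.encode("from nifi_flow_path", "UTF-8");
        URL url = new URL("http://localhost:21000/api/atlas/v2/search/dsl?query=" + query);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // Default Atlas credentials; replace for a secured installation.
        String auth = Base64.getEncoder().encodeToString("admin:admin".getBytes("UTF-8"));
        conn.setRequestProperty("Authorization", "Basic " + auth);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);   // raw JSON search result listing the discovered entities
            }
        }
    }
}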

View File

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
log4j.rootLogger=INFO,console
log4j.category.org.apache.nifi=DEBUG
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
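This log4j configuration routes all output to stderr and enables DEBUG for org.apache.nifi during the tests. With the ConversionPattern above (%c{2} keeps only the last two segments of the logger name), a line would be rendered roughly as follows; the timestamp, logger and message here are illustrative only:

17/10/18 11:40:09 DEBUG atlas.NiFiAtlasClient: connected to http://localhost:21000/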

View File

@ -0,0 +1,115 @@
<!DOCTYPE html>
<html lang="en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<svg width="960" height="500"></svg>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script src="https://unpkg.com/d3-sankey@0.7.1"></script>
<style>
.node rect {
cursor: move;
fill-opacity: .9;
shape-rendering: crispEdges;
}
.node text {
pointer-events: none;
text-shadow: 0 1px 0 #fff;
}
.link {
fill: none;
stroke: #000;
stroke-opacity: .2;
}
.link:hover {
stroke-opacity: .5;
}
</style>
<script>
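// Debug visualization: fetch the lineage graph from the debug endpoint below and draw it
// as a Sankey diagram; hovering a node shows its type, name and qualifiedName.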
var svg = d3.select("svg"),
width = +svg.attr("width"),
height = +svg.attr("height");
var formatNumber = d3.format(",.0f"),
format = function(d) { return formatNumber(d); },
color = d3.scaleOrdinal(d3.schemeCategory10);
var sankey = d3.sankey()
.nodeWidth(15)
.nodePadding(10)
.extent([[1, 1], [width - 1, height - 6]]);
var link = svg.append("g")
.attr("class", "links")
.attr("fill", "none")
.attr("stroke", "#000")
.attr("stroke-opacity", 0.2)
.selectAll("path");
var node = svg.append("g")
.attr("class", "nodes")
.attr("font-family", "sans-serif")
.attr("font-size", 10)
.selectAll("g");
d3.json("/api/atlas/v2/debug/lineage/", function(error, lineage) {
if (error) throw error;
sankey(lineage);
link = link
.data(lineage.links)
.enter().append("path")
.attr("class", "link")
.attr("d", d3.sankeyLinkHorizontal())
.attr("stroke-width", function(d) { return Math.max(1, d.width); });
link.append("title")
.text(function(d) { return d.source.name + "->" + d.target.name + "\n" + format(d.value); });
node = node
.data(lineage.nodes)
.enter().append("g")
.attr("class", "node");
node.append("rect")
.attr("x", function(d) { return d.x0; })
.attr("y", function(d) { return d.y0; })
.attr("height", function(d) { return d.y1 - d.y0; })
.attr("width", function(d) { return d.x1 - d.x0; })
.attr("fill", function(d) { return color(d.name.replace(/ .*/, "")); })
.attr("stroke", "#000");
node.append("text")
.attr("x", function(d) { return d.x0 - 6; })
.attr("y", function(d) { return (d.y1 + d.y0) / 2; })
.attr("dy", "0.35em")
.attr("text-anchor", "end")
.text(function(d) { return d.name; })
.filter(function(d) { return d.x0 < width / 2; })
.attr("x", function(d) { return d.x1 + 6; })
.attr("text-anchor", "start");
node.append("title")
.text(function(d) { return d.type + "\n" + d.name + "\n" + d.qualifiedName; });
});
</script>
</html>
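The page above is a small self-contained debug viewer: it loads /api/atlas/v2/debug/lineage/ and hands the result straight to d3-sankey. The exact payload produced by that endpoint is not shown in this excerpt, but from what the script reads (name, type and qualifiedName on nodes; source, target and value on links, with source/target given as indices into the nodes array, which d3-sankey accepts) it is assumed to look roughly like the following; all values are illustrative:

{
  "nodes": [
    {"name": "ConsumeKafka_0_11", "type": "nifi_flow_path", "qualifiedName": "consume-kafka@example"},
    {"name": "PublishKafka_0_11", "type": "nifi_flow_path", "qualifiedName": "publish-kafka@example"}
  ],
  "links": [
    {"source": 0, "target": 1, "value": 1}
  ]
}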

View File

@ -0,0 +1,121 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.server.KafkaConfig for additional details and defaults
############################# Server Basics #############################
# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0
############################# Socket Server Settings #############################
# The port the socket server listens on
port=9092
# Hostname the broker will bind to. If not set, the server will bind to all interfaces
#host.name=localhost
# Hostname the broker will advertise to producers and consumers. If not set, it uses the
# value for "host.name" if configured. Otherwise, it will use the value returned from
# java.net.InetAddress.getCanonicalHostName().
#advertised.host.name=<hostname routable by clients>
# The port to publish to ZooKeeper for clients to use. If this is not set,
# it will publish the same port that the broker binds to.
#advertised.port=<port accessible by clients>
# The number of threads handling network requests
num.network.threads=3
# The number of threads doing disk I/O
num.io.threads=8
# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400
# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400
# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600
############################# Log Basics #############################
# A comma-separated list of directories under which to store log files
log.dirs=target/kafka-tmp/kafka-logs
# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=1
# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# Increasing this value is recommended for installations with data directories located in a RAID array.
num.recovery.threads.per.data.dir=1
############################# Log Flush Policy #############################
# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
# 1. Durability: Unflushed data may be lost if you are not using replication.
# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.
# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000
# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000
############################# Log Retention Policy #############################
# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
# from the end of the log.
# The minimum age of a log file to be eligible for deletion
log.retention.hours=168
# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes.
#log.retention.bytes=1073741824
# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824
# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=300000
# By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires.
# If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction.
log.cleaner.enable=false
############################# Zookeeper #############################
# Zookeeper connection string (see zookeeper docs for details).
# This is a comma-separated list of host:port pairs, each corresponding to a ZooKeeper
# server, e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
zookeeper.connect=localhost:2181
# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=6000

View File

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# the directory where the snapshot is stored.
dataDir=target/kafka-tmp/zookeeper
# the port at which the clients will connect
clientPort=2181
# disable the per-ip limit on the number of connections since this is a non-production config
maxClientCnxns=0
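The two property files above describe a minimal local Kafka/ZooKeeper pair for testing: ZooKeeper on clientPort 2181 with its snapshot under target/kafka-tmp/zookeeper, and a single broker on port 9092 pointing at it via zookeeper.connect. How the bundle's integration tests actually launch them is not shown in this excerpt; the sketch below is only one way to bring the pair up from these files, assuming the Kafka 0.11 broker and ZooKeeper server libraries are on the classpath and the property files sit in the working directory.

import java.io.FileInputStream;
import java.util.Properties;

import org.apache.zookeeper.server.quorum.QuorumPeerMain;

import kafka.server.KafkaServerStartable;

// Hypothetical helper, not part of this commit.
public class EmbeddedKafkaSketch {
    public static void main(String[] args) throws Exception {
        // Standalone ZooKeeper: QuorumPeerMain blocks, so run it on a daemon thread.
        Thread zk = new Thread(() -> QuorumPeerMain.main(new String[]{"zookeeper.properties"}));
        zk.setDaemon(true);
        zk.start();
        Thread.sleep(2000);   // crude wait for ZooKeeper to bind; the broker also retries for zookeeper.connection.timeout.ms

        // Broker from server.properties: listens on localhost:9092, writes logs under target/kafka-tmp/kafka-logs.
        Properties props = new Properties();
        try (FileInputStream in = new FileInputStream("server.properties")) {
            props.load(in);
        }
        KafkaServerStartable broker = KafkaServerStartable.fromProps(props);
        broker.startup();
        Runtime.getRuntime().addShutdownHook(new Thread(broker::shutdown));
        broker.awaitShutdown();
    }
}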

Some files were not shown because too many files have changed in this diff.