mirror of https://github.com/apache/nifi.git

NIFI-11183 Removed Hive 1.x components

This closes #6957

Signed-off-by: David Handermann <exceptionfactory@apache.org>

parent 46f89e3226
commit aae6bafc6c
@@ -1036,55 +1036,6 @@ language governing permissions and limitations under the License. -->
            </dependency>
        </dependencies>
    </profile>
    <profile>
        <id>include-hive</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <property>
                <name>allProfiles</name>
            </property>
        </activation>
        <dependencies>
            <dependency>
                <groupId>org.apache.nifi</groupId>
                <artifactId>nifi-hive-nar</artifactId>
                <version>2.0.0-SNAPSHOT</version>
                <type>nar</type>
            </dependency>
            <dependency>
                <groupId>org.apache.nifi</groupId>
                <artifactId>nifi-hive-services-api-nar</artifactId>
                <version>2.0.0-SNAPSHOT</version>
                <type>nar</type>
            </dependency>
        </dependencies>
    </profile>
    <profile>
        <id>include-hive1_1</id>
        <!-- This profile handles the inclusion of Hive 1.1.x artifacts. The NAR
        is quite large and makes the resultant binary distribution significantly
        larger (150+ MB). -->
        <activation>
            <activeByDefault>false</activeByDefault>
            <property>
                <name>allProfiles</name>
            </property>
        </activation>
        <dependencies>
            <dependency>
                <groupId>org.apache.nifi</groupId>
                <artifactId>nifi-hive_1_1-nar</artifactId>
                <version>2.0.0-SNAPSHOT</version>
                <type>nar</type>
            </dependency>
            <dependency>
                <groupId>org.apache.nifi</groupId>
                <artifactId>nifi-hive-services-api-nar</artifactId>
                <version>2.0.0-SNAPSHOT</version>
                <type>nar</type>
            </dependency>
        </dependencies>
    </profile>
    <profile>
        <id>include-hive3</id>
        <!-- This profile handles the inclusion of Hive 3 artifacts. The NAR
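For context, the assembly profiles removed above were strictly opt-in: with <activeByDefault>false</activeByDefault> and the allProfiles activation property shown, the Hive 1.x NARs were only bundled into the binary distribution when a build requested them explicitly, for example by running mvn clean install -Pinclude-hive, or by defining the activation property with -DallProfiles (illustrative commands reflecting standard Maven profile activation, not text from this commit). After this change those switches no longer pull in Hive 1.x components, because both the profiles and the NARs they referenced are gone.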
@@ -1,47 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements. See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at
        http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-hive-bundle</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-hive-nar</artifactId>
    <packaging>nar</packaging>
    <properties>
        <maven.javadoc.skip>true</maven.javadoc.skip>
        <source.skip>true</source.skip>
        <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
        <hadoop.version>${hive.hadoop.version}</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-services-api-nar</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <type>nar</type>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-processors</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
    </dependencies>
</project>
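A note on the NAR POM above: in NiFi, a Maven dependency with <type>nar</type> does not package classes into the bundle; it declares the parent NAR whose class loader the bundle inherits at runtime. Here nifi-hive-nar inherited from nifi-hive-services-api-nar so the bundled processors could resolve the shared Hive controller-service API, while nifi-hive-processors (a plain jar dependency) supplied the components packaged inside the NAR. A minimal sketch of the general pattern, with an illustrative artifact name rather than one taken from this commit:

    <dependency>
        <groupId>org.apache.nifi</groupId>
        <artifactId>example-services-api-nar</artifactId>
        <version>2.0.0-SNAPSHOT</version>
        <type>nar</type>
    </dependency>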
@@ -1,329 +0,0 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

APACHE NIFI SUBCOMPONENTS:

The Apache NiFi project contains subcomponents with separate copyright
notices and license terms. Your use of the source code for the these
subcomponents is subject to the terms and conditions of the following
licenses.

The binary distribution of this product bundles 'Bouncy Castle JDK 1.5'
under an MIT style license.

    Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org)

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.

The binary distribution of this product includes modules from Groovy which bundles ANTLR

    SOFTWARE RIGHTS

    ANTLR 1989-2006 Developed by Terence Parr
    Partially supported by University of San Francisco & jGuru.com

    We reserve no legal rights to the ANTLR--it is fully in the
    public domain. An individual or company may do whatever
    they wish with source code distributed with ANTLR or the
    code generated by ANTLR, including the incorporation of
    ANTLR, or its output, into commerical software.

    We encourage users to develop software with ANTLR. However,
    we do ask that credit is given to us for developing
    ANTLR. By "credit", we mean that if you use ANTLR or
    incorporate any source code into one of your programs
    (commercial product, research project, or otherwise) that
    you acknowledge this fact somewhere in the documentation,
    research report, etc... If you like ANTLR and have
    developed a nice tool with the output, please mention that
    you developed it using ANTLR. In addition, we ask that the
    headers remain intact in our source code. As long as these
    guidelines are kept, we expect to continue enhancing this
    system and expect to make other tools available as they are
    completed.

    The primary ANTLR guy:

    Terence Parr
    parrt@cs.usfca.edu
    parrt@antlr.org

The binary distribution of this product includes modules from Groovy which bundles ASM

    /***
     * http://asm.objectweb.org/
     *
     * ASM: a very small and fast Java bytecode manipulation framework
     * Copyright (c) 2000-2005 INRIA, France Telecom
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions
     * are met:
     * 1. Redistributions of source code must retain the above copyright
     *    notice, this list of conditions and the following disclaimer.
     * 2. Redistributions in binary form must reproduce the above copyright
     *    notice, this list of conditions and the following disclaimer in the
     *    documentation and/or other materials provided with the distribution.
     * 3. Neither the name of the copyright holders nor the names of its
     *    contributors may be used to endorse or promote products derived from
     *    this software without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
     * THE POSSIBILITY OF SUCH DAMAGE.
     */

The binary distribution of this product includes modules from Groovy which bundles source from JSR-223
The following notice applies to the files:

    src/main/org/codehaus/groovy/jsr223/GroovyCompiledScript.java
    src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineFactory.java
    src/main/org/codehaus/groovy/jsr223/GroovyScriptEngineImpl.java

    /*
     * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
     * Use is subject to license terms.
     *
     * Redistribution and use in source and binary forms, with or without modification, are
     * permitted provided that the following conditions are met: Redistributions of source code
     * must retain the above copyright notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright notice, this list of
     * conditions and the following disclaimer in the documentation and/or other materials
     * provided with the distribution. Neither the name of the Sun Microsystems nor the names of
     * is contributors may be used to endorse or promote products derived from this software
     * without specific prior written permission.
     *
     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
     * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
     * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
     * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     * POSSIBILITY OF SUCH DAMAGE.
     */
@@ -1,348 +0,0 @@
nifi-hive-nar
Copyright 2014-2023 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm):
  Copyright 2015 The Apache Software Foundation
  The derived work is adapted from
    org/apache/storm/hive/common/HiveWriter.java
    org/apache/storm/hive/common/HiveOptions.java
  and can be found in the org.apache.nifi.util.hive package

This includes derived works from the Apache Hive (ASLv2 licensed) project (https://github.com/apache/hive):
  Copyright 2008-2016 The Apache Software Foundation
  The derived work is adapted from
    release-1.2.1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
  and can be found in the org.apache.hadoop.hive.ql.io.orc package

===========================================
Apache Software License v2
===========================================

The following binary components are provided under the Apache Software License v2

  (ASLv2) Apache Ant
    The following NOTICE information applies:
      Apache Ant
      Copyright 1999-2016 The Apache Software Foundation

  (ASLv2) Apache Commons Codec
    The following NOTICE information applies:
      Apache Commons Codec
      Copyright 2002-2014 The Apache Software Foundation

      src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
      contains test data from http://aspell.net/test/orig/batch0.tab.
      Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)

      ===============================================================================

      The content of package org.apache.commons.codec.language.bm has been translated
      from the original php source code available at http://stevemorse.org/phoneticinfo.htm
      with permission from the original authors.
      Original source copyright:
      Copyright (c) 2008 Alexander Beider & Stephen P. Morse.

  (ASLv2) Apache Commons DBCP
    The following NOTICE information applies:
      Apache Commons DBCP
      Copyright 2001-2015 The Apache Software Foundation.

  (ASLv2) Apache Commons EL
    The following NOTICE information applies:
      Apache Commons EL
      Copyright 1999-2016 The Apache Software Foundation

      EL-8 patch - Copyright 2004-2007 Jamie Taylor
      http://issues.apache.org/jira/browse/EL-8

  (ASLv2) Apache HttpComponents
    The following NOTICE information applies:
      Apache HttpComponents Client
      Copyright 1999-2016 The Apache Software Foundation
      Apache HttpComponents Core - HttpCore
      Copyright 2006-2009 The Apache Software Foundation

  (ASLv2) Apache Commons Pool
    The following NOTICE information applies:
      Apache Commons Pool
      Copyright 1999-2009 The Apache Software Foundation.

  (ASLv2) Apache Commons IO
    The following NOTICE information applies:
      Apache Commons IO
      Copyright 2002-2016 The Apache Software Foundation

  (ASLv2) Apache Hive
    The following NOTICE information applies:
      Apache Hive
      Copyright 2008-2015 The Apache Software Foundation

      This product includes software developed by The Apache Software
      Foundation (http://www.apache.org/).

      This product includes Jersey (https://jersey.java.net/)
      Copyright (c) 2010-2014 Oracle and/or its affiliates.

      This project includes software copyrighted by Microsoft Corporation and
      licensed under the Apache License, Version 2.0.

      This project includes software copyrighted by Dell SecureWorks and
      licensed under the Apache License, Version 2.0.

  (ASLv2) Jackson JSON processor
    The following NOTICE information applies:
      # Jackson JSON processor

      Jackson is a high-performance, Free/Open Source JSON processing library.
      It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
      been in development since 2007.
      It is currently developed by a community of developers, as well as supported
      commercially by FasterXML.com.

      ## Licensing

      Jackson core and extension components may licensed under different licenses.
      To find the details that apply to this artifact see the accompanying LICENSE file.
      For more information, including possible other licensing options, contact
      FasterXML.com (http://fasterxml.com).

      ## Credits

      A list of contributors may be found from CREDITS file, which is included
      in some artifacts (usually source distributions); but is always available
      from the source code management (SCM) system project uses.

  (ASLv2) BoneCP
    The following NOTICE information applies:
      BoneCP
      Copyright 2010 Wallace Wadge

  (ASLv2) Apache Hadoop
    The following NOTICE information applies:
      The binary distribution of this product bundles binaries of
      org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the
      following notices:
      * Copyright 2011 Dain Sundstrom <dain@iq80.com>
      * Copyright 2011 FuseSource Corp. http://fusesource.com

      The binary distribution of this product bundles binaries of
      org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
      which has the following notices:
      * This product includes software developed by FuseSource Corp.
        http://fusesource.com
      * This product includes software developed at
        Progress Software Corporation and/or its subsidiaries or affiliates.
      * This product includes software developed by IBM Corporation and others.

  (ASLv2) Apache HBase
    The following NOTICE information applies:
      Apache HBase
      Copyright 2007-2015 The Apache Software Foundation

      --
      This product incorporates portions of the 'Hadoop' project

      Copyright 2007-2009 The Apache Software Foundation

      Licensed under the Apache License v2.0
      --
      Our Orca logo we got here: http://www.vectorfree.com/jumping-orca
      It is licensed Creative Commons Attribution 3.0.
      See https://creativecommons.org/licenses/by/3.0/us/
      We changed the logo by stripping the colored background, inverting
      it and then rotating it some.

      Later we found that vectorfree.com image is not properly licensed.
      The original is owned by vectorportal.com. The original was
      relicensed so we could use it as Creative Commons Attribution 3.0.
      The license is bundled with the download available here:
      http://www.vectorportal.com/subcategory/205/KILLER-WHALE-FREE-VECTOR.eps/ifile/9136/detailtest.asp
      --
      This product includes portions of the Bootstrap project v3.0.0

      Copyright 2013 Twitter, Inc.

      Licensed under the Apache License v2.0

      This product uses the Glyphicons Halflings icon set.

      http://glyphicons.com/

      Copyright Jan Kovařík

      Licensed under the Apache License v2.0 as a part of the Bootstrap project.

      --
      This product includes portions of the Guava project v14, specifically
      'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java'

      Copyright (C) 2007 The Guava Authors

      Licensed under the Apache License, Version 2.0

  (ASLv2) Apache Commons Lang
    The following NOTICE information applies:
      Apache Commons Lang
      Copyright 2001-2015 The Apache Software Foundation

  (ASLv2) Apache Curator
    The following NOTICE information applies:
      Apache Curator
      Copyright 2013-2014 The Apache Software Foundation

  (ASLv2) Apache Derby
    The following NOTICE information applies:
      Apache Derby
      Copyright 2004-2014 Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils,
      the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation.

  (ASLv2) Apache DS
    The following NOTICE information applies:
      ApacheDS
      Copyright 2003-2015 The Apache Software Foundation

  (ASLv2) Apache Geronimo
    The following NOTICE information applies:
      Apache Geronimo
      Copyright 2003-2008 The Apache Software Foundation

  (ASLv2) HTrace Core
    The following NOTICE information applies:
      In addition, this product includes software dependencies. See
      the accompanying LICENSE.txt for a listing of dependencies
      that are NOT Apache licensed (with pointers to their licensing)

      Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin
      is a distributed tracing system that is Apache 2.0 Licensed.
      Copyright 2012 Twitter, Inc.

  (ASLv2) Jettison
    The following NOTICE information applies:
      Copyright 2006 Envoi Solutions LLC

  (ASLv2) Jetty
    The following NOTICE information applies:
      Jetty Web Container
      Copyright 1995-2019 Mort Bay Consulting Pty Ltd.

  (ASLv2) Apache log4j
    The following NOTICE information applies:
      Apache log4j
      Copyright 2007 The Apache Software Foundation

  (ASLv2) Parquet MR
    The following NOTICE information applies:
      Parquet MR
      Copyright 2012 Twitter, Inc.

      This project includes code from https://github.com/lemire/JavaFastPFOR
      parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java
      Apache License Version 2.0 http://www.apache.org/licenses/.
      (c) Daniel Lemire, http://lemire.me/en/

  (ASLv2) Apache Thrift
    The following NOTICE information applies:
      Apache Thrift
      Copyright 2006-2010 The Apache Software Foundation.

  (ASLv2) Apache Twill
    The following NOTICE information applies:
      Apache Twill
      Copyright 2013-2016 The Apache Software Foundation

  (ASLv2) Dropwizard Metrics
    The following NOTICE information applies:
      Metrics
      Copyright 2010-2013 Coda Hale and Yammer, Inc.

      This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64,
      LongAdder), which was released with the following comments:

      Written by Doug Lea with assistance from members of JCP JSR-166
      Expert Group and released to the public domain, as explained at
      http://creativecommons.org/publicdomain/zero/1.0/

  (ASLv2) Joda Time
    The following NOTICE information applies:
      This product includes software developed by
      Joda.org (http://www.joda.org/).

  (ASLv2) The Netty Project
    The following NOTICE information applies:
      The Netty Project
      Copyright 2011 The Netty Project

  (ASLv2) Apache Tomcat
    The following NOTICE information applies:
      Apache Tomcat
      Copyright 2007 The Apache Software Foundation

      Java Management Extensions (JMX) support is provided by
      the MX4J package, which is open source software. The
      original software and related information is available
      at http://mx4j.sourceforge.net.

      Java compilation software for JSP pages is provided by Eclipse,
      which is open source software. The orginal software and
      related infomation is available at
      http://www.eclipse.org.

  (ASLv2) Apache ZooKeeper
    The following NOTICE information applies:
      Apache ZooKeeper
      Copyright 2009-2012 The Apache Software Foundation

  (ASLv2) Google GSON
    The following NOTICE information applies:
      Copyright 2008 Google Inc.

  (ASLv2) Groovy (org.codehaus.groovy:groovy-all:jar:2.1.6 - http://www.groovy-lang.org)
    The following NOTICE information applies:
      Groovy Language
      Copyright 2003-2012 The respective authors and developers
      Developers and Contributors are listed in the project POM file
      and Gradle build file

      This product includes software developed by
      The Groovy community (http://groovy.codehaus.org/).

  (ASLv2) JPam
    The following NOTICE information applies:
      Copyright 2003-2006 Greg Luck

************************
Common Development and Distribution License 1.1
************************

The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details.

  (CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net)
  (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/)
  (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/)
  (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/)
  (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/)
  (CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/)
  (CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail)

************************
Common Development and Distribution License 1.0
************************

The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details.

  (CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available)
  (CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided)
  (CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp)
  (CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net)

*****************
Public Domain
*****************

The following binary components are provided to the 'Public Domain'. See project link for details.

  (Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/)
@@ -1,225 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements. See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at
        http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-hive-bundle</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-hive-processors</artifactId>
    <packaging>jar</packaging>

    <properties>
        <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
        <hadoop.version>${hive12.hadoop.version}</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-put-pattern</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-security-kerberos</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-dbcp-service-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-services-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-kerberos-credentials-service-api</artifactId>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive12.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.json</groupId>
                    <artifactId>json</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-streaming</artifactId>
            <version>${hive12.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Override groovy-all:2.1.6 from Hive -->
        <dependency>
            <groupId>org.codehaus.groovy</groupId>
            <artifactId>groovy-all</artifactId>
            <version>2.4.21</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-core</artifactId>
            <version>${hive12.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.code.findbugs</groupId>
                    <artifactId>jsr305</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hadoop-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hadoop-record-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-record-serialization-service-api</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-record</artifactId>
        </dependency>
        <dependency>
            <groupId>com.github.stephenc.findbugs</groupId>
            <artifactId>findbugs-annotations</artifactId>
            <version>1.3.9-1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-text</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-dbcp2</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>log4j-over-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock-record-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.xerial.snappy</groupId>
            <artifactId>snappy-java</artifactId>
        </dependency>
    </dependencies>
</project>
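A note on the processors POM above: the Hive 1.2 artifacts (hive-jdbc, hive-hcatalog-streaming, hive-hcatalog-core) were all pinned through the ${hive12.version} property, and ${hadoop.version} was redefined as ${hive12.hadoop.version} so that hadoop-client and the Hive transitive dependencies resolved to matching versions. Both properties were presumably defined in the parent nifi-hive-bundle POM, which is not shown in this diff; as an illustrative, assumed example, a build could have overridden such a property from the command line with a define like -Dhive12.version=1.2.2.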
@ -1,612 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hive.ql.io.orc;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.util.Utf8;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.common.type.HiveDecimal;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
|
||||
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
|
||||
import org.apache.hadoop.io.BooleanWritable;
|
||||
import org.apache.hadoop.io.BytesWritable;
|
||||
import org.apache.hadoop.io.DoubleWritable;
|
||||
import org.apache.hadoop.io.FloatWritable;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.nifi.serialization.record.DataType;
|
||||
import org.apache.nifi.serialization.record.RecordField;
|
||||
import org.apache.nifi.serialization.record.RecordFieldType;
|
||||
import org.apache.nifi.serialization.record.type.ArrayDataType;
|
||||
import org.apache.nifi.serialization.record.type.ChoiceDataType;
|
||||
import org.apache.nifi.serialization.record.type.MapDataType;
|
||||
import org.apache.nifi.serialization.record.type.RecordDataType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
|
||||
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
|
||||
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
|
||||
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
|
||||
|
||||
/**
|
||||
* Utility methods for ORC support (conversion from Avro, conversion to Hive types, e.g.
|
||||
*/
|
||||
public class NiFiOrcUtils {
|
||||
|
||||
public static Object convertToORCObject(TypeInfo typeInfo, Object o) {
|
||||
if (o != null) {
|
||||
if (typeInfo instanceof UnionTypeInfo) {
|
||||
OrcUnion union = new OrcUnion();
|
||||
// Avro uses Utf8 and GenericData.EnumSymbol objects instead of Strings. This is handled in other places in the method, but here
|
||||
// we need to determine the union types from the objects, so choose String.class if the object is one of those Avro classes
|
||||
Class clazzToCompareTo = o.getClass();
|
||||
if (o instanceof org.apache.avro.util.Utf8 || o instanceof GenericData.EnumSymbol) {
|
||||
clazzToCompareTo = String.class;
|
||||
}
|
||||
// Need to find which of the union types correspond to the primitive object
|
||||
TypeInfo objectTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(
|
||||
ObjectInspectorFactory.getReflectionObjectInspector(clazzToCompareTo, ObjectInspectorFactory.ObjectInspectorOptions.JAVA));
|
||||
List<TypeInfo> unionTypeInfos = ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos();
|
||||
|
||||
int index = 0;
|
||||
while (index < unionTypeInfos.size() && !unionTypeInfos.get(index).equals(objectTypeInfo)) {
|
||||
index++;
|
||||
}
|
||||
if (index < unionTypeInfos.size()) {
|
||||
union.set((byte) index, convertToORCObject(objectTypeInfo, o));
|
||||
} else {
|
||||
throw new IllegalArgumentException("Object Type for class " + o.getClass().getName() + " not in Union declaration");
|
||||
}
|
||||
return union;
|
||||
}
|
||||
if (o instanceof Integer) {
|
||||
return new IntWritable((int) o);
|
||||
}
|
||||
if (o instanceof Boolean) {
|
||||
return new BooleanWritable((boolean) o);
|
||||
}
|
||||
if (o instanceof Long) {
|
||||
return new LongWritable((long) o);
|
||||
}
|
||||
if (o instanceof Float) {
|
||||
return new FloatWritable((float) o);
|
||||
}
|
||||
if (o instanceof Double) {
|
||||
return new DoubleWritable((double) o);
|
||||
}
|
||||
if (o instanceof BigDecimal) {
|
||||
return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) o));
|
||||
}
|
||||
if (o instanceof String || o instanceof Utf8 || o instanceof GenericData.EnumSymbol) {
|
||||
return new Text(o.toString());
|
||||
}
|
||||
if (o instanceof ByteBuffer && typeInfo instanceof DecimalTypeInfo) {
|
||||
ByteBuffer buffer = (ByteBuffer) o;
|
||||
return new HiveDecimalWritable(buffer.array(), ((DecimalTypeInfo) typeInfo).scale());
|
||||
}
|
||||
if (o instanceof ByteBuffer) {
|
||||
return new BytesWritable(((ByteBuffer) o).array());
|
||||
}
|
||||
if (o instanceof int[]) {
|
||||
int[] intArray = (int[]) o;
|
||||
return Arrays.stream(intArray)
|
||||
.mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("int"), element))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof long[]) {
|
||||
long[] longArray = (long[]) o;
|
||||
return Arrays.stream(longArray)
|
||||
.mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("bigint"), element))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof float[]) {
|
||||
float[] floatArray = (float[]) o;
|
||||
return IntStream.range(0, floatArray.length)
|
||||
.mapToDouble(i -> floatArray[i])
|
||||
.mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("float"), (float) element))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof double[]) {
|
||||
double[] doubleArray = (double[]) o;
|
||||
return Arrays.stream(doubleArray)
|
||||
.mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("double"), element))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof boolean[]) {
|
||||
boolean[] booleanArray = (boolean[]) o;
|
||||
return IntStream.range(0, booleanArray.length)
|
||||
.map(i -> booleanArray[i] ? 1 : 0)
|
||||
.mapToObj((element) -> convertToORCObject(TypeInfoFactory.getPrimitiveTypeInfo("boolean"), element == 1))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof GenericData.Array) {
|
||||
GenericData.Array array = ((GenericData.Array) o);
|
||||
// The type information in this case is interpreted as a List
|
||||
TypeInfo listTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
|
||||
return array.stream().map((element) -> convertToORCObject(listTypeInfo, element)).collect(Collectors.toList());
|
||||
}
|
||||
if (o instanceof List) {
|
||||
return o;
|
||||
}
|
||||
if (o instanceof Map) {
|
||||
Map map = new HashMap();
|
||||
TypeInfo keyInfo = ((MapTypeInfo) typeInfo).getMapKeyTypeInfo();
|
||||
TypeInfo valueInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
|
||||
// Unions are not allowed as key/value types, so if we convert the key and value objects,
|
||||
// they should return Writable objects
|
||||
((Map) o).forEach((key, value) -> {
|
||||
Object keyObject = convertToORCObject(keyInfo, key);
|
||||
Object valueObject = convertToORCObject(valueInfo, value);
|
||||
if (keyObject == null) {
|
||||
throw new IllegalArgumentException("Maps' key cannot be null");
|
||||
}
|
||||
map.put(keyObject, valueObject);
|
||||
});
|
||||
return map;
|
||||
}
|
||||
if (o instanceof GenericData.Record) {
|
||||
GenericData.Record record = (GenericData.Record) o;
|
||||
TypeInfo recordSchema = NiFiOrcUtils.getOrcField(record.getSchema());
|
||||
List<Schema.Field> recordFields = record.getSchema().getFields();
|
||||
if (recordFields != null) {
|
||||
Object[] fieldObjects = new Object[recordFields.size()];
|
||||
for (int i = 0; i < recordFields.size(); i++) {
|
||||
Schema.Field field = recordFields.get(i);
|
||||
Schema fieldSchema = field.schema();
|
||||
Object fieldObject = record.get(field.name());
|
||||
fieldObjects[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), fieldObject);
|
||||
}
|
||||
return NiFiOrcUtils.createOrcStruct(recordSchema, fieldObjects);
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Error converting object of type " + o.getClass().getName() + " to ORC type " + typeInfo.getTypeName());
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}

    /**
     * Create an object of OrcStruct given a TypeInfo and a list of objects
     *
     * @param typeInfo The TypeInfo object representing the ORC record schema
     * @param objs ORC objects/Writables
     * @return an OrcStruct containing the specified objects for the specified schema
     */
    public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
        SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct
                .createObjectInspector(typeInfo);
        List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
        OrcStruct result = (OrcStruct) oi.create();
        result.setNumFields(fields.size());
        for (int i = 0; i < fields.size(); i++) {
            oi.setStructFieldData(result, fields.get(i), objs[i]);
        }
        return result;
    }
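
    /*
     * Illustrative sketch (not part of the original class): how createOrcStruct can be combined with
     * a hand-written Hive type string to build an ORC struct. The type string, field values, and the
     * helper name below are hypothetical examples; the supplied values must already be ORC/Writable
     * objects in the struct's declared field order.
     */
    private static OrcStruct exampleCreateOrcStruct() {
        TypeInfo structType = org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils
                .getTypeInfoFromTypeString("struct<id:bigint,name:string>");
        return createOrcStruct(structType,
                new org.apache.hadoop.io.LongWritable(42L),
                new org.apache.hadoop.io.Text("nifi"));
    }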

    public static String normalizeHiveTableName(String name) {
        return name.replaceAll("[\\. ]", "_");
    }

    public static String generateHiveDDL(Schema avroSchema, String tableName) {
        Schema.Type schemaType = avroSchema.getType();
        StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
        sb.append(tableName);
        sb.append(" (");
        if (Schema.Type.RECORD.equals(schemaType)) {
            List<String> hiveColumns = new ArrayList<>();
            List<Schema.Field> fields = avroSchema.getFields();
            if (fields != null) {
                hiveColumns.addAll(
                        fields.stream().map(field -> field.name() + " " + getHiveTypeFromAvroType(field.schema())).collect(Collectors.toList()));
            }
            sb.append(StringUtils.join(hiveColumns, ", "));
            sb.append(") STORED AS ORC");
            return sb.toString();
        } else {
            throw new IllegalArgumentException("Avro schema is of type " + schemaType.getName() + ", not RECORD");
        }
    }
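
    /*
     * Illustrative sketch (not part of the original class): the DDL this method is expected to
     * produce for a simple record schema. The schema JSON, table name, and helper method name are
     * hypothetical examples.
     */
    private static String exampleGenerateHiveDDL() {
        Schema userSchema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"user\",\"fields\":["
                        + "{\"name\":\"id\",\"type\":\"long\"},"
                        + "{\"name\":\"name\",\"type\":\"string\"}]}");
        // Expected result: CREATE EXTERNAL TABLE IF NOT EXISTS users (id BIGINT, name STRING) STORED AS ORC
        return generateHiveDDL(userSchema, normalizeHiveTableName("users"));
    }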


    public static TypeInfo getOrcField(Schema fieldSchema) throws IllegalArgumentException {
        Schema.Type fieldType = fieldSchema.getType();

        switch (fieldType) {
            case INT:
            case LONG:
            case BOOLEAN:
            case DOUBLE:
            case FLOAT:
            case STRING:
            case NULL:
                return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType);
            case BYTES:
                if (isLogicalType(fieldSchema)) {
                    return getLogicalTypeInfo(fieldSchema);
                } else {
                    return getPrimitiveOrcTypeFromPrimitiveAvroType(fieldType);
                }
            case UNION:
                List<Schema> unionFieldSchemas = fieldSchema.getTypes();

                if (unionFieldSchemas != null) {
                    // Ignore null types in union
                    List<TypeInfo> orcFields = unionFieldSchemas.stream().filter(
                            unionFieldSchema -> !Schema.Type.NULL.equals(unionFieldSchema.getType()))
                            .map(NiFiOrcUtils::getOrcField)
                            .collect(Collectors.toList());

                    // Flatten the field if the union only has one non-null element
                    if (orcFields.size() == 1) {
                        return orcFields.get(0);
                    } else {
                        return TypeInfoFactory.getUnionTypeInfo(orcFields);
                    }
                }
                return null;

            case ARRAY:
                return TypeInfoFactory.getListTypeInfo(getOrcField(fieldSchema.getElementType()));

            case MAP:
                return TypeInfoFactory.getMapTypeInfo(
                        getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING),
                        getOrcField(fieldSchema.getValueType()));

            case RECORD:
                List<Schema.Field> avroFields = fieldSchema.getFields();
                if (avroFields != null) {
                    List<String> orcFieldNames = new ArrayList<>(avroFields.size());
                    List<TypeInfo> orcFields = new ArrayList<>(avroFields.size());
                    avroFields.forEach(avroField -> {
                        String fieldName = avroField.name();
                        orcFieldNames.add(fieldName);
                        orcFields.add(getOrcField(avroField.schema()));
                    });
                    return TypeInfoFactory.getStructTypeInfo(orcFieldNames, orcFields);
                }
                return null;

            case ENUM:
                // An enum value is just a String for ORC/Hive
                return getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type.STRING);

            default:
                throw new IllegalArgumentException("Did not recognize Avro type " + fieldType.getName());
        }

    }
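
    /*
     * Illustrative sketch (not part of the original class): a nullable union such as
     * ["null", "string"] flattens to the single non-null ORC type, while a record maps to an ORC
     * struct. The schema literals and method name below are hypothetical examples.
     */
    private static TypeInfo exampleGetOrcField() {
        Schema nullableString = new Schema.Parser().parse("[\"null\", \"string\"]");
        TypeInfo flattened = getOrcField(nullableString); // string, not uniontype<string>
        Schema point = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"point\",\"fields\":["
                        + "{\"name\":\"x\",\"type\":\"double\"},{\"name\":\"y\",\"type\":\"double\"}]}");
        return getOrcField(point); // struct<x:double,y:double>
    }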

    private static boolean isLogicalType(Schema schema) {
        return schema.getProp("logicalType") != null;
    }

    private static TypeInfo getLogicalTypeInfo(Schema schema) {
        String type = schema.getProp("logicalType");
        switch (type) {
            case "decimal":
                int precision = schema.getObjectProp("precision") != null
                        ? Integer.valueOf(schema.getObjectProp("precision").toString())
                        : 10;
                int scale = schema.getObjectProp("scale") != null
                        ? Integer.valueOf(schema.getObjectProp("scale").toString())
                        : 2;
                return new DecimalTypeInfo(precision, scale);
        }
        throw new IllegalArgumentException("Logical type " + type + " is not supported!");
    }
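
    /*
     * Illustrative sketch (not part of the original class): a bytes field carrying the Avro
     * "decimal" logical type maps to a Hive DECIMAL with the declared precision and scale (or the
     * defaults of 10 and 2 when those properties are absent). The schema JSON and method name are
     * hypothetical examples.
     */
    private static TypeInfo exampleDecimalMapping() {
        Schema amount = new Schema.Parser().parse(
                "{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":18,\"scale\":4}");
        return getOrcField(amount); // decimal(18,4)
    }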

    public static Schema.Type getAvroSchemaTypeOfObject(Object o) {
        if (o == null) {
            return Schema.Type.NULL;
        } else if (o instanceof Integer) {
            return Schema.Type.INT;
        } else if (o instanceof Long) {
            return Schema.Type.LONG;
        } else if (o instanceof Boolean) {
            return Schema.Type.BOOLEAN;
        } else if (o instanceof byte[]) {
            return Schema.Type.BYTES;
        } else if (o instanceof Float) {
            return Schema.Type.FLOAT;
        } else if (o instanceof Double) {
            return Schema.Type.DOUBLE;
        } else if (o instanceof Enum) {
            return Schema.Type.ENUM;
        } else if (o instanceof Object[]) {
            return Schema.Type.ARRAY;
        } else if (o instanceof List) {
            return Schema.Type.ARRAY;
        } else if (o instanceof Map) {
            return Schema.Type.MAP;
        } else {
            throw new IllegalArgumentException("Object of class " + o.getClass() + " is not a supported Avro Type");
        }
    }

    public static TypeInfo getPrimitiveOrcTypeFromPrimitiveAvroType(Schema.Type avroType) throws IllegalArgumentException {
        if (avroType == null) {
            throw new IllegalArgumentException("Avro type is null");
        }
        switch (avroType) {
            case INT:
                return TypeInfoFactory.getPrimitiveTypeInfo("int");
            case LONG:
                return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
            case BOOLEAN:
            case NULL: // ORC has no null type, so just pick the smallest. All values are necessarily null.
                return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
            case BYTES:
                return TypeInfoFactory.getPrimitiveTypeInfo("binary");
            case DOUBLE:
                return TypeInfoFactory.getPrimitiveTypeInfo("double");
            case FLOAT:
                return TypeInfoFactory.getPrimitiveTypeInfo("float");
            case STRING:
                return TypeInfoFactory.getPrimitiveTypeInfo("string");
            default:
                throw new IllegalArgumentException("Avro type " + avroType.getName() + " is not a primitive type");
        }
    }

    public static String getHiveTypeFromAvroType(Schema avroSchema) {
        if (avroSchema == null) {
            throw new IllegalArgumentException("Avro schema is null");
        }

        Schema.Type avroType = avroSchema.getType();

        switch (avroType) {
            case INT:
                return "INT";
            case LONG:
                return "BIGINT";
            case BOOLEAN:
            case NULL: // Hive has no null type, we picked boolean as the ORC type so use it for Hive DDL too. All values are necessarily null.
                return "BOOLEAN";
            case BYTES:
                if (isLogicalType(avroSchema)) {
                    return getLogicalTypeInfo(avroSchema).toString().toUpperCase();
                } else {
                    return "BINARY";
                }
            case DOUBLE:
                return "DOUBLE";
            case FLOAT:
                return "FLOAT";
            case STRING:
            case ENUM:
                return "STRING";
            case UNION:
                List<Schema> unionFieldSchemas = avroSchema.getTypes();
                if (unionFieldSchemas != null) {
                    List<String> hiveFields = new ArrayList<>();
                    for (Schema unionFieldSchema : unionFieldSchemas) {
                        Schema.Type unionFieldSchemaType = unionFieldSchema.getType();
                        // Ignore null types in union
                        if (!Schema.Type.NULL.equals(unionFieldSchemaType)) {
                            hiveFields.add(getHiveTypeFromAvroType(unionFieldSchema));
                        }
                    }
                    // Flatten the field if the union only has one non-null element
                    return (hiveFields.size() == 1)
                            ? hiveFields.get(0)
                            : "UNIONTYPE<" + StringUtils.join(hiveFields, ", ") + ">";
                }
                break;
            case MAP:
                return "MAP<STRING, " + getHiveTypeFromAvroType(avroSchema.getValueType()) + ">";
            case ARRAY:
                return "ARRAY<" + getHiveTypeFromAvroType(avroSchema.getElementType()) + ">";
            case RECORD:
                List<Schema.Field> recordFields = avroSchema.getFields();
                if (recordFields != null) {
                    List<String> hiveFields = recordFields.stream().map(
                            recordField -> recordField.name() + ":" + getHiveTypeFromAvroType(recordField.schema())).collect(Collectors.toList());
                    return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">";
                }
                break;
            default:
                break;
        }

        throw new IllegalArgumentException("Error converting Avro type " + avroType.getName() + " to Hive type");
    }
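
    /*
     * Illustrative sketch (not part of the original class): a nested record is rendered as a Hive
     * STRUCT, and a nullable union collapses to its single non-null branch. The schema JSON and
     * method name below are hypothetical examples.
     */
    private static String exampleHiveTypeFromAvro() {
        Schema address = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"address\",\"fields\":["
                        + "{\"name\":\"city\",\"type\":\"string\"},"
                        + "{\"name\":\"zip\",\"type\":[\"null\",\"int\"]}]}");
        // Expected result: STRUCT<city:STRING, zip:INT>
        return getHiveTypeFromAvroType(address);
    }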

    public static String getHiveTypeFromFieldType(DataType rawDataType, boolean hiveFieldNames) {
        if (rawDataType == null) {
            throw new IllegalArgumentException("Field type is null");
        }
        RecordFieldType dataType = rawDataType.getFieldType();

        if (RecordFieldType.INT.equals(dataType)) {
            return "INT";
        }
        if (RecordFieldType.LONG.equals(dataType)) {
            return "BIGINT";
        }
        if (RecordFieldType.BOOLEAN.equals(dataType)) {
            return "BOOLEAN";
        }
        if (RecordFieldType.DOUBLE.equals(dataType)) {
            return "DOUBLE";
        }
        if (RecordFieldType.FLOAT.equals(dataType)) {
            return "FLOAT";
        }
        if (RecordFieldType.DECIMAL.equals(dataType)) {
            return "DECIMAL";
        }
        if (RecordFieldType.STRING.equals(dataType) || RecordFieldType.ENUM.equals(dataType)) {
            return "STRING";
        }
        if (RecordFieldType.DATE.equals(dataType)) {
            return "DATE";
        }
        if (RecordFieldType.TIME.equals(dataType)) {
            return "INT";
        }
        if (RecordFieldType.TIMESTAMP.equals(dataType)) {
            return "TIMESTAMP";
        }
        if (RecordFieldType.ARRAY.equals(dataType)) {
            ArrayDataType arrayDataType = (ArrayDataType) rawDataType;
            if (RecordFieldType.BYTE.getDataType().equals(arrayDataType.getElementType())) {
                return "BINARY";
            }
            return "ARRAY<" + getHiveTypeFromFieldType(arrayDataType.getElementType(), hiveFieldNames) + ">";
        }
        if (RecordFieldType.MAP.equals(dataType)) {
            MapDataType mapDataType = (MapDataType) rawDataType;
            return "MAP<STRING, " + getHiveTypeFromFieldType(mapDataType.getValueType(), hiveFieldNames) + ">";
        }
        if (RecordFieldType.CHOICE.equals(dataType)) {
            ChoiceDataType choiceDataType = (ChoiceDataType) rawDataType;
            List<DataType> unionFieldSchemas = choiceDataType.getPossibleSubTypes();

            if (unionFieldSchemas != null) {
                // Ignore null types in union
                List<String> hiveFields = unionFieldSchemas.stream()
                        .map((it) -> getHiveTypeFromFieldType(it, hiveFieldNames))
                        .collect(Collectors.toList());

                // Flatten the field if the union only has one non-null element
                return (hiveFields.size() == 1)
                        ? hiveFields.get(0)
                        : "UNIONTYPE<" + StringUtils.join(hiveFields, ", ") + ">";
            }
            return null;
        }

        if (RecordFieldType.RECORD.equals(dataType)) {
            RecordDataType recordDataType = (RecordDataType) rawDataType;
            List<RecordField> recordFields = recordDataType.getChildSchema().getFields();
            if (recordFields != null) {
                List<String> hiveFields = recordFields.stream().map(
                        recordField -> ("`" + (hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName()) + "`:"
                                + getHiveTypeFromFieldType(recordField.getDataType(), hiveFieldNames))).collect(Collectors.toList());
                return "STRUCT<" + StringUtils.join(hiveFields, ", ") + ">";
            }
            return null;
        }

        throw new IllegalArgumentException("Error converting Avro type " + dataType.name() + " to Hive type");
    }
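
    /*
     * Illustrative sketch (not part of the original class), assuming the NiFi record API's
     * RecordFieldType.CHOICE.getChoiceDataType(...) helper: a CHOICE of INT and STRING becomes a
     * Hive UNIONTYPE, and hiveFieldNames=true would lower-case field names in STRUCT output.
     */
    private static String exampleHiveTypeFromFieldType() {
        DataType choice = RecordFieldType.CHOICE.getChoiceDataType(
                RecordFieldType.INT.getDataType(), RecordFieldType.STRING.getDataType());
        // Expected result: UNIONTYPE<INT, STRING>
        return getHiveTypeFromFieldType(choice, true);
    }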


    public static OrcFlowFileWriter createWriter(OutputStream flowFileOutputStream,
                                                 Path path,
                                                 Configuration conf,
                                                 TypeInfo orcSchema,
                                                 long stripeSize,
                                                 CompressionKind compress,
                                                 int bufferSize) throws IOException {

        int rowIndexStride = HiveConf.getIntVar(conf, HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);

        boolean addBlockPadding = HiveConf.getBoolVar(conf, HIVE_ORC_DEFAULT_BLOCK_PADDING);

        String versionName = HiveConf.getVar(conf, HIVE_ORC_WRITE_FORMAT);
        OrcFile.Version versionValue = (versionName == null)
                ? OrcFile.Version.CURRENT
                : OrcFile.Version.byName(versionName);

        OrcFile.EncodingStrategy encodingStrategy;
        String enString = conf.get(HiveConf.ConfVars.HIVE_ORC_ENCODING_STRATEGY.varname);
        if (enString == null) {
            encodingStrategy = OrcFile.EncodingStrategy.SPEED;
        } else {
            encodingStrategy = OrcFile.EncodingStrategy.valueOf(enString);
        }

        OrcFile.CompressionStrategy compressionStrategy;
        String compString = conf.get(HiveConf.ConfVars.HIVE_ORC_COMPRESSION_STRATEGY.varname);
        if (compString == null) {
            compressionStrategy = OrcFile.CompressionStrategy.SPEED;
        } else {
            compressionStrategy = OrcFile.CompressionStrategy.valueOf(compString);
        }

        float paddingTolerance;
        paddingTolerance = conf.getFloat(HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.varname,
                HiveConf.ConfVars.HIVE_ORC_BLOCK_PADDING_TOLERANCE.defaultFloatVal);

        long blockSizeValue = HiveConf.getLongVar(conf, HIVE_ORC_DEFAULT_BLOCK_SIZE);

        double bloomFilterFpp = BloomFilterIO.DEFAULT_FPP;

        ObjectInspector inspector = OrcStruct.createObjectInspector(orcSchema);

        return new OrcFlowFileWriter(flowFileOutputStream,
                path,
                conf,
                inspector,
                stripeSize,
                compress,
                bufferSize,
                rowIndexStride,
                getMemoryManager(conf),
                addBlockPadding,
                versionValue,
                null, // no callback
                encodingStrategy,
                compressionStrategy,
                paddingTolerance,
                blockSizeValue,
                null, // no Bloom Filter column names
                bloomFilterFpp);
    }

    private static MemoryManager memoryManager = null;

    private static synchronized MemoryManager getMemoryManager(Configuration conf) {
        if (memoryManager == null) {
            memoryManager = new MemoryManager(conf);
        }
        return memoryManager;
    }
}
@ -1,459 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.dbcp.hive;
|
||||
|
||||
import org.apache.commons.dbcp2.BasicDataSource;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hive.jdbc.HiveDriver;
|
||||
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnDisabled;
|
||||
import org.apache.nifi.annotation.lifecycle.OnEnabled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.PropertyValue;
|
||||
import org.apache.nifi.components.ValidationContext;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.components.resource.ResourceCardinality;
|
||||
import org.apache.nifi.components.resource.ResourceType;
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.controller.ConfigurationContext;
|
||||
import org.apache.nifi.controller.ControllerServiceInitializationContext;
|
||||
import org.apache.nifi.dbcp.DBCPValidator;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.hadoop.KerberosProperties;
|
||||
import org.apache.nifi.hadoop.SecurityUtil;
|
||||
import org.apache.nifi.kerberos.KerberosCredentialsService;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.security.krb.KerberosKeytabUser;
|
||||
import org.apache.nifi.security.krb.KerberosLoginException;
|
||||
import org.apache.nifi.security.krb.KerberosPasswordUser;
|
||||
import org.apache.nifi.security.krb.KerberosUser;
|
||||
import org.apache.nifi.util.hive.AuthenticationFailedException;
|
||||
import org.apache.nifi.util.hive.HiveConfigurator;
|
||||
import org.apache.nifi.util.hive.ValidationResources;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.UndeclaredThrowableException;
|
||||
import java.security.PrivilegedExceptionAction;
|
||||
import java.sql.Connection;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* Implementation for Database Connection Pooling Service used for Apache Hive
|
||||
* connections. Apache DBCP is used for connection pooling functionality.
|
||||
*/
|
||||
@RequiresInstanceClassLoading
|
||||
@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
|
||||
@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive. Connections can be asked from pool and returned after usage.")
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.dbcp.hive.Hive3ConnectionPool")
|
||||
public class HiveConnectionPool extends AbstractControllerService implements HiveDBCPService {
|
||||
private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB";
|
||||
|
||||
private static final String DEFAULT_MAX_CONN_LIFETIME = "-1";
|
||||
|
||||
public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder()
|
||||
.name("hive-db-connect-url")
|
||||
.displayName("Database Connection URL")
|
||||
.description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters."
|
||||
+ " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included "
|
||||
+ "as a connection parameter when connecting to a secure Hive server.")
|
||||
.defaultValue(null)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.required(true)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
|
||||
.name("hive-config-resources")
|
||||
.displayName("Hive Configuration Resources")
|
||||
.description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop "
|
||||
+ "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication "
|
||||
+ "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.")
|
||||
.required(false)
|
||||
.identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder()
|
||||
.name("hive-db-user")
|
||||
.displayName("Database User")
|
||||
.description("Database user name")
|
||||
.defaultValue(null)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder()
|
||||
.name("hive-db-password")
|
||||
.displayName("Password")
|
||||
.description("The password for the database user")
|
||||
.defaultValue(null)
|
||||
.required(false)
|
||||
.sensitive(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-wait-time")
|
||||
.displayName("Max Wait Time")
|
||||
.description("The maximum amount of time that the pool will wait (when there are no available connections) "
|
||||
+ " for a connection to be returned before failing, or -1 to wait indefinitely. ")
|
||||
.defaultValue("500 millis")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-total-connections")
|
||||
.displayName("Max Total Connections")
|
||||
.description("The maximum number of active connections that can be allocated from this pool at the same time, "
|
||||
+ "or negative for no limit.")
|
||||
.defaultValue("8")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_CONN_LIFETIME = new PropertyDescriptor.Builder()
|
||||
.displayName("Max Connection Lifetime")
|
||||
.name("hive-max-conn-lifetime")
|
||||
.description("The maximum lifetime in milliseconds of a connection. After this time is exceeded the " +
|
||||
"connection pool will invalidate the connection. A value of zero or -1 " +
|
||||
"means the connection has an infinite lifetime.")
|
||||
.defaultValue(DEFAULT_MAX_CONN_LIFETIME)
|
||||
.required(true)
|
||||
.addValidator(DBCPValidator.CUSTOM_TIME_PERIOD_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("Validation-query")
|
||||
.displayName("Validation query")
|
||||
.description("Validation query used to validate connections before returning them. "
|
||||
+ "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. "
|
||||
+ "NOTE: Using validation may have a performance penalty.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder()
|
||||
.name("kerberos-credentials-service")
|
||||
.displayName("Kerberos Credentials Service")
|
||||
.description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos")
|
||||
.identifiesControllerService(KerberosCredentialsService.class)
|
||||
.required(false)
|
||||
.build();
|
||||
|
||||
|
||||
private List<PropertyDescriptor> properties;
|
||||
|
||||
private String connectionUrl = "unknown";
|
||||
|
||||
// Holder of cached Configuration information so validation does not reload the same config over and over
|
||||
private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();
|
||||
|
||||
private volatile BasicDataSource dataSource;
|
||||
|
||||
private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
|
||||
private volatile UserGroupInformation ugi;
|
||||
private final AtomicReference<KerberosUser> kerberosUserReference = new AtomicReference<>();
|
||||
private volatile File kerberosConfigFile = null;
|
||||
private volatile KerberosProperties kerberosProperties;
|
||||
|
||||
@Override
|
||||
protected void init(final ControllerServiceInitializationContext context) {
|
||||
List<PropertyDescriptor> props = new ArrayList<>();
|
||||
props.add(DATABASE_URL);
|
||||
props.add(HIVE_CONFIGURATION_RESOURCES);
|
||||
props.add(DB_USER);
|
||||
props.add(DB_PASSWORD);
|
||||
props.add(MAX_WAIT_TIME);
|
||||
props.add(MAX_TOTAL_CONNECTIONS);
|
||||
props.add(MAX_CONN_LIFETIME);
|
||||
props.add(VALIDATION_QUERY);
|
||||
props.add(KERBEROS_CREDENTIALS_SERVICE);
|
||||
|
||||
kerberosConfigFile = context.getKerberosConfigurationFile();
|
||||
kerberosProperties = new KerberosProperties(kerberosConfigFile);
|
||||
props.add(kerberosProperties.getKerberosPrincipal());
|
||||
props.add(kerberosProperties.getKerberosKeytab());
|
||||
props.add(kerberosProperties.getKerberosPassword());
|
||||
properties = props;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
|
||||
boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet();
|
||||
|
||||
final List<ValidationResult> problems = new ArrayList<>();
|
||||
|
||||
if (confFileProvided) {
|
||||
final String explicitPrincipal = validationContext.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue();
|
||||
final String explicitKeytab = validationContext.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue();
|
||||
final String explicitPassword = validationContext.getProperty(kerberosProperties.getKerberosPassword()).getValue();
|
||||
final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
|
||||
|
||||
final String resolvedPrincipal;
|
||||
final String resolvedKeytab;
|
||||
if (credentialsService != null) {
|
||||
resolvedPrincipal = credentialsService.getPrincipal();
|
||||
resolvedKeytab = credentialsService.getKeytab();
|
||||
} else {
|
||||
resolvedPrincipal = explicitPrincipal;
|
||||
resolvedKeytab = explicitKeytab;
|
||||
}
|
||||
|
||||
|
||||
final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
|
||||
problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, explicitPassword, validationResourceHolder, getLogger()));
|
||||
|
||||
if (credentialsService != null && (explicitPrincipal != null || explicitKeytab != null || explicitPassword != null)) {
|
||||
problems.add(new ValidationResult.Builder()
|
||||
.subject("Kerberos Credentials")
|
||||
.valid(false)
|
||||
.explanation("Cannot specify a Kerberos Credentials Service while also specifying a Kerberos Principal, Kerberos Keytab, or Kerberos Password")
|
||||
.build());
|
||||
}
|
||||
|
||||
if (!isAllowExplicitKeytab() && explicitKeytab != null) {
|
||||
problems.add(new ValidationResult.Builder()
|
||||
.subject("Kerberos Credentials")
|
||||
.valid(false)
|
||||
.explanation("The '" + ALLOW_EXPLICIT_KEYTAB + "' system environment variable is configured to forbid explicitly configuring Kerberos Keytab in processors. "
|
||||
+ "The Kerberos Credentials Service should be used instead of setting the Kerberos Keytab or Kerberos Principal property.")
|
||||
.build());
|
||||
}
|
||||
}
|
||||
|
||||
return problems;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures connection pool by creating an instance of the
|
||||
* {@link BasicDataSource} based on configuration provided with
|
||||
* {@link ConfigurationContext}.
|
||||
* <p>
|
||||
* This operation makes no guarantees that the actual connection could be
|
||||
* made since the underlying system may still go off-line during normal
|
||||
* operation of the connection pool.
|
||||
* <p/>
|
||||
* As of Apache NiFi 1.5.0, due to changes made to
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking
|
||||
* {@link HiveConfigurator#authenticate(Configuration, String, String)}
|
||||
* to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to
|
||||
* relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from
|
||||
* {@link HiveConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent
|
||||
* threads from requesting concurrent relogins. For more information, please read the documentation for
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
|
||||
* <p/>
|
||||
* In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
|
||||
* {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
|
||||
* controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
|
||||
* with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
|
||||
* {@link UserGroupInformation} instance. One of these threads could leave the
|
||||
* {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
|
||||
* while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
|
||||
* authentication attempts that would leave the Hive controller service in an unrecoverable state.
|
||||
*
|
||||
* @see SecurityUtil#loginKerberos(Configuration, String, String)
|
||||
* @see HiveConfigurator#authenticate(Configuration, String, String)
|
||||
* @see HiveConfigurator#authenticate(Configuration, String, String, long)
|
||||
* @param context the configuration context
|
||||
* @throws InitializationException if unable to create a database connection
|
||||
*/
|
||||
@OnEnabled
|
||||
public void onConfigured(final ConfigurationContext context) throws InitializationException {
|
||||
|
||||
ComponentLog log = getLogger();
|
||||
|
||||
final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
|
||||
final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
|
||||
final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue();
|
||||
|
||||
// add any dynamic properties to the Hive configuration
|
||||
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
|
||||
final PropertyDescriptor descriptor = entry.getKey();
|
||||
if (descriptor.isDynamic()) {
|
||||
hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue());
|
||||
}
|
||||
}
|
||||
|
||||
final String drv = HiveDriver.class.getName();
|
||||
if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
|
||||
final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue();
|
||||
final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue();
|
||||
final String explicitPassword = context.getProperty(kerberosProperties.getKerberosPassword()).getValue();
|
||||
final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
|
||||
|
||||
final String resolvedPrincipal;
|
||||
final String resolvedKeytab;
|
||||
if (credentialsService != null) {
|
||||
resolvedPrincipal = credentialsService.getPrincipal();
|
||||
resolvedKeytab = credentialsService.getKeytab();
|
||||
} else {
|
||||
resolvedPrincipal = explicitPrincipal;
|
||||
resolvedKeytab = explicitKeytab;
|
||||
}
|
||||
|
||||
if (resolvedKeytab != null) {
|
||||
kerberosUserReference.set(new KerberosKeytabUser(resolvedPrincipal, resolvedKeytab));
|
||||
log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
|
||||
} else if (explicitPassword != null) {
|
||||
kerberosUserReference.set(new KerberosPasswordUser(resolvedPrincipal, explicitPassword));
|
||||
log.info("Hive Security Enabled, logging in as principal {} with password", new Object[] {resolvedPrincipal});
|
||||
} else {
|
||||
throw new InitializationException("Unable to authenticate with Kerberos, no keytab or password was provided");
|
||||
}
|
||||
|
||||
try {
|
||||
ugi = hiveConfigurator.authenticate(hiveConfig, kerberosUserReference.get());
|
||||
} catch (AuthenticationFailedException ae) {
|
||||
log.error(ae.getMessage(), ae);
|
||||
throw new InitializationException(ae);
|
||||
}
|
||||
|
||||
getLogger().info("Successfully logged in as principal " + resolvedPrincipal);
|
||||
}
|
||||
|
||||
final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue();
|
||||
final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue();
|
||||
final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
|
||||
final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger();
|
||||
final long maxConnectionLifetimeMillis = extractMillisWithInfinite(context.getProperty(MAX_CONN_LIFETIME).evaluateAttributeExpressions());
|
||||
|
||||
dataSource = new BasicDataSource();
|
||||
dataSource.setDriverClassName(drv);
|
||||
|
||||
connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue();
|
||||
|
||||
dataSource.setMaxWaitMillis(maxWaitMillis);
|
||||
dataSource.setMaxTotal(maxTotal);
|
||||
dataSource.setMaxConnLifetimeMillis(maxConnectionLifetimeMillis);
|
||||
|
||||
if (validationQuery != null && !validationQuery.isEmpty()) {
|
||||
dataSource.setValidationQuery(validationQuery);
|
||||
dataSource.setTestOnBorrow(true);
|
||||
}
|
||||
|
||||
dataSource.setUrl(connectionUrl);
|
||||
dataSource.setUsername(user);
|
||||
dataSource.setPassword(passw);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown pool, close all open connections.
|
||||
*/
|
||||
@OnDisabled
|
||||
public void shutdown() {
|
||||
try {
|
||||
dataSource.close();
|
||||
} catch (final SQLException e) {
|
||||
throw new ProcessException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
if (ugi != null) {
|
||||
/*
|
||||
* Explicitly check the TGT and relogin if necessary with the KerberosUser instance. No synchronization
|
||||
* is necessary in the client code, since AbstractKerberosUser's checkTGTAndRelogin method is synchronized.
|
||||
*/
|
||||
getLogger().trace("getting UGI instance");
|
||||
if (kerberosUserReference.get() != null) {
|
||||
// if there's a KerberosUser associated with this UGI, check the TGT and relogin if it is close to expiring
|
||||
KerberosUser kerberosUser = kerberosUserReference.get();
|
||||
getLogger().debug("kerberosUser is " + kerberosUser);
|
||||
try {
|
||||
getLogger().debug("checking TGT on kerberosUser [{}]", new Object[]{kerberosUser});
|
||||
kerberosUser.checkTGTAndRelogin();
|
||||
} catch (final KerberosLoginException e) {
|
||||
throw new ProcessException("Unable to relogin with kerberos credentials for " + kerberosUser.getPrincipal(), e);
|
||||
}
|
||||
} else {
|
||||
getLogger().debug("kerberosUser was null, will not refresh TGT with KerberosUser");
|
||||
// no synchronization is needed for UserGroupInformation.checkTGTAndReloginFromKeytab; UGI handles the synchronization internally
|
||||
ugi.checkTGTAndReloginFromKeytab();
|
||||
}
|
||||
try {
|
||||
return ugi.doAs((PrivilegedExceptionAction<Connection>) () -> dataSource.getConnection());
|
||||
} catch (UndeclaredThrowableException e) {
|
||||
Throwable cause = e.getCause();
|
||||
if (cause instanceof SQLException) {
|
||||
throw (SQLException) cause;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
getLogger().info("Simple Authentication");
|
||||
return dataSource.getConnection();
|
||||
}
|
||||
} catch (SQLException | IOException | InterruptedException e) {
|
||||
getLogger().error("Error getting Hive connection", e);
|
||||
throw new ProcessException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HiveConnectionPool[id=" + getIdentifier() + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return connectionUrl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Overridable by subclasses in the same package, mainly intended for testing purposes to allow verification without having to set environment variables.
|
||||
*/
|
||||
boolean isAllowExplicitKeytab() {
|
||||
return Boolean.parseBoolean(System.getenv(ALLOW_EXPLICIT_KEYTAB));
|
||||
}
|
||||
|
||||
private long extractMillisWithInfinite(PropertyValue prop) {
|
||||
if (prop.getValue() == null || DEFAULT_MAX_CONN_LIFETIME.equals(prop.getValue())) {
|
||||
return -1;
|
||||
} else {
|
||||
return prop.asTimePeriod(TimeUnit.MILLISECONDS);
|
||||
}
|
||||
}
|
||||
|
||||
}
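
/*
 * Illustrative usage sketch (not part of the original file), assuming a processor that holds a
 * HiveDBCPService reference named "hiveDbcpService": connections borrowed from the pool are plain
 * JDBC connections and should be closed to return them to the pool, e.g.:
 *
 *   try (Connection conn = hiveDbcpService.getConnection();
 *        Statement stmt = conn.createStatement();
 *        ResultSet rs = stmt.executeQuery("SHOW TABLES")) {
 *       while (rs.next()) {
 *           // process rs.getString(1)
 *       }
 *   }
 */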
@ -1,344 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.antlr.runtime.tree.CommonTree;
|
||||
import org.apache.hadoop.hive.ql.parse.ASTNode;
|
||||
import org.apache.hadoop.hive.ql.parse.ParseDriver;
|
||||
import org.apache.hadoop.hive.ql.parse.ParseException;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.stream.io.StreamUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Date;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLDataException;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Time;
|
||||
import java.sql.Timestamp;
|
||||
import java.sql.Types;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* An abstract base class for HiveQL processors to share common data, methods, etc.
|
||||
*/
|
||||
public abstract class AbstractHiveQLProcessor extends AbstractSessionFactoryProcessor {
|
||||
|
||||
protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
|
||||
protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");
|
||||
static String ATTR_INPUT_TABLES = "query.input.tables";
|
||||
static String ATTR_OUTPUT_TABLES = "query.output.tables";
|
||||
|
||||
|
||||
public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
|
||||
.name("Hive Database Connection Pooling Service")
|
||||
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
|
||||
.required(true)
|
||||
.identifiesControllerService(HiveDBCPService.class)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
|
||||
.name("hive-charset")
|
||||
.displayName("Character Set")
|
||||
.description("Specifies the character set of the record data.")
|
||||
.required(true)
|
||||
.defaultValue("UTF-8")
|
||||
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||
.build();
|
||||
|
||||
/**
|
||||
* Determines the HiveQL statement that should be executed for the given FlowFile
|
||||
*
|
||||
* @param session the session that can be used to access the given FlowFile
|
||||
* @param flowFile the FlowFile whose HiveQL statement should be executed
|
||||
* @return the HiveQL that is associated with the given FlowFile
|
||||
*/
|
||||
protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
|
||||
// Read the HiveQL from the FlowFile's content
|
||||
final byte[] buffer = new byte[(int) flowFile.getSize()];
|
||||
session.read(flowFile, new InputStreamCallback() {
|
||||
@Override
|
||||
public void process(final InputStream in) throws IOException {
|
||||
StreamUtils.fillBuffer(in, buffer);
|
||||
}
|
||||
});
|
||||
|
||||
// Create the PreparedStatement to use for this FlowFile.
|
||||
return new String(buffer, charset);
|
||||
}
|
||||
|
||||
private class ParameterHolder {
|
||||
String attributeName;
|
||||
int jdbcType;
|
||||
String value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
|
||||
*
|
||||
* @param stmt the statement to set the parameters on
|
||||
* @param attributes the attributes from which to derive parameter indices, values, and types
|
||||
* @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
|
||||
*/
|
||||
protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map<String, String> attributes) throws SQLException {
|
||||
|
||||
Map<Integer, ParameterHolder> parmMap = new TreeMap<Integer, ParameterHolder>();
|
||||
|
||||
for (final Map.Entry<String, String> entry : attributes.entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
|
||||
if (matcher.matches()) {
|
||||
final int parameterIndex = Integer.parseInt(matcher.group(1));
|
||||
if (parameterIndex >= base && parameterIndex < base + paramCount) {
|
||||
final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
|
||||
if (!isNumeric) {
|
||||
throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType");
|
||||
}
|
||||
|
||||
final String valueAttrName = "hiveql.args." + parameterIndex + ".value";
|
||||
|
||||
ParameterHolder ph = new ParameterHolder();
|
||||
int realIndexLoc = parameterIndex - base +1;
|
||||
|
||||
ph.jdbcType = Integer.parseInt(entry.getValue());
|
||||
ph.value = attributes.get(valueAttrName);
|
||||
ph.attributeName = valueAttrName;
|
||||
|
||||
parmMap.put(realIndexLoc, ph);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Now that's we've retrieved the correct number of parameters and it's sorted, let's set them.
|
||||
for (final Map.Entry<Integer, ParameterHolder> entry : parmMap.entrySet()) {
|
||||
final Integer index = entry.getKey();
|
||||
final ParameterHolder ph = entry.getValue();
|
||||
|
||||
try {
|
||||
setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType);
|
||||
} catch (final NumberFormatException nfe) {
|
||||
throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe);
|
||||
}
|
||||
}
|
||||
return base + paramCount;
|
||||
}
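
    /*
     * Illustrative sketch (not part of the original class): how a FlowFile would carry one
     * parameter for "SELECT * FROM users WHERE id = ?" using the hiveql.args.N.type / value
     * attribute convention consumed by setParameters. The attribute values are hypothetical.
     */
    private static Map<String, String> exampleParameterAttributes() {
        final Map<String, String> attributes = new HashMap<>();
        attributes.put("hiveql.args.1.type", String.valueOf(Types.BIGINT));
        attributes.put("hiveql.args.1.value", "42");
        return attributes;
    }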
|
||||
|
||||
/**
|
||||
* Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the
|
||||
* provided PreparedStatement
|
||||
*
|
||||
* @param stmt the PreparedStatement to set the parameter on
|
||||
* @param attrName the name of the attribute that the parameter is coming from - for logging purposes
|
||||
* @param parameterIndex the index of the HiveQL parameter to set
|
||||
* @param parameterValue the value of the HiveQL parameter to set
|
||||
* @param jdbcType the JDBC Type of the HiveQL parameter to set
|
||||
* @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
|
||||
*/
|
||||
protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException {
|
||||
if (parameterValue == null) {
|
||||
stmt.setNull(parameterIndex, jdbcType);
|
||||
} else {
|
||||
try {
|
||||
switch (jdbcType) {
|
||||
case Types.BIT:
|
||||
case Types.BOOLEAN:
|
||||
stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
|
||||
break;
|
||||
case Types.TINYINT:
|
||||
stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
|
||||
break;
|
||||
case Types.SMALLINT:
|
||||
stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
|
||||
break;
|
||||
case Types.INTEGER:
|
||||
stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
|
||||
break;
|
||||
case Types.BIGINT:
|
||||
stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
|
||||
break;
|
||||
case Types.REAL:
|
||||
stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
|
||||
break;
|
||||
case Types.FLOAT:
|
||||
case Types.DOUBLE:
|
||||
stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
|
||||
break;
|
||||
case Types.DECIMAL:
|
||||
case Types.NUMERIC:
|
||||
stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
|
||||
break;
|
||||
case Types.DATE:
|
||||
stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.TIME:
|
||||
stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.TIMESTAMP:
|
||||
stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.CHAR:
|
||||
case Types.VARCHAR:
|
||||
case Types.LONGNVARCHAR:
|
||||
case Types.LONGVARCHAR:
|
||||
stmt.setString(parameterIndex, parameterValue);
|
||||
break;
|
||||
default:
|
||||
stmt.setObject(parameterIndex, parameterValue, jdbcType);
|
||||
break;
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
// Log which attribute/parameter had an error, then rethrow to be handled at the top level
|
||||
getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static class TableName {
|
||||
private final String database;
|
||||
private final String table;
|
||||
private final boolean input;
|
||||
|
||||
TableName(String database, String table, boolean input) {
|
||||
this.database = database;
|
||||
this.table = table;
|
||||
this.input = input;
|
||||
}
|
||||
|
||||
public String getDatabase() {
|
||||
return database;
|
||||
}
|
||||
|
||||
public String getTable() {
|
||||
return table;
|
||||
}
|
||||
|
||||
public boolean isInput() {
|
||||
return input;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return database == null || database.isEmpty() ? table : database + '.' + table;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
TableName tableName = (TableName) o;
|
||||
|
||||
if (input != tableName.input) return false;
|
||||
if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false;
|
||||
return table.equals(tableName.table);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = database != null ? database.hashCode() : 0;
|
||||
result = 31 * result + table.hashCode();
|
||||
result = 31 * result + (input ? 1 : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
protected Set<TableName> findTableNames(final String query) {
|
||||
final ASTNode node;
|
||||
try {
|
||||
node = new ParseDriver().parse(normalize(query));
|
||||
} catch (ParseException e) {
|
||||
// If failed to parse the query, just log a message, but continue.
|
||||
getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e);
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
final HashSet<TableName> tableNames = new HashSet<>();
|
||||
findTableNames(node, tableNames);
|
||||
return tableNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize query.
|
||||
* Hive resolves prepared statement parameters before executing a query,
|
||||
* see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail.
|
||||
* HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one.
|
||||
* In this normalize method, '?' is replaced to 'x' to avoid that.
|
||||
*/
|
||||
private String normalize(String query) {
|
||||
return query.replace('?', 'x');
|
||||
}
|
||||
|
||||
private void findTableNames(final Object obj, final Set<TableName> tableNames) {
|
||||
if (!(obj instanceof CommonTree)) {
|
||||
return;
|
||||
}
|
||||
final CommonTree tree = (CommonTree) obj;
|
||||
final int childCount = tree.getChildCount();
|
||||
if ("TOK_TABNAME".equals(tree.getText())) {
|
||||
final TableName tableName;
|
||||
final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText());
|
||||
switch (childCount) {
|
||||
case 1 :
|
||||
tableName = new TableName(null, tree.getChild(0).getText(), isInput);
|
||||
break;
|
||||
case 2:
|
||||
tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount);
|
||||
}
|
||||
// If parent is TOK_TABREF, then it is an input table.
|
||||
tableNames.add(tableName);
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
findTableNames(tree.getChild(i), tableNames);
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, String> toQueryTableAttributes(Set<TableName> tableNames) {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
for (TableName tableName : tableNames) {
|
||||
final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES;
|
||||
if (attributes.containsKey(attributeName)) {
|
||||
attributes.put(attributeName, attributes.get(attributeName) + "," + tableName);
|
||||
} else {
|
||||
attributes.put(attributeName, tableName.toString());
|
||||
}
|
||||
}
|
||||
return attributes;
|
||||
}
|
||||
}
|
@ -1,293 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.file.DataFileStream;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
|
||||
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
|
||||
import org.apache.hadoop.hive.ql.io.orc.OrcFlowFileWriter;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.SideEffectFree;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.resource.ResourceCardinality;
|
||||
import org.apache.nifi.components.resource.ResourceType;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.DataUnit;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* The ConvertAvroToORC processor takes an Avro-formatted flow file as input and converts it into ORC format.
|
||||
*/
|
||||
@SideEffectFree
|
||||
@SupportsBatching
|
||||
@Tags({"avro", "orc", "hive", "convert"})
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@CapabilityDescription("Converts an Avro record into ORC file format. This processor provides a direct mapping of an Avro record to an ORC record, such "
|
||||
+ "that the resulting ORC file will have the same hierarchical structure as the Avro document. If an incoming FlowFile contains a stream of "
|
||||
+ "multiple Avro records, the resultant FlowFile will contain a ORC file containing all of the Avro records. If an incoming FlowFile does "
|
||||
+ "not contain any records, an empty ORC file is the output. NOTE: Many Avro datatypes (collections, primitives, and unions of primitives, e.g.) can "
|
||||
+ "be converted to ORC, but unions of collections and other complex datatypes may not be able to be converted to ORC.")
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "mime.type", description = "Sets the mime type to application/octet-stream"),
|
||||
@WritesAttribute(attribute = "filename", description = "Sets the filename to the existing filename with the extension replaced by / added to by .orc"),
|
||||
@WritesAttribute(attribute = "record.count", description = "Sets the number of records in the ORC file."),
|
||||
@WritesAttribute(attribute = "hive.ddl", description = "Creates a partial Hive DDL statement for creating a table in Hive from this ORC file. "
|
||||
+ "This can be used in ReplaceText for setting the content to the DDL. To make it valid DDL, add \"LOCATION '<path_to_orc_file_in_hdfs>'\", where "
|
||||
+ "the path is the directory that contains this ORC file on HDFS. For example, ConvertAvroToORC can send flow files to a PutHDFS processor to send the file to "
|
||||
+ "HDFS, then to a ReplaceText to set the content to this DDL (plus the LOCATION clause as described), then to PutHiveQL processor to create the table "
|
||||
+ "if it doesn't exist.")
|
||||
})
|
||||
public class ConvertAvroToORC extends AbstractProcessor {
|
||||
|
||||
// Attributes
|
||||
public static final String ORC_MIME_TYPE = "application/octet-stream";
|
||||
public static final String HIVE_DDL_ATTRIBUTE = "hive.ddl";
|
||||
public static final String RECORD_COUNT_ATTRIBUTE = "record.count";
|
||||
|
||||
|
||||
// Properties
|
||||
public static final PropertyDescriptor ORC_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
|
||||
.name("orc-config-resources")
|
||||
.displayName("ORC Configuration Resources")
|
||||
.description("A file or comma separated list of files which contains the ORC configuration (hive-site.xml, e.g.). Without this, Hadoop "
|
||||
+ "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Please see the ORC documentation for more details.")
|
||||
.required(false)
|
||||
.identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor STRIPE_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("orc-stripe-size")
|
||||
.displayName("Stripe Size")
|
||||
.description("The size of the memory buffer (in bytes) for writing stripes to an ORC file")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
|
||||
.defaultValue("64 MB")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor BUFFER_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("orc-buffer-size")
|
||||
.displayName("Buffer Size")
|
||||
.description("The maximum size of the memory buffers (in bytes) used for compressing and storing a stripe in memory. This is a hint to the ORC writer, "
|
||||
+ "which may choose to use a smaller buffer size based on stripe size and number of columns for efficient stripe writing and memory utilization.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.DATA_SIZE_VALIDATOR)
|
||||
.defaultValue("10 KB")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor COMPRESSION_TYPE = new PropertyDescriptor.Builder()
|
||||
.name("orc-compression-type")
|
||||
.displayName("Compression Type")
|
||||
.required(true)
|
||||
.allowableValues("NONE", "ZLIB", "SNAPPY", "LZO")
|
||||
.defaultValue("NONE")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVE_TABLE_NAME = new PropertyDescriptor.Builder()
|
||||
.name("orc-hive-table-name")
|
||||
.displayName("Hive Table Name")
|
||||
.description("An optional table name to insert into the hive.ddl attribute. The generated DDL can be used by "
|
||||
+ "a PutHiveQL processor (presumably after a PutHDFS processor) to create a table backed by the converted ORC file. "
|
||||
+ "If this property is not provided, the full name (including namespace) of the incoming Avro record will be normalized "
|
||||
+ "and used as the table name.")
|
||||
.required(false)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
|
||||
.build();
|
||||
|
||||
// Relationships
|
||||
static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("A FlowFile is routed to this relationship after it has been converted to ORC format.")
|
||||
.build();
|
||||
static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("A FlowFile is routed to this relationship if it cannot be parsed as Avro or cannot be converted to ORC for any reason")
|
||||
.build();
|
||||
|
||||
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||
private final static Set<Relationship> relationships;
|
||||
|
||||
private volatile Configuration orcConfig;
|
||||
|
||||
/*
|
||||
* Will ensure that the list of property descriptors is built only once.
|
||||
* Will also create a Set of relationships
|
||||
*/
|
||||
static {
|
||||
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||
_propertyDescriptors.add(ORC_CONFIGURATION_RESOURCES);
|
||||
_propertyDescriptors.add(STRIPE_SIZE);
|
||||
_propertyDescriptors.add(BUFFER_SIZE);
|
||||
_propertyDescriptors.add(COMPRESSION_TYPE);
|
||||
_propertyDescriptors.add(HIVE_TABLE_NAME);
|
||||
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
@OnScheduled
|
||||
public void setup(ProcessContext context) {
|
||||
boolean confFileProvided = context.getProperty(ORC_CONFIGURATION_RESOURCES).isSet();
|
||||
if (confFileProvided) {
|
||||
final String configFiles = context.getProperty(ORC_CONFIGURATION_RESOURCES).getValue();
|
||||
orcConfig = HiveJdbcCommon.getConfigurationFromFiles(configFiles);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
long startTime = System.currentTimeMillis();
|
||||
final long stripeSize = context.getProperty(STRIPE_SIZE).asDataSize(DataUnit.B).longValue();
|
||||
final int bufferSize = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
|
||||
final CompressionKind compressionType = CompressionKind.valueOf(context.getProperty(COMPRESSION_TYPE).getValue());
|
||||
final AtomicReference<Schema> hiveAvroSchema = new AtomicReference<>(null);
|
||||
final AtomicInteger totalRecordCount = new AtomicInteger(0);
|
||||
final String fileName = flowFile.getAttribute(CoreAttributes.FILENAME.key());
|
||||
flowFile = session.write(flowFile, (rawIn, rawOut) -> {
|
||||
try (final InputStream in = new BufferedInputStream(rawIn);
|
||||
final OutputStream out = new BufferedOutputStream(rawOut);
|
||||
final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<>())) {
|
||||
|
||||
// Create ORC schema from Avro schema
|
||||
Schema avroSchema = reader.getSchema();
|
||||
|
||||
TypeInfo orcSchema = NiFiOrcUtils.getOrcField(avroSchema);
|
||||
|
||||
if (orcConfig == null) {
|
||||
orcConfig = new Configuration();
|
||||
}
|
||||
|
||||
OrcFlowFileWriter orcWriter = NiFiOrcUtils.createWriter(
|
||||
out,
|
||||
new Path(fileName),
|
||||
orcConfig,
|
||||
orcSchema,
|
||||
stripeSize,
|
||||
compressionType,
|
||||
bufferSize);
|
||||
try {
|
||||
|
||||
int recordCount = 0;
|
||||
while (reader.hasNext()) {
|
||||
GenericRecord currRecord = reader.next();
|
||||
List<Schema.Field> fields = currRecord.getSchema().getFields();
|
||||
if (fields != null) {
|
||||
Object[] row = new Object[fields.size()];
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
Schema.Field field = fields.get(i);
|
||||
Schema fieldSchema = field.schema();
|
||||
Object o = currRecord.get(field.name());
|
||||
try {
|
||||
row[i] = NiFiOrcUtils.convertToORCObject(NiFiOrcUtils.getOrcField(fieldSchema), o);
|
||||
} catch (ArrayIndexOutOfBoundsException aioobe) {
|
||||
getLogger().error("Index out of bounds at record {} for column {}, type {}, and object {}",
|
||||
new Object[]{recordCount, i, fieldSchema.getType().getName(), o.toString()},
|
||||
aioobe);
|
||||
throw new IOException(aioobe);
|
||||
}
|
||||
}
|
||||
orcWriter.addRow(NiFiOrcUtils.createOrcStruct(orcSchema, row));
|
||||
recordCount++;
|
||||
}
|
||||
}
|
||||
hiveAvroSchema.set(avroSchema);
|
||||
totalRecordCount.set(recordCount);
|
||||
} finally {
|
||||
// finished writing this record, close the writer (which will flush to the flow file)
|
||||
orcWriter.close();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
final String hiveTableName = context.getProperty(HIVE_TABLE_NAME).isSet()
|
||||
? context.getProperty(HIVE_TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue()
|
||||
: NiFiOrcUtils.normalizeHiveTableName(hiveAvroSchema.get().getFullName());
|
||||
String hiveDDL = NiFiOrcUtils.generateHiveDDL(hiveAvroSchema.get(), hiveTableName);
|
||||
// Add attributes and transfer to success
|
||||
flowFile = session.putAttribute(flowFile, RECORD_COUNT_ATTRIBUTE, Integer.toString(totalRecordCount.get()));
|
||||
flowFile = session.putAttribute(flowFile, HIVE_DDL_ATTRIBUTE, hiveDDL);
|
||||
StringBuilder newFilename = new StringBuilder();
|
||||
int extensionIndex = fileName.lastIndexOf(".");
|
||||
if (extensionIndex != -1) {
|
||||
newFilename.append(fileName.substring(0, extensionIndex));
|
||||
} else {
|
||||
newFilename.append(fileName);
|
||||
}
|
||||
newFilename.append(".orc");
|
||||
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), ORC_MIME_TYPE);
|
||||
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename.toString());
|
||||
session.transfer(flowFile, REL_SUCCESS);
|
||||
session.getProvenanceReporter().modifyContent(flowFile, "Converted " + totalRecordCount.get() + " records", System.currentTimeMillis() - startTime);
|
||||
|
||||
} catch (ProcessException | IllegalArgumentException e) {
|
||||
getLogger().error("Failed to convert {} from Avro to ORC due to {}; transferring to failure", new Object[]{flowFile, e});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
}
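
For reference, a minimal illustrative sketch of the hive.ddl usage documented in the ConvertAvroToORC annotations above: the attribute carries only a partial CREATE TABLE statement, and a downstream step (such as ReplaceText) appends the LOCATION clause before PutHiveQL executes it. The table definition and HDFS path below are hypothetical and this snippet is not taken from the NiFi codebase.

// Illustrative sketch: completing the partial DDL carried by the hive.ddl attribute.
// The column list and the HDFS directory are hypothetical examples.
public class HiveDdlLocationExample {
    public static void main(String[] args) {
        String partialDdl = "CREATE EXTERNAL TABLE IF NOT EXISTS my_table (id INT, name STRING) STORED AS ORC";
        String orcDirectory = "/data/orc/my_table"; // HDFS directory holding the converted ORC file(s)
        String fullDdl = partialDdl + " LOCATION '" + orcDirectory + "'";
        System.out.println(fullDdl); // the content a ReplaceText processor would hand to PutHiveQL
    }
}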
@@ -1,300 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.ErrorTypes;
|
||||
import org.apache.nifi.processor.util.pattern.ExceptionHandler;
|
||||
import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection;
|
||||
import org.apache.nifi.processor.util.pattern.Put;
|
||||
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||
import org.apache.nifi.processor.util.pattern.RoutingResult;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.SQLNonTransientException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@SeeAlso(SelectHiveQL.class)
|
||||
@InputRequirement(Requirement.INPUT_REQUIRED)
|
||||
@Tags({"sql", "hive", "put", "database", "update", "insert"})
|
||||
@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command "
|
||||
+ "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes "
|
||||
+ "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be "
|
||||
+ "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
|
||||
@ReadsAttributes({
|
||||
@ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer "
|
||||
+ "that represents the JDBC Type of the parameter."),
|
||||
@ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as "
|
||||
+ "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.")
|
||||
})
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||
+ "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."),
|
||||
@WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||
+ "and contains the target table names in 'databaseName.tableName' format.")
|
||||
})
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.PutHive3QL")
|
||||
public class PutHiveQL extends AbstractHiveQLProcessor {
|
||||
|
||||
public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("hive-batch-size")
|
||||
.displayName("Batch Size")
|
||||
.description("The preferred number of FlowFiles to put to the database in a single transaction")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
|
||||
.defaultValue("100")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder()
|
||||
.name("statement-delimiter")
|
||||
.displayName("Statement Delimiter")
|
||||
.description("Statement Delimiter used to separate SQL statements in a multiple statement script")
|
||||
.required(true)
|
||||
.defaultValue(";")
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("A FlowFile is routed to this relationship after the database is successfully updated")
|
||||
.build();
|
||||
public static final Relationship REL_RETRY = new Relationship.Builder()
|
||||
.name("retry")
|
||||
.description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
|
||||
.build();
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
|
||||
+ "such as an invalid query or an integrity constraint violation")
|
||||
.build();
|
||||
|
||||
|
||||
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||
private final static Set<Relationship> relationships;
|
||||
|
||||
/*
|
||||
* Will ensure that the list of property descriptors is built only once.
|
||||
* Will also create a Set of relationships
|
||||
*/
|
||||
static {
|
||||
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||
_propertyDescriptors.add(HIVE_DBCP_SERVICE);
|
||||
_propertyDescriptors.add(BATCH_SIZE);
|
||||
_propertyDescriptors.add(CHARSET);
|
||||
_propertyDescriptors.add(STATEMENT_DELIMITER);
|
||||
_propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE);
|
||||
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
_relationships.add(REL_RETRY);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
private Put<FunctionContext, Connection> process;
|
||||
private ExceptionHandler<FunctionContext> exceptionHandler;
|
||||
|
||||
@OnScheduled
|
||||
public void constructProcess() {
|
||||
exceptionHandler = new ExceptionHandler<>();
|
||||
exceptionHandler.mapException(e -> {
|
||||
if (e instanceof SQLNonTransientException) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (e instanceof SQLException) {
|
||||
// Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes
|
||||
int errorCode = ((SQLException) e).getErrorCode();
|
||||
getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode});
|
||||
if (errorCode >= 10000 && errorCode < 20000) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (errorCode >= 20000 && errorCode < 30000) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (errorCode >= 30000 && errorCode < 40000) {
|
||||
return ErrorTypes.TemporalInputFailure;
|
||||
} else if (errorCode >= 40000 && errorCode < 50000) {
|
||||
// These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes
|
||||
// a ProcessException, we'll route to failure via an InvalidInput error type.
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else {
|
||||
// Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure
|
||||
// or rolled back depending on the user's setting of Rollback On Failure.
|
||||
return ErrorTypes.TemporalFailure;
|
||||
}
|
||||
} else {
|
||||
return ErrorTypes.UnknownFailure;
|
||||
}
|
||||
});
|
||||
exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger()));
|
||||
|
||||
process = new Put<>();
|
||||
process.setLogger(getLogger());
|
||||
process.initConnection(initConnection);
|
||||
process.fetchFlowFiles(fetchFlowFiles);
|
||||
process.putFlowFile(putFlowFile);
|
||||
process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
private class FunctionContext extends RollbackOnFailure {
|
||||
final Charset charset;
|
||||
final String statementDelimiter;
|
||||
final long startNanos = System.nanoTime();
|
||||
|
||||
String connectionUrl;
|
||||
|
||||
|
||||
private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) {
|
||||
super(rollbackOnFailure, false);
|
||||
this.charset = charset;
|
||||
this.statementDelimiter = statementDelimiter;
|
||||
}
|
||||
}
|
||||
|
||||
private InitConnection<FunctionContext, Connection> initConnection = (context, session, fc, ffs) -> {
|
||||
final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
|
||||
final Connection connection = dbcpService.getConnection(ffs == null || ffs.isEmpty() ? Collections.emptyMap() : ffs.get(0).getAttributes());
|
||||
fc.connectionUrl = dbcpService.getConnectionURL();
|
||||
return connection;
|
||||
};
|
||||
|
||||
private FetchFlowFiles<FunctionContext> fetchFlowFiles = (context, session, functionContext, result) -> {
|
||||
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
|
||||
return session.get(batchSize);
|
||||
};
|
||||
|
||||
private Put.PutFlowFile<FunctionContext, Connection> putFlowFile = (context, session, fc, conn, flowFile, result) -> {
|
||||
final String script = getHiveQL(session, flowFile, fc.charset);
|
||||
String regex = "(?<!\\\\)" + Pattern.quote(fc.statementDelimiter);
|
||||
|
||||
String[] hiveQLs = script.split(regex);
|
||||
|
||||
final Set<TableName> tableNames = new HashSet<>();
|
||||
exceptionHandler.execute(fc, flowFile, input -> {
|
||||
int loc = 1;
|
||||
for (String hiveQLStr: hiveQLs) {
|
||||
getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr});
|
||||
|
||||
final String hiveQL = hiveQLStr.trim();
|
||||
if (!StringUtils.isEmpty(hiveQL)) {
|
||||
try (final PreparedStatement stmt = conn.prepareStatement(hiveQL)) {
|
||||
|
||||
// Get ParameterMetadata
|
||||
// Hive JDBC Doesn't support this yet:
|
||||
// ParameterMetaData pmd = stmt.getParameterMetaData();
|
||||
// int paramCount = pmd.getParameterCount();
|
||||
int paramCount = StringUtils.countMatches(hiveQL, "?");
|
||||
|
||||
if (paramCount > 0) {
|
||||
loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes());
|
||||
}
|
||||
|
||||
// Parse hiveQL and extract input/output tables
|
||||
try {
|
||||
tableNames.addAll(findTableNames(hiveQL));
|
||||
} catch (Exception e) {
|
||||
// If failed to parse the query, just log a warning message, but continue.
|
||||
getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e);
|
||||
}
|
||||
|
||||
// Execute the statement
|
||||
stmt.execute();
|
||||
fc.proceed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit a Provenance SEND event
|
||||
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos);
|
||||
|
||||
final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames));
|
||||
session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true);
|
||||
result.routeTo(flowFile, REL_SUCCESS);
|
||||
|
||||
}, onFlowFileError(context, session, result));
|
||||
|
||||
};
|
||||
|
||||
private OnError<FunctionContext, FlowFile> onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) {
|
||||
OnError<FunctionContext, FlowFile> onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY);
|
||||
onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> {
|
||||
switch (r.destination()) {
|
||||
case Failure:
|
||||
getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e);
|
||||
break;
|
||||
case Retry:
|
||||
getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
|
||||
new Object[] {i, e}, e);
|
||||
break;
|
||||
case Self:
|
||||
getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e);
|
||||
break;
|
||||
}
|
||||
});
|
||||
return RollbackOnFailure.createOnError(onFlowFileError);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean();
|
||||
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||
final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue();
|
||||
final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter);
|
||||
RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext));
|
||||
}
|
||||
}
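
The hiveql.args.N.type / hiveql.args.N.value convention described in the PutHiveQL ReadsAttributes documentation above can be seen in a small, self-contained sketch; the statement, parameter types, and values are hypothetical examples, not taken from this commit.

// Illustrative sketch of the parameter-attribute convention used by PutHiveQL:
// hiveql.args.N.type holds the JDBC type code, hiveql.args.N.value holds the value
// bound to the Nth '?' in the statement.
import java.sql.Types;
import java.util.LinkedHashMap;
import java.util.Map;

public class HiveQlArgsExample {
    public static void main(String[] args) {
        // FlowFile content (the parametrized HiveQL statement):
        String statement = "INSERT INTO users VALUES (?, ?)";

        // FlowFile attributes supplying type and value for each parameter:
        Map<String, String> attributes = new LinkedHashMap<>();
        attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
        attributes.put("hiveql.args.1.value", "42");
        attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
        attributes.put("hiveql.args.2.value", "alice");

        System.out.println(statement);
        attributes.forEach((name, value) -> System.out.println(name + " = " + value));
    }
}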
File diff suppressed because it is too large
@@ -1,572 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions;
|
||||
import org.apache.nifi.util.StopWatch;
|
||||
import org.apache.nifi.util.hive.CsvOutputOptions;
|
||||
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO;
|
||||
|
||||
@EventDriven
|
||||
@InputRequirement(Requirement.INPUT_ALLOWED)
|
||||
@Tags({"hive", "sql", "select", "jdbc", "query", "database"})
|
||||
@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format."
|
||||
+ " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on "
|
||||
+ "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. "
|
||||
+ "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the "
|
||||
+ "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.")
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."),
|
||||
@WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."),
|
||||
@WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."),
|
||||
@WritesAttribute(attribute = "selecthiveql.query.duration", description = "Combined duration of the query execution time and fetch time in milliseconds. "
|
||||
+ "If 'Max Rows Per Flow File' is set, then this number will reflect only the fetch time for the rows in the Flow File instead of the entire result set."),
|
||||
@WritesAttribute(attribute = "selecthiveql.query.executiontime", description = "Duration of the query execution time in milliseconds. "
|
||||
+ "This number will reflect the query execution time regardless of the 'Max Rows Per Flow File' setting."),
|
||||
@WritesAttribute(attribute = "selecthiveql.query.fetchtime", description = "Duration of the result set fetch time in milliseconds. "
|
||||
+ "If 'Max Rows Per Flow File' is set, then this number will reflect only the fetch time for the rows in the Flow File instead of the entire result set."),
|
||||
@WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set "
|
||||
+ "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."),
|
||||
@WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of "
|
||||
+ "FlowFiles produced by a single ResultSet. This can be used in conjunction with the "
|
||||
+ "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."),
|
||||
@WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of "
|
||||
+ "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be "
|
||||
+ "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order "
|
||||
+ "FlowFiles were produced"),
|
||||
@WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.")
|
||||
})
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.SelectHive3QL")
|
||||
public class SelectHiveQL extends AbstractHiveQLProcessor {
|
||||
|
||||
public static final String RESULT_ROW_COUNT = "selecthiveql.row.count";
|
||||
public static final String RESULT_QUERY_DURATION = "selecthiveql.query.duration";
|
||||
public static final String RESULT_QUERY_EXECUTION_TIME = "selecthiveql.query.executiontime";
|
||||
public static final String RESULT_QUERY_FETCH_TIME = "selecthiveql.query.fetchtime";
|
||||
|
||||
// Relationships
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("Successfully created FlowFile from HiveQL query result set.")
|
||||
.build();
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.")
|
||||
.build();
|
||||
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-pre-query")
|
||||
.displayName("HiveQL Pre-Query")
|
||||
.description("A semicolon-delimited list of queries executed before the main SQL query is executed. "
|
||||
+ "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. "
|
||||
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-query")
|
||||
.displayName("HiveQL Select Query")
|
||||
.description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-post-query")
|
||||
.displayName("HiveQL Post-Query")
|
||||
.description("A semicolon-delimited list of queries executed after the main SQL query is executed. "
|
||||
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("hive-fetch-size")
|
||||
.displayName("Fetch Size")
|
||||
.description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be "
|
||||
+ "honored and/or exact. If the value specified is zero, then the hint is ignored.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-rows")
|
||||
.displayName("Max Rows Per Flow File")
|
||||
.description("The maximum number of result rows that will be included in a single FlowFile. " +
|
||||
"This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-frags")
|
||||
.displayName("Maximum Number of Fragments")
|
||||
.description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " +
|
||||
"This prevents OutOfMemoryError when this processor ingests huge table.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder()
|
||||
.name("csv-header")
|
||||
.displayName("CSV Header")
|
||||
.description("Include Header in Output")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder()
|
||||
.name("csv-alt-header")
|
||||
.displayName("Alternate CSV Header")
|
||||
.description("Comma separated list of header fields")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder()
|
||||
.name("csv-delimiter")
|
||||
.displayName("CSV Delimiter")
|
||||
.description("CSV Delimiter used to separate fields")
|
||||
.required(true)
|
||||
.defaultValue(",")
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder()
|
||||
.name("csv-quote")
|
||||
.displayName("CSV Quote")
|
||||
.description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder()
|
||||
.name("csv-escape")
|
||||
.displayName("CSV Escape")
|
||||
.description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder()
|
||||
.name("hive-output-format")
|
||||
.displayName("Output Format")
|
||||
.description("How to represent the records coming from Hive (Avro, CSV, e.g.)")
|
||||
.required(true)
|
||||
.allowableValues(AVRO, CSV)
|
||||
.defaultValue(AVRO)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||
.build();
|
||||
|
||||
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||
private final static Set<Relationship> relationships;
|
||||
|
||||
/*
|
||||
* Will ensure that the list of property descriptors is built only once.
|
||||
* Will also create a Set of relationships
|
||||
*/
|
||||
static {
|
||||
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||
_propertyDescriptors.add(HIVE_DBCP_SERVICE);
|
||||
_propertyDescriptors.add(HIVEQL_PRE_QUERY);
|
||||
_propertyDescriptors.add(HIVEQL_SELECT_QUERY);
|
||||
_propertyDescriptors.add(HIVEQL_POST_QUERY);
|
||||
_propertyDescriptors.add(FETCH_SIZE);
|
||||
_propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE);
|
||||
_propertyDescriptors.add(MAX_FRAGMENTS);
|
||||
_propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT);
|
||||
_propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_HEADER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_DELIMITER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_QUOTE);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_ESCAPE);
|
||||
_propertyDescriptors.add(CHARSET);
|
||||
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
@OnScheduled
|
||||
public void setup(ProcessContext context) {
|
||||
// If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization
|
||||
if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) {
|
||||
final String errorString = "Either the Select Query must be specified or there must be an incoming connection "
|
||||
+ "providing flowfile(s) containing a SQL select query";
|
||||
getLogger().error(errorString);
|
||||
throw new ProcessException(errorString);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session));
|
||||
}
|
||||
|
||||
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
|
||||
FlowFile flowfile = null;
|
||||
|
||||
// If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
|
||||
// However, if we have no FlowFile and we have connections coming from other Processors, then
|
||||
// we know that we should run only if we have a FlowFile.
|
||||
if (context.hasIncomingConnection()) {
|
||||
if (fileToProcess == null && context.hasNonLoopConnection()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
final ComponentLog logger = getLogger();
|
||||
final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
|
||||
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||
|
||||
List<String> preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||
List<String> postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||
|
||||
final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
|
||||
|
||||
// Source the SQL
|
||||
String hqlStatement;
|
||||
|
||||
if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
|
||||
hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
} else {
|
||||
// If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
|
||||
// If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
|
||||
final StringBuilder queryContents = new StringBuilder();
|
||||
session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
|
||||
hqlStatement = queryContents.toString();
|
||||
}
|
||||
|
||||
|
||||
final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||
final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||
final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
|
||||
? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
|
||||
: 0;
|
||||
final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
|
||||
final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
|
||||
final StopWatch stopWatch = new StopWatch(true);
|
||||
final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
|
||||
final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
|
||||
final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
|
||||
final String fragmentIdentifier = UUID.randomUUID().toString();
|
||||
|
||||
try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
|
||||
final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())
|
||||
) {
|
||||
Pair<String,SQLException> failure = executeConfigStatements(con, preQueries);
|
||||
if (failure != null) {
|
||||
// In case of failure, assigning config query to "hqlStatement" to follow current error handling
|
||||
hqlStatement = failure.getLeft();
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw failure.getRight();
|
||||
}
|
||||
if (fetchSize != null && fetchSize > 0) {
|
||||
try {
|
||||
st.setFetchSize(fetchSize);
|
||||
} catch (SQLException se) {
|
||||
// Not all drivers support this, just log the error (at debug level) and move on
|
||||
logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se);
|
||||
}
|
||||
}
|
||||
|
||||
final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
|
||||
try {
|
||||
logger.debug("Executing query {}", new Object[]{hqlStatement});
|
||||
if (flowbased) {
|
||||
// Hive JDBC Doesn't Support this yet:
|
||||
// ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
|
||||
// int paramCount = pmd.getParameterCount();
|
||||
|
||||
// Alternate way to determine number of params in SQL.
|
||||
int paramCount = StringUtils.countMatches(hqlStatement, "?");
|
||||
|
||||
if (paramCount > 0) {
|
||||
setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
|
||||
}
|
||||
}
|
||||
|
||||
final StopWatch executionTime = new StopWatch(true);
|
||||
final ResultSet resultSet;
|
||||
|
||||
try {
|
||||
resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement));
|
||||
} catch (SQLException se) {
|
||||
// If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw se;
|
||||
}
|
||||
long executionTimeElapsed = executionTime.getElapsed(TimeUnit.MILLISECONDS);
|
||||
|
||||
int fragmentIndex = 0;
|
||||
String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
|
||||
while (true) {
|
||||
final AtomicLong nrOfRows = new AtomicLong(0L);
|
||||
final StopWatch fetchTime = new StopWatch(true);
|
||||
|
||||
flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
|
||||
if (baseFilename == null) {
|
||||
baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
|
||||
}
|
||||
try {
|
||||
flowfile = session.write(flowfile, out -> {
|
||||
try {
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
|
||||
} else if (CSV.equals(outputFormat)) {
|
||||
CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
|
||||
nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
|
||||
} else {
|
||||
nrOfRows.set(0L);
|
||||
throw new ProcessException("Unsupported output format: " + outputFormat);
|
||||
}
|
||||
} catch (final SQLException | RuntimeException e) {
|
||||
throw new ProcessException("Error during database query or conversion of records.", e);
|
||||
}
|
||||
});
|
||||
} catch (ProcessException e) {
|
||||
// Add flowfile to results before rethrowing so it will be removed from session in outer catch
|
||||
resultSetFlowFiles.add(flowfile);
|
||||
throw e;
|
||||
}
|
||||
long fetchTimeElapsed = fetchTime.getElapsed(TimeUnit.MILLISECONDS);
|
||||
|
||||
if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
// Set attribute for how many rows were selected
|
||||
attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
|
||||
|
||||
try {
|
||||
// Set input/output table names by parsing the query
|
||||
attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
|
||||
} catch (Exception e) {
|
||||
// If failed to parse the query, just log a warning message, but continue.
|
||||
getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e);
|
||||
}
|
||||
|
||||
// Set MIME type on output document and add extension to filename
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
|
||||
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
|
||||
} else if (CSV.equals(outputFormat)) {
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
|
||||
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
|
||||
}
|
||||
|
||||
if (maxRowsPerFlowFile > 0) {
|
||||
attributes.put("fragment.identifier", fragmentIdentifier);
|
||||
attributes.put("fragment.index", String.valueOf(fragmentIndex));
|
||||
}
|
||||
|
||||
attributes.put(RESULT_QUERY_DURATION, String.valueOf(executionTimeElapsed + fetchTimeElapsed));
|
||||
attributes.put(RESULT_QUERY_EXECUTION_TIME, String.valueOf(executionTimeElapsed));
|
||||
attributes.put(RESULT_QUERY_FETCH_TIME, String.valueOf(fetchTimeElapsed));
|
||||
|
||||
flowfile = session.putAllAttributes(flowfile, attributes);
|
||||
|
||||
logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
|
||||
new Object[]{flowfile, nrOfRows.get()});
|
||||
|
||||
if (context.hasIncomingConnection()) {
|
||||
// If the flow file came from an incoming connection, issue a Fetch provenance event
|
||||
session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
|
||||
"Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||
} else {
|
||||
// If we created a flow file from rows received from Hive, issue a Receive provenance event
|
||||
session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||
}
|
||||
resultSetFlowFiles.add(flowfile);
|
||||
} else {
|
||||
// If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on
|
||||
session.remove(flowfile);
|
||||
if (resultSetFlowFiles != null && resultSetFlowFiles.size()>0) {
|
||||
flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size()-1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
fragmentIndex++;
|
||||
if (maxFragments > 0 && fragmentIndex >= maxFragments) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < resultSetFlowFiles.size(); i++) {
|
||||
// Set count on all FlowFiles
|
||||
if (maxRowsPerFlowFile > 0) {
|
||||
resultSetFlowFiles.set(i,
|
||||
session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
|
||||
}
|
||||
}
|
||||
|
||||
} catch (final SQLException e) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
failure = executeConfigStatements(con, postQueries);
|
||||
if (failure != null) {
|
||||
hqlStatement = failure.getLeft();
|
||||
if (resultSetFlowFiles != null) {
|
||||
resultSetFlowFiles.forEach(ff -> session.remove(ff));
|
||||
}
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw failure.getRight();
|
||||
}
|
||||
|
||||
session.transfer(resultSetFlowFiles, REL_SUCCESS);
|
||||
if (fileToProcess != null) {
|
||||
session.remove(fileToProcess);
|
||||
}
|
||||
} catch (final ProcessException | SQLException e) {
|
||||
logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e});
|
||||
if (flowfile == null) {
|
||||
// This can happen if any exceptions occur while setting up the connection, statement, etc.
|
||||
logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
|
||||
new Object[]{hqlStatement, e});
|
||||
context.yield();
|
||||
} else {
|
||||
if (context.hasIncomingConnection()) {
|
||||
logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
|
||||
new Object[]{hqlStatement, flowfile, e});
|
||||
flowfile = session.penalize(flowfile);
|
||||
} else {
|
||||
logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
|
||||
new Object[]{hqlStatement, e});
|
||||
context.yield();
|
||||
}
|
||||
session.transfer(flowfile, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Executes given queries using pre-defined connection.
|
||||
* Returns null on success, or a query string if failed.
|
||||
*/
|
||||
protected Pair<String,SQLException> executeConfigStatements(final Connection con, final List<String> configQueries){
|
||||
if (configQueries == null || configQueries.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for (String confSQL : configQueries) {
|
||||
try(final Statement st = con.createStatement()){
|
||||
st.execute(confSQL);
|
||||
} catch (SQLException e) {
|
||||
return Pair.of(confSQL, e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected List<String> getQueries(final String value) {
|
||||
if (value == null || value.length() == 0 || value.trim().length() == 0) {
|
||||
return null;
|
||||
}
|
||||
final List<String> queries = new LinkedList<>();
|
||||
for (String query : value.split(";")) {
|
||||
if (query.trim().length() > 0) {
|
||||
queries.add(query.trim());
|
||||
}
|
||||
}
|
||||
return queries;
|
||||
}
|
||||
}
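
The fragment.identifier / fragment.index / fragment.count attributes described in the SelectHiveQL WritesAttributes documentation above correlate the FlowFiles produced from one result set when Max Rows Per Flow File is set. The sketch below is illustrative only; the identifier and counts are hypothetical.

// Illustrative sketch of how the fragment.* attributes relate across the FlowFiles
// of a single result set: same identifier, increasing index, shared total count.
import java.util.Map;
import java.util.UUID;

public class FragmentAttributesExample {
    public static void main(String[] args) {
        String fragmentIdentifier = UUID.randomUUID().toString(); // same value on every FlowFile of the result set
        int fragmentCount = 3;                                    // total FlowFiles produced from the result set

        for (int fragmentIndex = 0; fragmentIndex < fragmentCount; fragmentIndex++) {
            Map<String, String> attributes = Map.of(
                    "fragment.identifier", fragmentIdentifier,
                    "fragment.index", String.valueOf(fragmentIndex),
                    "fragment.count", String.valueOf(fragmentCount));
            System.out.println(attributes);
        }
    }
}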
@@ -1,769 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.components.AllowableValue;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.ValidationContext;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.components.Validator;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.DiscontinuedException;
|
||||
import org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException;
|
||||
import org.apache.nifi.serialization.MalformedRecordException;
|
||||
import org.apache.nifi.serialization.RecordReader;
|
||||
import org.apache.nifi.serialization.RecordReaderFactory;
|
||||
import org.apache.nifi.serialization.RecordSetWriter;
|
||||
import org.apache.nifi.serialization.RecordSetWriterFactory;
|
||||
import org.apache.nifi.serialization.SimpleRecordSchema;
|
||||
import org.apache.nifi.serialization.WriteResult;
|
||||
import org.apache.nifi.serialization.record.MapRecord;
|
||||
import org.apache.nifi.serialization.record.Record;
|
||||
import org.apache.nifi.serialization.record.RecordField;
|
||||
import org.apache.nifi.serialization.record.RecordSchema;
|
||||
import org.apache.nifi.util.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
@Tags({"hive", "metadata", "jdbc", "database", "table"})
|
||||
@CapabilityDescription("This processor uses a Hive JDBC connection and incoming records to generate any Hive 1.2 table changes needed to support the incoming records.")
|
||||
@ReadsAttributes({
|
||||
@ReadsAttribute(attribute = "hive.table.management.strategy", description = "This attribute is read if the 'Table Management Strategy' property is configured "
|
||||
+ "to use the value of this attribute. The value of this attribute should correspond (ignoring case) to a valid option of the 'Table Management Strategy' property.")
|
||||
})
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "output.table", description = "This attribute is written on the flow files routed to the 'success' "
|
||||
+ "and 'failure' relationships, and contains the target table name."),
|
||||
@WritesAttribute(attribute = "output.path", description = "This attribute is written on the flow files routed to the 'success' "
|
||||
+ "and 'failure' relationships, and contains the path on the file system to the table (or partition location if the table is partitioned)."),
|
||||
@WritesAttribute(attribute = "mime.type", description = "Sets the mime.type attribute to the MIME Type specified by the Record Writer, only if a Record Writer is specified "
|
||||
+ "and Update Field Names is 'true'."),
|
||||
@WritesAttribute(attribute = "record.count", description = "Sets the number of records in the FlowFile, only if a Record Writer is specified and Update Field Names is 'true'.")
|
||||
})
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@RequiresInstanceClassLoading
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.UpdateHive3Table")
|
||||
public class UpdateHiveTable extends AbstractProcessor {
|
||||
|
||||
static final String TEXTFILE = "TEXTFILE";
|
||||
static final String SEQUENCEFILE = "SEQUENCEFILE";
|
||||
static final String ORC = "ORC";
|
||||
static final String PARQUET = "PARQUET";
|
||||
static final String AVRO = "AVRO";
|
||||
static final String RCFILE = "RCFILE";
|
||||
|
||||
static final AllowableValue TEXTFILE_STORAGE = new AllowableValue(TEXTFILE, TEXTFILE, "Stored as plain text files. TEXTFILE is the default file format, unless the configuration "
|
||||
+ "parameter hive.default.fileformat has a different setting.");
|
||||
static final AllowableValue SEQUENCEFILE_STORAGE = new AllowableValue(SEQUENCEFILE, SEQUENCEFILE, "Stored as compressed Sequence Files.");
|
||||
static final AllowableValue ORC_STORAGE = new AllowableValue(ORC, ORC, "Stored as ORC file format. Supports ACID Transactions & Cost-based Optimizer (CBO). "
|
||||
+ "Stores column-level metadata.");
|
||||
static final AllowableValue PARQUET_STORAGE = new AllowableValue(PARQUET, PARQUET, "Stored as Parquet format for the Parquet columnar storage format.");
|
||||
static final AllowableValue AVRO_STORAGE = new AllowableValue(AVRO, AVRO, "Stored as Avro format.");
|
||||
static final AllowableValue RCFILE_STORAGE = new AllowableValue(RCFILE, RCFILE, "Stored as Record Columnar File format.");
|
||||
|
||||
static final AllowableValue CREATE_IF_NOT_EXISTS = new AllowableValue("Create If Not Exists", "Create If Not Exists",
|
||||
"Create a table with the given schema if it does not already exist");
|
||||
static final AllowableValue FAIL_IF_NOT_EXISTS = new AllowableValue("Fail If Not Exists", "Fail If Not Exists",
|
||||
"If the target does not already exist, log an error and route the flowfile to failure");
|
||||
|
||||
static final String TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE = "hive.table.management.strategy";
|
||||
static final AllowableValue MANAGED_TABLE = new AllowableValue("Managed", "Managed",
|
||||
"Any tables created by this processor will be managed tables (see Hive documentation for details).");
|
||||
static final AllowableValue EXTERNAL_TABLE = new AllowableValue("External", "External",
|
||||
"Any tables created by this processor will be external tables located at the `External Table Location` property value.");
|
||||
static final AllowableValue ATTRIBUTE_DRIVEN_TABLE = new AllowableValue("Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute",
|
||||
"Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute",
|
||||
"Inspects the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' FlowFile attribute to determine the table management strategy. The value "
|
||||
+ "of this attribute must be a case-insensitive match to one of the other allowable values (Managed, External, e.g.).");
|
||||
|
||||
static final String ATTR_OUTPUT_TABLE = "output.table";
|
||||
static final String ATTR_OUTPUT_PATH = "output.path";
|
||||
|
||||
// Properties
|
||||
static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
|
||||
.name("record-reader")
|
||||
.displayName("Record Reader")
|
||||
.description("The service for reading incoming flow files. The reader is only used to determine the schema of the records, the actual records will not be processed.")
|
||||
.identifiesControllerService(RecordReaderFactory.class)
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
|
||||
.name("hive-dbcp-service")
|
||||
.displayName("Hive Database Connection Pooling Service")
|
||||
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
|
||||
.required(true)
|
||||
.identifiesControllerService(HiveDBCPService.class)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
|
||||
.name("hive-table-name")
|
||||
.displayName("Table Name")
|
||||
.description("The name of the database table to update. If the table does not exist, then it will either be created or an error thrown, depending "
|
||||
+ "on the value of the Create Table property.")
|
||||
.required(true)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor CREATE_TABLE = new PropertyDescriptor.Builder()
|
||||
.name("hive-create-table")
|
||||
.displayName("Create Table Strategy")
|
||||
.description("Specifies how to process the target table when it does not exist (create it, fail, e.g.).")
|
||||
.required(true)
|
||||
.addValidator(Validator.VALID)
|
||||
.allowableValues(CREATE_IF_NOT_EXISTS, FAIL_IF_NOT_EXISTS)
|
||||
.defaultValue(FAIL_IF_NOT_EXISTS.getValue())
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_MANAGEMENT_STRATEGY = new PropertyDescriptor.Builder()
|
||||
.name("hive-create-table-management")
|
||||
.displayName("Create Table Management Strategy")
|
||||
.description("Specifies (when a table is to be created) whether the table is a managed table or an external table. Note that when External is specified, the "
|
||||
+ "'External Table Location' property must be specified. If the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' value is selected, 'External Table Location' "
|
||||
+ "must still be specified, but can contain Expression Language or be set to the empty string, and is ignored when the attribute evaluates to 'Managed'.")
|
||||
.required(true)
|
||||
.addValidator(Validator.VALID)
|
||||
.allowableValues(MANAGED_TABLE, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE)
|
||||
.defaultValue(MANAGED_TABLE.getValue())
|
||||
.dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor UPDATE_FIELD_NAMES = new PropertyDescriptor.Builder()
|
||||
.name("hive-update-field-names")
|
||||
.displayName("Update Field Names")
|
||||
.description("This property indicates whether to update the output schema such that the field names are set to the exact column names from the specified "
|
||||
+ "table. This should be used if the incoming record field names may not match the table's column names in terms of upper- and lower-case. For example, this property should be "
|
||||
+ "set to true if the output FlowFile (and target table storage) is Avro format, as Hive/Impala expects the field names to match the column names exactly.")
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("false")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor RECORD_WRITER_FACTORY = new PropertyDescriptor.Builder()
|
||||
.name("hive-record-writer")
|
||||
.displayName("Record Writer")
|
||||
.description("Specifies the Controller Service to use for writing results to a FlowFile. The Record Writer should use Inherit Schema to emulate the inferred schema behavior, i.e. "
|
||||
+ "an explicit schema need not be defined in the writer, and will be supplied by the same logic used to infer the schema from the column types. If Create Table Strategy is set "
|
||||
+ "'Create If Not Exists', the Record Writer's output format must match the Record Reader's format in order for the data to be placed in the created table location. Note that "
|
||||
+ "this property is only used if 'Update Field Names' is set to true and the field names do not all match the column names exactly. If no "
|
||||
+ "update is needed for any field names (or 'Update Field Names' is false), the Record Writer is not used and instead the input FlowFile is routed to success or failure "
|
||||
+ "without modification.")
|
||||
.identifiesControllerService(RecordSetWriterFactory.class)
|
||||
.dependsOn(UPDATE_FIELD_NAMES, "true")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor EXTERNAL_TABLE_LOCATION = new PropertyDescriptor.Builder()
|
||||
.name("hive-external-table-location")
|
||||
.displayName("External Table Location")
|
||||
.description("Specifies (when an external table is to be created) the file path (in HDFS, e.g.) to store table data.")
|
||||
.required(true)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
|
||||
.dependsOn(TABLE_MANAGEMENT_STRATEGY, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_STORAGE_FORMAT = new PropertyDescriptor.Builder()
|
||||
.name("hive-storage-format")
|
||||
.displayName("Create Table Storage Format")
|
||||
.description("If a table is to be created, the specified storage format will be used.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.allowableValues(TEXTFILE_STORAGE, SEQUENCEFILE_STORAGE, ORC_STORAGE, PARQUET_STORAGE, AVRO_STORAGE, RCFILE_STORAGE)
|
||||
.defaultValue(TEXTFILE)
|
||||
.dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor QUERY_TIMEOUT = new PropertyDescriptor.Builder()
|
||||
.name("hive-query-timeout")
|
||||
.displayName("Query Timeout")
|
||||
.description("Sets the number of seconds the driver will wait for a query to execute. "
|
||||
+ "A value of 0 means no timeout. NOTE: Non-zero values may not be supported by the driver.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor PARTITION_CLAUSE = new PropertyDescriptor.Builder()
|
||||
.name("hive-partition-clause")
|
||||
.displayName("Partition Clause")
|
||||
.description("Specifies a comma-separated list of attribute names and optional data types corresponding to the partition columns of the target table. Simply put, if the table is "
|
||||
+ "partitioned or is to be created with partitions, each partition name should be an attribute on the FlowFile and listed in this property. This assumes all incoming records "
|
||||
+ "belong to the same partition and the partition columns are not fields in the record. An example of specifying this field is if PartitionRecord "
|
||||
+ "is upstream and two partition columns 'name' (of type string) and 'age' (of type integer) are used, then this property can be set to 'name string, age int'. The data types "
|
||||
+ "are optional and if partition(s) are to be created they will default to string type if not specified. For non-string primitive types, specifying the data type for existing "
|
||||
+ "partition columns is helpful for interpreting the partition value(s). If the table exists, the data types need not be specified "
|
||||
+ "(and are ignored in that case). This property must be set if the table is partitioned, and there must be an attribute for each partition column in the table. "
|
||||
+ "The values of the attributes will be used as the partition values, and the resulting output.path attribute value will reflect the location of the partition in the filesystem "
|
||||
+ "(for use downstream in processors such as PutHDFS).")
|
||||
.required(false)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
// Relationships
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("A FlowFile containing records routed to this relationship after the record has been successfully transmitted to Hive.")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("A FlowFile containing records routed to this relationship if the record could not be transmitted to Hive.")
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> propertyDescriptors;
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(ProcessorInitializationContext context) {
|
||||
List<PropertyDescriptor> props = new ArrayList<>();
|
||||
props.add(RECORD_READER);
|
||||
props.add(HIVE_DBCP_SERVICE);
|
||||
props.add(TABLE_NAME);
|
||||
props.add(PARTITION_CLAUSE);
|
||||
props.add(CREATE_TABLE);
|
||||
props.add(TABLE_MANAGEMENT_STRATEGY);
|
||||
props.add(EXTERNAL_TABLE_LOCATION);
|
||||
props.add(TABLE_STORAGE_FORMAT);
|
||||
props.add(UPDATE_FIELD_NAMES);
|
||||
props.add(RECORD_WRITER_FACTORY);
|
||||
props.add(QUERY_TIMEOUT);
|
||||
|
||||
propertyDescriptors = Collections.unmodifiableList(props);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
|
||||
List<ValidationResult> validationResults = new ArrayList<>(super.customValidate(validationContext));
|
||||
final boolean recordWriterFactorySet = validationContext.getProperty(RECORD_WRITER_FACTORY).isSet();
|
||||
final boolean createIfNotExists = validationContext.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue());
|
||||
final boolean updateFieldNames = validationContext.getProperty(UPDATE_FIELD_NAMES).asBoolean();
|
||||
|
||||
if (!recordWriterFactorySet && updateFieldNames) {
|
||||
validationResults.add(new ValidationResult.Builder().subject(RECORD_WRITER_FACTORY.getDisplayName())
|
||||
.explanation("Record Writer must be set if 'Update Field Names' is true").valid(false).build());
|
||||
}
|
||||
final String tableManagementStrategy = validationContext.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue();
|
||||
final boolean managedTable;
|
||||
if (!ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) {
|
||||
managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy);
|
||||
// Ensure valid configuration for external tables
|
||||
if (createIfNotExists && !managedTable && !validationContext.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) {
|
||||
validationResults.add(new ValidationResult.Builder().subject(EXTERNAL_TABLE_LOCATION.getDisplayName())
|
||||
.explanation("External Table Location must be set when Table Management Strategy is set to External").valid(false).build());
|
||||
}
|
||||
}
|
||||
return validationResults;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
|
||||
|
||||
FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
|
||||
final RecordSetWriterFactory recordWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
|
||||
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
|
||||
final String partitionClauseString = context.getProperty(PARTITION_CLAUSE).evaluateAttributeExpressions(flowFile).getValue();
|
||||
List<String> partitionClauseElements = null;
|
||||
if (!StringUtils.isEmpty(partitionClauseString)) {
|
||||
partitionClauseElements = Arrays.stream(partitionClauseString.split(",")).filter(Objects::nonNull).map(String::trim).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
final ComponentLog log = getLogger();
|
||||
|
||||
try {
|
||||
final RecordReader reader;
|
||||
|
||||
try (final InputStream in = session.read(flowFile)) {
|
||||
// if we fail to create the RecordReader then we want to route to failure, so we need to
|
||||
// handle this separately from the other IOExceptions which normally route to retry
|
||||
try {
|
||||
reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
|
||||
} catch (Exception e) {
|
||||
throw new RecordReaderFactoryException("Unable to create RecordReader", e);
|
||||
}
|
||||
} catch (RecordReaderFactoryException rrfe) {
|
||||
log.error(
|
||||
"Failed to create {} for {} - routing to failure",
|
||||
new Object[]{RecordReader.class.getSimpleName(), flowFile},
|
||||
rrfe
|
||||
);
|
||||
// Since we are wrapping the exceptions above there should always be a cause
|
||||
// but it's possible it might not have a message. This handles that by logging
|
||||
// the name of the class thrown.
|
||||
Throwable c = rrfe.getCause();
|
||||
if (c != null) {
|
||||
session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown");
|
||||
} else {
|
||||
session.putAttribute(flowFile, "record.error.message", rrfe.getClass().getCanonicalName() + " Thrown");
|
||||
}
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
RecordSchema recordSchema = reader.getSchema();
|
||||
|
||||
final boolean createIfNotExists = context.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue());
|
||||
final boolean updateFieldNames = context.getProperty(UPDATE_FIELD_NAMES).asBoolean();
|
||||
if (recordWriterFactory == null && updateFieldNames) {
|
||||
throw new ProcessException("Record Writer must be set if 'Update Field Names' is true");
|
||||
}
|
||||
final String tableManagementStrategy = context.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue();
|
||||
final boolean managedTable;
|
||||
if (ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) {
|
||||
String tableManagementStrategyAttribute = flowFile.getAttribute(TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE);
|
||||
if (MANAGED_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) {
|
||||
managedTable = true;
|
||||
} else if (EXTERNAL_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) {
|
||||
managedTable = false;
|
||||
} else {
|
||||
log.error("The '{}' attribute either does not exist or has invalid value: {}. Must be one of (ignoring case): Managed, External. "
|
||||
+ "Routing flowfile to failure",
|
||||
new Object[]{TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE, tableManagementStrategyAttribute});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy);
|
||||
}
|
||||
|
||||
// Ensure valid configuration for external tables
|
||||
if (createIfNotExists && !managedTable && !context.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) {
|
||||
throw new IOException("External Table Location must be set when Table Management Strategy is set to External");
|
||||
}
|
||||
final String externalTableLocation = managedTable ? null : context.getProperty(EXTERNAL_TABLE_LOCATION).evaluateAttributeExpressions(flowFile).getValue();
|
||||
if (!managedTable && StringUtils.isEmpty(externalTableLocation)) {
|
||||
log.error("External Table Location has invalid value: {}. Routing flowfile to failure", new Object[]{externalTableLocation});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
final String storageFormat = context.getProperty(TABLE_STORAGE_FORMAT).getValue();
|
||||
final HiveDBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(HiveDBCPService.class);
|
||||
try (final Connection connection = dbcpService.getConnection()) {
|
||||
final Map<String,String> attributes = new HashMap<>(flowFile.getAttributes());
|
||||
OutputMetadataHolder outputMetadataHolder = checkAndUpdateTableSchema(attributes, connection, recordSchema, tableName, partitionClauseElements,
|
||||
createIfNotExists, externalTableLocation, storageFormat, updateFieldNames);
|
||||
if (outputMetadataHolder != null) {
|
||||
// The output schema changed (i.e. field names were updated), so write out the corresponding FlowFile
|
||||
try {
|
||||
final FlowFile inputFlowFile = flowFile;
|
||||
flowFile = session.write(flowFile, (in, out) -> {
|
||||
|
||||
// if we fail to create the RecordReader then we want to route to failure, so we need to
|
||||
// handle this separately from the other IOExceptions which normally route to retry
|
||||
final RecordReader recordReader;
|
||||
final RecordSetWriter recordSetWriter;
|
||||
try {
|
||||
recordReader = recordReaderFactory.createRecordReader(inputFlowFile, in, getLogger());
|
||||
recordSetWriter = recordWriterFactory.createWriter(getLogger(), outputMetadataHolder.getOutputSchema(), out, attributes);
|
||||
} catch (Exception e) {
|
||||
if(e instanceof IOException) {
|
||||
throw (IOException) e;
|
||||
}
|
||||
throw new IOException(new RecordReaderFactoryException("Unable to create RecordReader", e));
|
||||
}
|
||||
|
||||
WriteResult writeResult = updateRecords(recordSchema, outputMetadataHolder, recordReader, recordSetWriter);
|
||||
recordSetWriter.flush();
|
||||
recordSetWriter.close();
|
||||
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), recordSetWriter.getMimeType());
|
||||
attributes.putAll(writeResult.getAttributes());
|
||||
});
|
||||
} catch (final Exception e) {
|
||||
getLogger().error("Failed to process {}; will route to failure", new Object[]{flowFile, e});
|
||||
// Since we are wrapping the exceptions above there should always be a cause
|
||||
// but it's possible it might not have a message. This handles that by logging
|
||||
// the name of the class thrown.
|
||||
Throwable c = e.getCause();
|
||||
if (c != null) {
|
||||
session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown");
|
||||
} else {
|
||||
session.putAttribute(flowFile, "record.error.message", e.getClass().getCanonicalName() + " Thrown");
|
||||
}
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
attributes.put(ATTR_OUTPUT_TABLE, tableName);
|
||||
flowFile = session.putAllAttributes(flowFile, attributes);
|
||||
session.getProvenanceReporter().invokeRemoteProcess(flowFile, dbcpService.getConnectionURL());
|
||||
session.transfer(flowFile, REL_SUCCESS);
|
||||
}
|
||||
} catch (IOException | SQLException e) {
|
||||
|
||||
flowFile = session.putAttribute(flowFile, ATTR_OUTPUT_TABLE, tableName);
|
||||
log.error("Exception while processing {} - routing to failure", new Object[]{flowFile}, e);
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
|
||||
} catch (DiscontinuedException e) {
|
||||
// The input FlowFile processing is discontinued. Keep it in the input queue.
|
||||
getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e);
|
||||
session.transfer(flowFile, Relationship.SELF);
|
||||
} catch (Throwable t) {
|
||||
throw (t instanceof ProcessException) ? (ProcessException) t : new ProcessException(t);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized OutputMetadataHolder checkAndUpdateTableSchema(Map<String,String> attributes, final Connection conn, final RecordSchema schema,
|
||||
final String tableName, List<String> partitionClause, final boolean createIfNotExists,
|
||||
final String externalTableLocation, final String storageFormat, final boolean updateFieldNames) throws IOException {
|
||||
// Read in the current table metadata, compare it to the reader's schema, and
|
||||
// add any columns from the schema that are missing in the table
|
||||
try (Statement s = conn.createStatement()) {
|
||||
// Determine whether the table exists
|
||||
ResultSet tables = s.executeQuery("SHOW TABLES");
|
||||
List<String> tableNames = new ArrayList<>();
|
||||
String hiveTableName;
|
||||
while (tables.next() && StringUtils.isNotEmpty(hiveTableName = tables.getString(1))) {
|
||||
tableNames.add(hiveTableName);
|
||||
}
|
||||
|
||||
List<String> columnsToAdd = new ArrayList<>();
|
||||
String outputPath;
|
||||
boolean tableCreated = false;
|
||||
if (!tableNames.contains(tableName) && createIfNotExists) {
|
||||
StringBuilder createTableStatement = new StringBuilder();
|
||||
for (RecordField recordField : schema.getFields()) {
|
||||
String recordFieldName = recordField.getFieldName();
|
||||
// The field does not exist in the table, add it
|
||||
columnsToAdd.add("`" + recordFieldName + "` " + NiFiOrcUtils.getHiveTypeFromFieldType(recordField.getDataType(), true));
|
||||
getLogger().debug("Adding column " + recordFieldName + " to table " + tableName);
|
||||
}
|
||||
|
||||
// Handle partition clause
|
||||
if (partitionClause == null) {
|
||||
partitionClause = Collections.emptyList();
|
||||
}
|
||||
List<String> validatedPartitionClause = new ArrayList<>(partitionClause.size());
|
||||
for (String partition : partitionClause) {
|
||||
String[] partitionInfo = partition.split(" ");
|
||||
if (partitionInfo.length != 2) {
|
||||
validatedPartitionClause.add("`" + partitionInfo[0] + "` string");
|
||||
} else {
|
||||
validatedPartitionClause.add("`" + partitionInfo[0] + "` " + partitionInfo[1]);
|
||||
}
|
||||
}
|
||||
|
||||
createTableStatement.append("CREATE ")
|
||||
.append(externalTableLocation == null ? "" : "EXTERNAL ")
|
||||
.append("TABLE IF NOT EXISTS `")
|
||||
.append(tableName)
|
||||
.append("` (")
|
||||
.append(String.join(", ", columnsToAdd))
|
||||
.append(") ")
|
||||
.append(validatedPartitionClause.isEmpty() ? "" : "PARTITIONED BY (" + String.join(", ", validatedPartitionClause) + ") ")
|
||||
.append("STORED AS ")
|
||||
.append(storageFormat)
|
||||
.append(externalTableLocation == null ? "" : " LOCATION '" + externalTableLocation + "'");
|
||||
|
||||
String createTableSql = createTableStatement.toString();
|
||||
|
||||
if (StringUtils.isNotEmpty(createTableSql)) {
|
||||
// Perform the table create
|
||||
getLogger().info("Executing Hive DDL: " + createTableSql);
|
||||
s.execute(createTableSql);
|
||||
}
|
||||
|
||||
tableCreated = true;
|
||||
}
|
||||
|
||||
// Process the table (columns, partitions, location, etc.)
|
||||
List<String> hiveColumns = new ArrayList<>();
|
||||
|
||||
String describeTable = "DESC FORMATTED `" + tableName + "`";
|
||||
ResultSet tableInfo = s.executeQuery(describeTable);
|
||||
// The result has 3 columns: col_name, data_type, comment. If the first row is a header, skip it; otherwise add the column name
|
||||
tableInfo.next();
|
||||
String columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
// If the column was a header, check for a blank line to follow and skip it, otherwise add the column name
|
||||
if (columnName.startsWith("#")) {
|
||||
tableInfo.next();
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName)) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
}
|
||||
|
||||
// Collect all column names
|
||||
while (tableInfo.next() && StringUtils.isNotEmpty(columnName = tableInfo.getString(1))) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
|
||||
// Collect all partition columns
|
||||
boolean moreRows = true;
|
||||
boolean headerFound = false;
|
||||
while (moreRows && !headerFound) {
|
||||
String line = tableInfo.getString(1);
|
||||
if ("# Partition Information".equals(line)) {
|
||||
headerFound = true;
|
||||
} else if ("# Detailed Table Information".equals(line)) {
|
||||
// Not partitioned, exit the loop with headerFound = false
|
||||
break;
|
||||
}
|
||||
moreRows = tableInfo.next();
|
||||
}
|
||||
|
||||
List<String> partitionColumns = new ArrayList<>();
|
||||
List<String> partitionColumnsEqualsValueList = new ArrayList<>();
|
||||
List<String> partitionColumnsLocationList = new ArrayList<>();
|
||||
if (headerFound) {
|
||||
// If the table is partitioned, construct the partition=value strings for each partition column
|
||||
String partitionColumnName;
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) {
|
||||
partitionColumns.add(columnName);
|
||||
}
|
||||
// If the column was a header, check for a blank line to follow and skip it, otherwise add the column name
|
||||
if (columnName.startsWith("#")) {
|
||||
tableInfo.next();
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName)) {
|
||||
partitionColumns.add(columnName);
|
||||
}
|
||||
}
|
||||
while (tableInfo.next() && StringUtils.isNotEmpty(partitionColumnName = tableInfo.getString(1))) {
|
||||
partitionColumns.add(partitionColumnName);
|
||||
}
|
||||
|
||||
final int partitionColumnsSize = partitionColumns.size();
|
||||
final int partitionClauseSize = (partitionClause == null) ? 0 : partitionClause.size();
|
||||
if (partitionClauseSize != partitionColumnsSize) {
|
||||
throw new IOException("Found " + partitionColumnsSize + " partition columns but " + partitionClauseSize + " partition values were supplied");
|
||||
}
|
||||
|
||||
for (int i = 0; i < partitionClauseSize; i++) {
|
||||
String partitionName = partitionClause.get(i).split(" ")[0];
|
||||
String partitionValue = attributes.get(partitionName);
|
||||
if (StringUtils.isEmpty(partitionValue)) {
|
||||
throw new IOException("No value found for partition value attribute '" + partitionName + "'");
|
||||
}
|
||||
if (!partitionColumns.contains(partitionName)) {
|
||||
throw new IOException("Cannot add partition '" + partitionName + "' to existing table");
|
||||
}
|
||||
partitionColumnsEqualsValueList.add("`" + partitionName + "`='" + partitionValue + "'");
|
||||
// Add unquoted version for the output path
|
||||
partitionColumnsLocationList.add(partitionName + "=" + partitionValue);
|
||||
}
|
||||
}
|
||||
|
||||
// Get table location
|
||||
moreRows = true;
|
||||
headerFound = false;
|
||||
while (moreRows && !headerFound) {
|
||||
String line = tableInfo.getString(1);
|
||||
if (line.startsWith("Location:")) {
|
||||
headerFound = true;
|
||||
continue; // Don't do a next() here, need to get the second column value
|
||||
}
|
||||
moreRows = tableInfo.next();
|
||||
}
|
||||
String tableLocation = tableInfo.getString(2);
|
||||
|
||||
String alterTableSql;
|
||||
// If the table wasn't newly created, alter it accordingly
|
||||
if (!tableCreated) {
|
||||
StringBuilder alterTableStatement = new StringBuilder();
|
||||
// Handle new columns
|
||||
for (RecordField recordField : schema.getFields()) {
|
||||
String recordFieldName = recordField.getFieldName().toLowerCase();
|
||||
if (!hiveColumns.contains(recordFieldName) && !partitionColumns.contains(recordFieldName)) {
|
||||
// The field does not exist in the table (and is not a partition column), add it
|
||||
columnsToAdd.add("`" + recordFieldName + "` " + NiFiOrcUtils.getHiveTypeFromFieldType(recordField.getDataType(), true));
|
||||
getLogger().info("Adding column " + recordFieldName + " to table " + tableName);
|
||||
}
|
||||
}
|
||||
|
||||
if (!columnsToAdd.isEmpty()) {
|
||||
alterTableStatement.append("ALTER TABLE `")
|
||||
.append(tableName)
|
||||
.append("` ADD COLUMNS (")
|
||||
.append(String.join(", ", columnsToAdd))
|
||||
.append(")");
|
||||
|
||||
alterTableSql = alterTableStatement.toString();
|
||||
if (StringUtils.isNotEmpty(alterTableSql)) {
|
||||
// Perform the table update
|
||||
getLogger().info("Executing Hive DDL: " + alterTableSql);
|
||||
s.execute(alterTableSql);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outputPath = tableLocation;
|
||||
|
||||
// Handle new partition values
|
||||
if (!partitionColumnsEqualsValueList.isEmpty()) {
|
||||
alterTableSql = "ALTER TABLE `" +
|
||||
tableName +
|
||||
"` ADD IF NOT EXISTS PARTITION (" +
|
||||
String.join(", ", partitionColumnsEqualsValueList) +
|
||||
")";
|
||||
if (StringUtils.isNotEmpty(alterTableSql)) {
|
||||
// Perform the table update
|
||||
getLogger().info("Executing Hive DDL: " + alterTableSql);
|
||||
s.execute(alterTableSql);
|
||||
}
|
||||
// Add attribute for HDFS location of the partition values
|
||||
outputPath = tableLocation + "/" + String.join("/", partitionColumnsLocationList);
|
||||
}
|
||||
|
||||
// If updating field names, return an OutputMetadataHolder containing the new RecordSchema and field mapping; otherwise return null
|
||||
OutputMetadataHolder outputMetadataHolder;
|
||||
if (updateFieldNames) {
|
||||
List<RecordField> inputRecordFields = schema.getFields();
|
||||
List<RecordField> outputRecordFields = new ArrayList<>();
|
||||
Map<String,String> fieldMap = new HashMap<>();
|
||||
boolean needsUpdating = false;
|
||||
|
||||
for (RecordField inputRecordField : inputRecordFields) {
|
||||
final String inputRecordFieldName = inputRecordField.getFieldName();
|
||||
boolean found = false;
|
||||
for (String hiveColumnName : hiveColumns) {
|
||||
if (inputRecordFieldName.equalsIgnoreCase(hiveColumnName)) {
|
||||
// Set a flag if the field name doesn't match the column name exactly. This overall flag will determine whether
|
||||
// the records need updating (if true) or not (if false)
|
||||
if (!inputRecordFieldName.equals(hiveColumnName)) {
|
||||
needsUpdating = true;
|
||||
}
|
||||
fieldMap.put(inputRecordFieldName, hiveColumnName);
|
||||
outputRecordFields.add(new RecordField(hiveColumnName, inputRecordField.getDataType(), inputRecordField.getDefaultValue(), inputRecordField.isNullable()));
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
// If the input field doesn't correspond to a Hive table column, keep its original name in the field map
|
||||
fieldMap.put(inputRecordFieldName, inputRecordFieldName);
|
||||
}
|
||||
}
|
||||
outputMetadataHolder = needsUpdating ? new OutputMetadataHolder(new SimpleRecordSchema(outputRecordFields), fieldMap)
|
||||
: null;
|
||||
} else {
|
||||
outputMetadataHolder = null;
|
||||
}
|
||||
attributes.put(ATTR_OUTPUT_PATH, outputPath);
|
||||
return outputMetadataHolder;
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized WriteResult updateRecords(final RecordSchema inputRecordSchema, final OutputMetadataHolder outputMetadataHolder,
|
||||
final RecordReader reader, final RecordSetWriter writer) throws IOException {
|
||||
try {
|
||||
writer.beginRecordSet();
|
||||
Record inputRecord;
|
||||
while((inputRecord = reader.nextRecord()) != null) {
|
||||
List<RecordField> inputRecordFields = inputRecordSchema.getFields();
|
||||
Map<String,Object> outputRecordFields = new HashMap<>(inputRecordFields.size());
|
||||
// Copy values from input field name to output field name
|
||||
for(Map.Entry<String,String> mapping : outputMetadataHolder.getFieldMap().entrySet()) {
|
||||
outputRecordFields.put(mapping.getValue(), inputRecord.getValue(mapping.getKey()));
|
||||
}
|
||||
Record outputRecord = new MapRecord(outputMetadataHolder.getOutputSchema(), outputRecordFields);
|
||||
writer.write(outputRecord);
|
||||
}
|
||||
return writer.finishRecordSet();
|
||||
|
||||
} catch (MalformedRecordException mre) {
|
||||
throw new IOException("Error reading records: "+mre.getMessage(), mre);
|
||||
}
|
||||
}
|
||||
|
||||
private static class OutputMetadataHolder {
|
||||
private final RecordSchema outputSchema;
|
||||
private final Map<String,String> fieldMap;
|
||||
|
||||
public OutputMetadataHolder(RecordSchema outputSchema, Map<String, String> fieldMap) {
|
||||
this.outputSchema = outputSchema;
|
||||
this.fieldMap = fieldMap;
|
||||
}
|
||||
|
||||
public RecordSchema getOutputSchema() {
|
||||
return outputSchema;
|
||||
}
|
||||
|
||||
public Map<String, String> getFieldMap() {
|
||||
return fieldMap;
|
||||
}
|
||||
}
|
||||
}
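To make the DDL assembly in checkAndUpdateTableSchema() concrete, the sketch below builds the same statement shape for a hypothetical external 'users' table with one partition column. The table name, columns, storage format, and location are illustrative; the processor derives them from the record schema, property values, and FlowFile attributes.

import java.util.List;

public class CreateTableDdlSketch {
    public static void main(String[] args) {
        // Hypothetical inputs standing in for the values computed by the processor.
        String tableName = "users";
        List<String> columnsToAdd = List.of("`id` INT", "`email` STRING");
        List<String> validatedPartitionClause = List.of("`age` int");
        String storageFormat = "ORC";
        String externalTableLocation = "/warehouse/users";

        // Same builder pattern as the processor: CREATE [EXTERNAL] TABLE IF NOT EXISTS ... STORED AS ... [LOCATION ...]
        StringBuilder createTableStatement = new StringBuilder();
        createTableStatement.append("CREATE ")
                .append(externalTableLocation == null ? "" : "EXTERNAL ")
                .append("TABLE IF NOT EXISTS `")
                .append(tableName)
                .append("` (")
                .append(String.join(", ", columnsToAdd))
                .append(") ")
                .append(validatedPartitionClause.isEmpty() ? "" : "PARTITIONED BY (" + String.join(", ", validatedPartitionClause) + ") ")
                .append("STORED AS ")
                .append(storageFormat)
                .append(externalTableLocation == null ? "" : " LOCATION '" + externalTableLocation + "'");

        // Prints: CREATE EXTERNAL TABLE IF NOT EXISTS `users` (`id` INT, `email` STRING) PARTITIONED BY (`age` int) STORED AS ORC LOCATION '/warehouse/users'
        System.out.println(createTableStatement);
    }
}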
|
|
@@ -1,23 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
public class AuthenticationFailedException extends Exception {
|
||||
public AuthenticationFailedException(String reason, Exception cause) {
|
||||
super(reason, cause);
|
||||
}
|
||||
}
|
|
@@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
public class CsvOutputOptions {
|
||||
|
||||
private boolean header = true;
|
||||
private String altHeader = null;
|
||||
private String delimiter = ",";
|
||||
private boolean quote = false;
|
||||
private boolean escape = true;
|
||||
|
||||
private int maxRowsPerFlowFile = 0;
|
||||
|
||||
public boolean isHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public String getAltHeader() {
|
||||
return altHeader;
|
||||
}
|
||||
|
||||
|
||||
public String getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
|
||||
public boolean isQuote() {
|
||||
return quote;
|
||||
}
|
||||
|
||||
public boolean isEscape() {
|
||||
return escape;
|
||||
}
|
||||
|
||||
public int getMaxRowsPerFlowFile() {
|
||||
return maxRowsPerFlowFile;
|
||||
}
|
||||
|
||||
public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) {
|
||||
this.header = header;
|
||||
this.altHeader = altHeader;
|
||||
this.delimiter = delimiter;
|
||||
this.quote = quote;
|
||||
this.escape = escape;
|
||||
this.maxRowsPerFlowFile = maxRowsPerFlowFile;
|
||||
}
|
||||
}
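A brief usage note: the constructor takes all six options positionally, so a caller wanting the defaults shown in the field initializers still passes them explicitly. The values below are illustrative.

// header=true, no alternate header, comma delimiter, no quoting, escaping enabled, no per-FlowFile row limit
CsvOutputOptions options = new CsvOutputOptions(true, null, ",", false, true, 0);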
|
|
@@ -1,136 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.hadoop.KerberosProperties;
|
||||
import org.apache.nifi.hadoop.SecurityUtil;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.security.krb.KerberosUser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
public class HiveConfigurator {
|
||||
|
||||
public Collection<ValidationResult> validate(String configFiles, String principal, String keyTab, String password,
|
||||
AtomicReference<ValidationResources> validationResourceHolder, ComponentLog log) {
|
||||
|
||||
final List<ValidationResult> problems = new ArrayList<>();
|
||||
ValidationResources resources = validationResourceHolder.get();
|
||||
|
||||
// if no resources in the holder, or if the holder has different resources loaded,
|
||||
// then load the Configuration and set the new resources in the holder
|
||||
if (resources == null || !configFiles.equals(resources.getConfigResources())) {
|
||||
log.debug("Reloading validation resources");
|
||||
resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
|
||||
validationResourceHolder.set(resources);
|
||||
}
|
||||
|
||||
final Configuration hiveConfig = resources.getConfiguration();
|
||||
|
||||
problems.addAll(KerberosProperties.validatePrincipalWithKeytabOrPassword(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, password, log));
|
||||
|
||||
return problems;
|
||||
}
|
||||
|
||||
public HiveConf getConfigurationFromFiles(final String configFiles) {
|
||||
final HiveConf hiveConfig = new HiveConf();
|
||||
if (StringUtils.isNotBlank(configFiles)) {
|
||||
for (final String configFile : configFiles.split(",")) {
|
||||
hiveConfig.addResource(new Path(configFile.trim()));
|
||||
}
|
||||
}
|
||||
return hiveConfig;
|
||||
}
|
||||
|
||||
public void preload(Configuration configuration) {
|
||||
try {
|
||||
FileSystem.get(configuration).close();
|
||||
UserGroupInformation.setConfiguration(configuration);
|
||||
} catch (IOException ioe) {
|
||||
// Suppress the exception; if the configuration is invalid, later uses of it will fail and surface the error
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Acquires a {@link UserGroupInformation} using the given {@link Configuration} and {@link KerberosUser}.
|
||||
* @see SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)
|
||||
* @param hiveConfig The Configuration to apply to the acquired UserGroupInformation
|
||||
* @param kerberosUser The KerberosUser to authenticate
|
||||
* @return A UserGroupInformation instance created using the Subject of the given KerberosUser
|
||||
* @throws AuthenticationFailedException if authentication fails
|
||||
*/
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, KerberosUser kerberosUser) throws AuthenticationFailedException {
|
||||
try {
|
||||
return SecurityUtil.getUgiForKerberosUser(hiveConfig, kerberosUser);
|
||||
} catch (IOException ioe) {
|
||||
throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* As of Apache NiFi 1.5.0, due to changes made to
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
|
||||
* class to authenticate a principal with Kerberos, Hive controller services no longer
|
||||
* attempt relogins explicitly. For more information, please read the documentation for
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
|
||||
* <p/>
|
||||
* In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
|
||||
* {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
|
||||
* controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
|
||||
* with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
|
||||
* {@link UserGroupInformation} instance. One of these threads could leave the
|
||||
* {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
|
||||
* while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
|
||||
* authentication attempts that would leave the Hive controller service in an unrecoverable state.
|
||||
*
|
||||
* @see SecurityUtil#loginKerberos(Configuration, String, String)
|
||||
* @deprecated Use {@link SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)}
|
||||
*/
|
||||
@Deprecated
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException {
|
||||
UserGroupInformation ugi;
|
||||
try {
|
||||
ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab);
|
||||
} catch (IOException ioe) {
|
||||
throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
|
||||
}
|
||||
return ugi;
|
||||
}
|
||||
|
||||
/**
|
||||
* As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper
|
||||
* method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a
|
||||
* {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins.
|
||||
*
|
||||
* @see HiveConfigurator#authenticate(Configuration, String, String)
|
||||
*/
|
||||
@Deprecated
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException {
|
||||
return authenticate(hiveConfig, principal, keyTab);
|
||||
}
|
||||
}
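For orientation, a controller service would typically wire these methods together roughly as sketched below. The hive-site.xml path is a placeholder, and the KerberosUser is assumed to come from the service's Kerberos configuration rather than from this class.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.security.krb.KerberosUser;
import org.apache.nifi.util.hive.AuthenticationFailedException;
import org.apache.nifi.util.hive.HiveConfigurator;

public class HiveConfiguratorUsageSketch {
    public static UserGroupInformation login(KerberosUser kerberosUser) throws AuthenticationFailedException {
        HiveConfigurator hiveConfigurator = new HiveConfigurator();
        // Load hive-site.xml (path is illustrative) and any other listed resources into a HiveConf.
        HiveConf hiveConf = hiveConfigurator.getConfigurationFromFiles("/etc/hive/conf/hive-site.xml");
        // Prime the FileSystem/UGI static state so later calls pick up this configuration.
        hiveConfigurator.preload(hiveConf);
        // Acquire a UserGroupInformation for the supplied KerberosUser.
        return hiveConfigurator.authenticate(hiveConf, kerberosUser);
    }
}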
|
|
@@ -1,462 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.SchemaBuilder;
|
||||
import org.apache.avro.SchemaBuilder.FieldAssembler;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.nifi.components.PropertyDescriptor;

import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static java.sql.Types.ARRAY;
import static java.sql.Types.BIGINT;
import static java.sql.Types.BINARY;
import static java.sql.Types.BIT;
import static java.sql.Types.BLOB;
import static java.sql.Types.BOOLEAN;
import static java.sql.Types.CHAR;
import static java.sql.Types.CLOB;
import static java.sql.Types.DATE;
import static java.sql.Types.DECIMAL;
import static java.sql.Types.DOUBLE;
import static java.sql.Types.FLOAT;
import static java.sql.Types.INTEGER;
import static java.sql.Types.JAVA_OBJECT;
import static java.sql.Types.LONGNVARCHAR;
import static java.sql.Types.LONGVARBINARY;
import static java.sql.Types.LONGVARCHAR;
import static java.sql.Types.NCHAR;
import static java.sql.Types.NUMERIC;
import static java.sql.Types.NVARCHAR;
import static java.sql.Types.OTHER;
import static java.sql.Types.REAL;
import static java.sql.Types.ROWID;
import static java.sql.Types.SMALLINT;
import static java.sql.Types.SQLXML;
import static java.sql.Types.STRUCT;
import static java.sql.Types.TIME;
import static java.sql.Types.TIMESTAMP;
import static java.sql.Types.TINYINT;
import static java.sql.Types.VARBINARY;
import static java.sql.Types.VARCHAR;

/**
 * JDBC / HiveQL common functions.
 */
public class HiveJdbcCommon {

    public static final String AVRO = "Avro";
    public static final String CSV = "CSV";

    public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary";
    public static final String CSV_MIME_TYPE = "text/csv";

    public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder()
            .name("hive-normalize-avro")
            .displayName("Normalize Table/Column Names")
            .description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods "
                    + "will be changed to underscores in order to build a valid Avro record.")
            .allowableValues("true", "false")
            .defaultValue("false")
            .required(true)
            .build();

    public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException {
        return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null);
    }

    public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback)
            throws SQLException, IOException {
        final Schema schema = createSchema(rs, recordName, convertNames);
        final GenericRecord rec = new GenericData.Record(schema);

        final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
        try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
            dataFileWriter.create(schema, outStream);

            final ResultSetMetaData meta = rs.getMetaData();
            final int nrOfColumns = meta.getColumnCount();
            long nrOfRows = 0;
            while (rs.next()) {
                if (callback != null) {
                    callback.processRow(rs);
                }
                for (int i = 1; i <= nrOfColumns; i++) {
                    final int javaSqlType = meta.getColumnType(i);
                    Object value = rs.getObject(i);
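
                    // Map the JDBC value to an Avro-friendly representation: binary types become a ByteBuffer,
                    // Byte/BigDecimal/BigInteger are widened or stringified, numbers are re-read with the typed
                    // getters, and any remaining type falls back to toString().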
                    if (value == null) {
                        rec.put(i - 1, null);

                    } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) {
                        // Byte arrays require slightly different handling
                        ByteBuffer bb = null;
                        if (value instanceof byte[]) {
                            bb = ByteBuffer.wrap((byte[]) value);
                        } else if (value instanceof ByteBuffer) {
                            bb = (ByteBuffer) value;
                        }
                        if (bb != null) {
                            rec.put(i - 1, bb);
                        } else {
                            throw new IOException("Could not process binary object of type " + value.getClass().getName());
                        }

                    } else if (value instanceof Byte) {
                        // tinyint(1) columns are reported by the JDBC driver as java.sql.Types.TINYINT,
                        // but the value is returned as java.lang.Byte (at least the H2 JDBC driver works this way).
                        // Putting the Byte directly into the Avro record results in:
                        // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
                        rec.put(i - 1, ((Byte) value).intValue());

                    } else if (value instanceof BigDecimal || value instanceof BigInteger) {
                        // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
                        rec.put(i - 1, value.toString());

                    } else if (value instanceof Number) {
                        // Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned
                        // when the JDBC type is 6 (Float) for example.
                        if (javaSqlType == FLOAT) {
                            value = rs.getFloat(i);
                        } else if (javaSqlType == DOUBLE) {
                            value = rs.getDouble(i);
                        } else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) {
                            value = rs.getInt(i);
                        }

                        rec.put(i - 1, value);

                    } else if (value instanceof Boolean) {
                        rec.put(i - 1, value);
                    } else if (value instanceof java.sql.SQLXML) {
                        rec.put(i - 1, ((java.sql.SQLXML) value).getString());
                    } else {
                        // The different types that we support are numbers (int, long, double, float),
                        // as well as boolean values and Strings. Since Avro doesn't provide
                        // timestamp types, we want to convert those to Strings. So we will cast anything other
                        // than numbers or booleans to strings by using the toString() method.
                        rec.put(i - 1, value.toString());
                    }
                }
                dataFileWriter.append(rec);
                nrOfRows += 1;

                if (maxRows > 0 && nrOfRows == maxRows) {
                    break;
                }
            }

            return nrOfRows;
        }
    }

    public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException {
        return createSchema(rs, null, convertNames);
    }

    /**
     * Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a
     * fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value.
     *
     * @param rs The result set to convert to Avro
     * @param recordName The a priori record name to use if it cannot be determined from the result set.
     * @param convertNames Whether to convert column/table names to be legal Avro names
     * @return A Schema object representing the result set converted to an Avro record
     * @throws SQLException if any error occurs during conversion
     */
    public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException {
        final ResultSetMetaData meta = rs.getMetaData();
        final int nrOfColumns = meta.getColumnCount();
        String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName;
        try {
            if (nrOfColumns > 0) {
                // Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column
                String firstColumnNameFromMeta = meta.getColumnName(1);
                int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf(".");
                if (tableNameDelimiter > -1) {
                    String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter);
                    if (!StringUtils.isBlank(tableNameFromMeta)) {
                        tableName = tableNameFromMeta;
                    }
                }
            }
        } catch (SQLException se) {
            // Not all drivers support getTableName, so just use the previously-set default
        }

        if (convertNames) {
            tableName = normalizeNameForAvro(tableName);
        }
        final FieldAssembler<Schema> builder = SchemaBuilder.record(tableName).namespace("any.data").fields();

        // Some missing Avro types - Decimal, Date types. May need some additional work.
        for (int i = 1; i <= nrOfColumns; i++) {
            String columnNameFromMeta = meta.getColumnName(i);
            // Hive returns table.column for column name. Grab the column name as the string after the last period
            int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
            String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1);
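            // Pick a nullable Avro type for each JDBC column type; types without a natural Avro mapping
            // (e.g. DECIMAL, DATE, TIMESTAMP) are written as strings.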
            switch (meta.getColumnType(i)) {
                case CHAR:
                case LONGNVARCHAR:
                case LONGVARCHAR:
                case NCHAR:
                case NVARCHAR:
                case VARCHAR:
                case ARRAY:
                case STRUCT:
                case JAVA_OBJECT:
                case OTHER:
                case SQLXML:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
                    break;

                case BIT:
                case BOOLEAN:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
                    break;

                case INTEGER:
                    // Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned()
                    boolean signedType = true;
                    try {
                        signedType = meta.isSigned(i);
                    } catch (SQLException se) {
                        // Use signed types as default
                    }
                    if (signedType) {
                        builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
                    } else {
                        builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
                    }
                    break;

                case SMALLINT:
                case TINYINT:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
                    break;

                case BIGINT:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
                    break;

                // java.sql.RowId is an interface whose implementation is database-specific, so convert it to String
                case ROWID:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
                    break;

                case FLOAT:
                case REAL:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault();
                    break;

                case DOUBLE:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault();
                    break;

                // No directly suitable Avro type, so fall back to String for now
                case DECIMAL:
                case NUMERIC:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
                    break;

                // No directly suitable Avro type, so fall back to String for now
                case DATE:
                case TIME:
                case TIMESTAMP:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
                    break;

                case BINARY:
                case VARBINARY:
                case LONGVARBINARY:
                case BLOB:
                case CLOB:
                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault();
                    break;

                default:
                    throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type");
            }
        }

        return builder.endRecord();
    }

    public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException {
        return convertToCsvStream(rs, outStream, null, null, outputOptions);
    }

    public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions)
            throws SQLException, IOException {

        final ResultSetMetaData meta = rs.getMetaData();
        final int nrOfColumns = meta.getColumnCount();
        List<String> columnNames = new ArrayList<>(nrOfColumns);

        if (outputOptions.isHeader()) {
            if (outputOptions.getAltHeader() == null) {
                for (int i = 1; i <= nrOfColumns; i++) {
                    String columnNameFromMeta = meta.getColumnName(i);
                    // Hive returns table.column for column name. Grab the column name as the string after the last period
                    int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
                    columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1));
                }
            } else {
                String[] altHeaderNames = outputOptions.getAltHeader().split(",");
                columnNames = Arrays.asList(altHeaderNames);
            }
        }

        // Write column names as header row
        outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
        if (outputOptions.isHeader()) {
            outStream.write("\n".getBytes(StandardCharsets.UTF_8));
        }

        // Iterate over the rows
        int maxRows = outputOptions.getMaxRowsPerFlowFile();
        long nrOfRows = 0;
        while (rs.next()) {
            if (callback != null) {
                callback.processRow(rs);
            }
            List<String> rowValues = new ArrayList<>(nrOfColumns);
            for (int i = 1; i <= nrOfColumns; i++) {
                final int javaSqlType = meta.getColumnType(i);
                final Object value = rs.getObject(i);
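
                // Character and complex types are optionally quoted/escaped for CSV; any other type is written with toString().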
                switch (javaSqlType) {
                    case CHAR:
                    case LONGNVARCHAR:
                    case LONGVARCHAR:
                    case NCHAR:
                    case NVARCHAR:
                    case VARCHAR:
                        String valueString = rs.getString(i);
                        if (valueString != null) {
                            // Removed extra quotes as those are a part of the escapeCsv when required.
                            StringBuilder sb = new StringBuilder();
                            if (outputOptions.isQuote()) {
                                sb.append("\"");
                                if (outputOptions.isEscape()) {
                                    sb.append(StringEscapeUtils.escapeCsv(valueString));
                                } else {
                                    sb.append(valueString);
                                }
                                sb.append("\"");
                                rowValues.add(sb.toString());
                            } else {
                                if (outputOptions.isEscape()) {
                                    rowValues.add(StringEscapeUtils.escapeCsv(valueString));
                                } else {
                                    rowValues.add(valueString);
                                }
                            }
                        } else {
                            rowValues.add("");
                        }
                        break;
                    case ARRAY:
                    case STRUCT:
                    case JAVA_OBJECT:
                        String complexValueString = rs.getString(i);
                        if (complexValueString != null) {
                            rowValues.add(StringEscapeUtils.escapeCsv(complexValueString));
                        } else {
                            rowValues.add("");
                        }
                        break;
                    case SQLXML:
                        if (value != null) {
                            rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString()));
                        } else {
                            rowValues.add("");
                        }
                        break;
                    default:
                        if (value != null) {
                            rowValues.add(value.toString());
                        } else {
                            rowValues.add("");
                        }
                }
            }
            // Write row values
            outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
            outStream.write("\n".getBytes(StandardCharsets.UTF_8));
            nrOfRows++;

            if (maxRows > 0 && nrOfRows == maxRows) {
                break;
            }
        }
        return nrOfRows;
    }

    public static String normalizeNameForAvro(String inputName) {
        String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_");
        if (Character.isDigit(normalizedName.charAt(0))) {
            normalizedName = "_" + normalizedName;
        }
        return normalizedName;
    }

    /**
     * An interface for callback methods which allow processing of a row during the convertToXYZStream() processing.
     * <b>IMPORTANT:</b> This method should only work on the row pointed at by the current ResultSet reference.
     * Advancing the cursor (by calling next(), for example) can cause rows to be skipped during Avro transformation.
     */
    public interface ResultSetRowCallback {
        void processRow(ResultSet resultSet) throws IOException;
    }

    public static Configuration getConfigurationFromFiles(final String configFiles) {
        final Configuration hiveConfig = new HiveConf();
        if (StringUtils.isNotBlank(configFiles)) {
            for (final String configFile : configFiles.split(",")) {
                hiveConfig.addResource(new Path(configFile.trim()));
            }
        }
        return hiveConfig;
    }
}

@@ -1,155 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.util.hive;

import java.io.Serializable;

public class HiveOptions implements Serializable {
    /**
     * Half of the default Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS
     */
    private static final int DEFAULT_TICK_TUPLE_INTERVAL_SECS = 15;

    protected String databaseName;
    protected String tableName;
    protected String metaStoreURI;
    protected Integer txnsPerBatch = 100;
    protected Integer maxOpenConnections = 10;
    protected Integer batchSize = 15000;
    protected Integer idleTimeout = 60000;
    protected Integer callTimeout = 0;
    protected Integer heartBeatInterval = 60;
    protected Boolean autoCreatePartitions = true;
    protected String kerberosPrincipal;
    protected String kerberosKeytab;
    protected Integer tickTupleInterval = DEFAULT_TICK_TUPLE_INTERVAL_SECS;

    public HiveOptions(String metaStoreURI, String databaseName, String tableName) {
        this.metaStoreURI = metaStoreURI;
        this.databaseName = databaseName;
        this.tableName = tableName;
    }

    public HiveOptions withTickTupleInterval(Integer tickInterval) {
        this.tickTupleInterval = tickInterval;
        return this;
    }

    public HiveOptions withTxnsPerBatch(Integer txnsPerBatch) {
        this.txnsPerBatch = txnsPerBatch;
        return this;
    }

    public HiveOptions withMaxOpenConnections(Integer maxOpenConnections) {
        this.maxOpenConnections = maxOpenConnections;
        return this;
    }

    public HiveOptions withBatchSize(Integer batchSize) {
        this.batchSize = batchSize;
        return this;
    }

    public HiveOptions withIdleTimeout(Integer idleTimeout) {
        this.idleTimeout = idleTimeout;
        return this;
    }

    public HiveOptions withCallTimeout(Integer callTimeout) {
        this.callTimeout = callTimeout;
        return this;
    }

    public HiveOptions withHeartBeatInterval(Integer heartBeatInterval) {
        this.heartBeatInterval = heartBeatInterval;
        return this;
    }

    public HiveOptions withAutoCreatePartitions(Boolean autoCreatePartitions) {
        this.autoCreatePartitions = autoCreatePartitions;
        return this;
    }

    public HiveOptions withKerberosKeytab(String kerberosKeytab) {
        this.kerberosKeytab = kerberosKeytab;
        return this;
    }

    public HiveOptions withKerberosPrincipal(String kerberosPrincipal) {
        this.kerberosPrincipal = kerberosPrincipal;
        return this;
    }

    public String getMetaStoreURI() {
        return metaStoreURI;
    }

    public String getDatabaseName() {
        return databaseName;
    }

    public String getTableName() {
        return tableName;
    }

    public String getQualifiedTableName() {
        return databaseName + "." + tableName;
    }

    public Integer getBatchSize() {
        return batchSize;
    }

    public Integer getCallTimeOut() {
        return callTimeout;
    }

    public Integer getHeartBeatInterval() {
        return heartBeatInterval;
    }

    public Integer getMaxOpenConnections() {
        return maxOpenConnections;
    }

    public Integer getIdleTimeout() {
        return idleTimeout;
    }

    public Integer getTxnsPerBatch() {
        return txnsPerBatch;
    }

    public Boolean getAutoCreatePartitions() {
        return autoCreatePartitions;
    }

    public String getKerberosPrincipal() {
        return kerberosPrincipal;
    }

    public String getKerberosKeytab() {
        return kerberosKeytab;
    }

    public Integer getTickTupleInterval() {
        return tickTupleInterval;
    }
}

@@ -1,50 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.util.hive;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.hcatalog.streaming.ConnectionError;
import org.apache.hive.hcatalog.streaming.HiveEndPoint;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;

public class HiveUtils {
    private static final Logger LOG = LoggerFactory.getLogger(HiveUtils.class);

    public static HiveEndPoint makeEndPoint(List<String> partitionVals, HiveOptions options) throws ConnectionError {
        return new HiveEndPoint(options.getMetaStoreURI(), options.getDatabaseName(), options.getTableName(), partitionVals);
    }

    public static HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options, HiveConf hiveConf)
            throws HiveWriter.ConnectFailure, InterruptedException {
        return new HiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(),
                options.getCallTimeOut(), callTimeoutPool, ugi, hiveConf);
    }

    public static void logAllHiveEndPoints(Map<HiveEndPoint, HiveWriter> allWriters) {
        for (Map.Entry<HiveEndPoint, HiveWriter> entry : allWriters.entrySet()) {
            LOG.info("cached writers {} ", entry.getValue());
        }
    }
}

@@ -1,462 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.util.hive;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.hcatalog.streaming.HiveEndPoint;
import org.apache.hive.hcatalog.streaming.RecordWriter;
import org.apache.hive.hcatalog.streaming.SerializationError;
import org.apache.hive.hcatalog.streaming.StreamingConnection;
import org.apache.hive.hcatalog.streaming.StreamingException;
import org.apache.hive.hcatalog.streaming.StreamingIOFailure;
import org.apache.hive.hcatalog.streaming.StrictJsonWriter;
import org.apache.hive.hcatalog.streaming.TransactionBatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class HiveWriter {

    private static final Logger LOG = LoggerFactory.getLogger(HiveWriter.class);

    private final HiveEndPoint endPoint;
    private final StreamingConnection connection;
    private final int txnsPerBatch;
    private final RecordWriter recordWriter;
    private final ExecutorService callTimeoutPool;
    private final long callTimeout;
    private final Object txnBatchLock = new Object();
    private final UserGroupInformation ugi;
    private TransactionBatch txnBatch;
    private long lastUsed; // time of last flush on this writer
    protected boolean closed; // flag indicating HiveWriter was closed
    private int totalRecords = 0;

    public HiveWriter(HiveEndPoint endPoint, int txnsPerBatch, boolean autoCreatePartitions, long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveConf hiveConf)
            throws InterruptedException, ConnectFailure {
        try {
            this.ugi = ugi;
            this.callTimeout = callTimeout;
            this.callTimeoutPool = callTimeoutPool;
            this.endPoint = endPoint;
            this.connection = newConnection(endPoint, autoCreatePartitions, hiveConf, ugi);
            this.txnsPerBatch = txnsPerBatch;
            this.recordWriter = getRecordWriter(endPoint, ugi, hiveConf);
            this.txnBatch = nextTxnBatch(recordWriter);
            this.closed = false;
            this.lastUsed = System.currentTimeMillis();
        } catch (InterruptedException | RuntimeException | ConnectFailure e) {
            throw e;
        } catch (Exception e) {
            throw new ConnectFailure(endPoint, e);
        }
    }

    protected RecordWriter getRecordWriter(HiveEndPoint endPoint, UserGroupInformation ugi, HiveConf hiveConf) throws StreamingException, IOException, InterruptedException {
        if (ugi == null) {
            return new StrictJsonWriter(endPoint, hiveConf);
        } else {
            try {
                return ugi.doAs((PrivilegedExceptionAction<StrictJsonWriter>) () -> new StrictJsonWriter(endPoint, hiveConf));
            } catch (UndeclaredThrowableException e) {
                Throwable cause = e.getCause();
                if (cause instanceof StreamingException) {
                    throw (StreamingException) cause;
                } else {
                    throw e;
                }
            }
        }
    }

    @Override
    public String toString() {
        return "{ endPoint = " + endPoint + ", TransactionBatch = " + txnBatch + " }";
    }

    /**
     * Write the record data to Hive.
     *
     * @throws WriteFailure if the write to the transaction batch fails
     * @throws SerializationError if the record cannot be serialized
     * @throws InterruptedException if the write operation is interrupted
     */
    public synchronized void write(final byte[] record)
            throws WriteFailure, SerializationError, InterruptedException {
        if (closed) {
            throw new IllegalStateException("This hive streaming writer was closed " +
                    "and thus no longer able to write : " + endPoint);
        }
        // write the tuple
        try {
            LOG.debug("Writing event to {}", endPoint);
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws StreamingException, InterruptedException {
                    txnBatch.write(record);
                    totalRecords++;
                    return null;
                }
            });
        } catch (SerializationError se) {
            throw new SerializationError(endPoint.toString() + " SerializationError", se);
        } catch (StreamingException | TimeoutException e) {
            throw new WriteFailure(endPoint, txnBatch.getCurrentTxnId(), e);
        }
    }

    /**
     * Commits the current Txn if totalRecords > 0.
     * If 'rollToNext' is true, will switch to next Txn in batch or to a
     * new TxnBatch if current Txn batch is exhausted.
     */
    public void flush(boolean rollToNext)
            throws CommitFailure, TxnBatchFailure, TxnFailure, InterruptedException {
        // if there are no records do not call flush
        if (totalRecords <= 0) {
            return;
        }
        try {
            synchronized (txnBatchLock) {
                commitTxn();
                nextTxn(rollToNext);
                totalRecords = 0;
                lastUsed = System.currentTimeMillis();
            }
        } catch (StreamingException e) {
            throw new TxnFailure(txnBatch, e);
        }
    }

    /**
     * Queues up a heartbeat request on the current and remaining txns using the
     * heartbeatThdPool and returns immediately.
     */
    public void heartBeat() throws InterruptedException {
        // 1) schedule the heartbeat on one thread in pool
        synchronized (txnBatchLock) {
            try {
                callWithTimeout(new CallRunner<Void>() {
                    @Override
                    public Void call() throws Exception {
                        try {
                            LOG.info("Sending heartbeat on batch " + txnBatch);
                            txnBatch.heartbeat();
                        } catch (StreamingException e) {
                            LOG.warn("Heartbeat error on batch " + txnBatch, e);
                        }
                        return null;
                    }
                });
            } catch (InterruptedException e) {
                throw e;
            } catch (Exception e) {
                LOG.warn("Unable to send heartbeat on Txn Batch " + txnBatch, e);
                // Suppressing exceptions as we don't care for errors on heartbeats
            }
        }
    }

    /**
     * Returns the number of records written so far in the current transaction.
     *
     * @return totalRecords
     */
    public int getTotalRecords() {
        return totalRecords;
    }

    /**
     * Flush and close the current transaction batch.
     */
    public void flushAndClose() throws TxnBatchFailure, TxnFailure, CommitFailure,
            IOException, InterruptedException {
        flush(false);
        close();
    }

    /**
     * Close the Transaction Batch and connection.
     *
     * @throws IOException if an error occurs during close
     * @throws InterruptedException if the close operation is interrupted
     */
    public void close() throws IOException, InterruptedException {
        closeTxnBatch();
        closeConnection();
        closed = true;
    }

    protected void closeConnection() throws InterruptedException {
        LOG.info("Closing connection to end point : {}", endPoint);
        try {
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    connection.close(); // could block
                    return null;
                }
            });
        } catch (Exception e) {
            LOG.warn("Error closing connection to EndPoint : " + endPoint, e);
            // Suppressing exceptions as we don't care for errors on connection close
        }
    }

    protected void commitTxn() throws CommitFailure, InterruptedException {
        LOG.debug("Committing Txn id {} to {}", txnBatch.getCurrentTxnId(), endPoint);
        try {
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    txnBatch.commit(); // could block
                    return null;
                }
            });
        } catch (StreamingException | TimeoutException e) {
            throw new CommitFailure(endPoint, txnBatch.getCurrentTxnId(), e);
        }
    }

    protected StreamingConnection newConnection(HiveEndPoint endPoint, boolean autoCreatePartitions, HiveConf conf, UserGroupInformation ugi) throws InterruptedException, ConnectFailure {
        try {
            return callWithTimeout(() -> {
                return endPoint.newConnection(autoCreatePartitions, conf, ugi); // could block
            });
        } catch (StreamingException | TimeoutException e) {
            throw new ConnectFailure(endPoint, e);
        }
    }

    protected TransactionBatch nextTxnBatch(final RecordWriter recordWriter)
            throws InterruptedException, TxnBatchFailure {
        LOG.debug("Fetching new Txn Batch for {}", endPoint);
        TransactionBatch batch = null;
        try {
            batch = callWithTimeout(() -> {
                return connection.fetchTransactionBatch(txnsPerBatch, recordWriter); // could block
            });
            batch.beginNextTransaction();
            LOG.debug("Acquired {}. Switching to first txn", batch);
        } catch (TimeoutException | StreamingException e) {
            throw new TxnBatchFailure(endPoint, e);
        }
        return batch;
    }

    protected void closeTxnBatch() throws InterruptedException {
        try {
            LOG.debug("Closing Txn Batch {}", txnBatch);
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws Exception {
                    if (txnBatch != null) {
                        txnBatch.close(); // could block
                    }
                    return null;
                }
            });
        } catch (InterruptedException e) {
            throw e;
        } catch (Exception e) {
            LOG.warn("Error closing txn batch " + txnBatch, e);
        }
    }

    /**
     * Aborts the current Txn and switches to the next Txn.
     * @throws StreamingException if it could not get a new Transaction Batch, or switch to the next Txn
     */
    public void abort() throws StreamingException, TxnBatchFailure, InterruptedException {
        synchronized (txnBatchLock) {
            abortTxn();
            nextTxn(true); // roll to next
        }
    }

    /**
     * Aborts the current Txn in the txnBatch.
     */
    protected void abortTxn() throws InterruptedException {
        LOG.info("Aborting Txn id {} on End Point {}", txnBatch.getCurrentTxnId(), endPoint);
        try {
            callWithTimeout(new CallRunner<Void>() {
                @Override
                public Void call() throws StreamingException, InterruptedException {
                    txnBatch.abort(); // could block
                    return null;
                }
            });
        } catch (InterruptedException e) {
            throw e;
        } catch (TimeoutException e) {
            LOG.warn("Timeout while aborting Txn " + txnBatch.getCurrentTxnId() + " on EndPoint: " + endPoint, e);
        } catch (Exception e) {
            LOG.warn("Error aborting Txn " + txnBatch.getCurrentTxnId() + " on EndPoint: " + endPoint, e);
            // Suppressing exceptions as we don't care for errors on abort
        }
    }

    /**
     * If there are remaining transactions in the current txnBatch, begins the next transaction;
     * otherwise creates a new txnBatch.
     * @param rollToNext Whether to roll to the next transaction batch
     */
    protected void nextTxn(boolean rollToNext) throws StreamingException, InterruptedException, TxnBatchFailure {
        if (txnBatch.remainingTransactions() == 0) {
            closeTxnBatch();
            txnBatch = null;
            if (rollToNext) {
                txnBatch = nextTxnBatch(recordWriter);
            }
        } else if (rollToNext) {
            LOG.debug("Switching to next Txn for {}", endPoint);
            txnBatch.beginNextTransaction(); // does not block
        }
    }

    /**
     * If the current thread has been interrupted, then throws an
     * exception.
     * @throws InterruptedException if the current thread has been interrupted
     */
    protected static void checkAndThrowInterruptedException()
            throws InterruptedException {
        if (Thread.interrupted()) {
            throw new InterruptedException("Timed out before Hive call was made. "
                    + "Your callTimeout might be set too low or Hive calls are "
                    + "taking too long.");
        }
    }

    /**
     * Execute the callable on a separate thread and wait for the completion
     * for the specified amount of time in milliseconds. In case of timeout
     * cancel the callable and throw a TimeoutException.
     */
    private <T> T callWithTimeout(final CallRunner<T> callRunner)
            throws TimeoutException, StreamingException, InterruptedException {
        Future<T> future = callTimeoutPool.submit(() -> {
            if (ugi == null) {
                return callRunner.call();
            }
            try {
                return ugi.doAs((PrivilegedExceptionAction<T>) () -> callRunner.call());
            } catch (UndeclaredThrowableException e) {
                Throwable cause = e.getCause();
                // Unwrap exception so it is thrown the same way as without ugi
                if (!(cause instanceof Exception)) {
                    throw e;
                }
                throw (Exception) cause;
            }
        });
        try {
            if (callTimeout > 0) {
                return future.get(callTimeout, TimeUnit.MILLISECONDS);
            } else {
                return future.get();
            }
        } catch (TimeoutException eT) {
            future.cancel(true);
            throw eT;
        } catch (ExecutionException e1) {
            Throwable cause = e1.getCause();
            if (cause instanceof IOException) {
                throw new StreamingIOFailure("I/O Failure", (IOException) cause);
            } else if (cause instanceof StreamingException) {
                throw (StreamingException) cause;
            } else if (cause instanceof InterruptedException) {
                throw (InterruptedException) cause;
            } else if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else if (cause instanceof TimeoutException) {
                throw new StreamingException("Operation Timed Out.", (TimeoutException) cause);
            } else {
                throw new RuntimeException(e1);
            }
        }
    }

    public long getLastUsed() {
        return lastUsed;
    }

    private byte[] generateRecord(List<String> tuple) {
        StringBuilder buf = new StringBuilder();
        for (String o : tuple) {
            buf.append(o);
            buf.append(",");
        }
        return buf.toString().getBytes();
    }

    /**
     * Simple interface whose <tt>call</tt> method is called by
     * {@link #callWithTimeout} in a new thread inside a
     * {@linkplain java.security.PrivilegedExceptionAction#run()} call.
     * @param <T> the type of object returned from the call
     */
    private interface CallRunner<T> {
        T call() throws Exception;
    }

    public static class Failure extends Exception {
        public Failure(String message, Throwable cause) {
            super(message, cause);
        }
    }

    public static class WriteFailure extends Failure {
        public WriteFailure(HiveEndPoint endPoint, Long currentTxnId, Throwable cause) {
            super("Failed writing to : " + endPoint + ". TxnID : " + currentTxnId, cause);
        }
    }

    public static class CommitFailure extends Failure {
        public CommitFailure(HiveEndPoint endPoint, Long txnID, Throwable cause) {
            super("Commit of Txn " + txnID + " failed on EndPoint: " + endPoint, cause);
        }
    }

    public static class ConnectFailure extends Failure {
        public ConnectFailure(HiveEndPoint ep, Throwable cause) {
            super("Failed connecting to EndPoint " + ep, cause);
        }
    }

    public static class TxnBatchFailure extends Failure {
        public TxnBatchFailure(HiveEndPoint ep, Throwable cause) {
            super("Failed acquiring Transaction Batch from EndPoint: " + ep, cause);
        }
    }

    public static class TxnFailure extends Failure {
        public TxnFailure(TransactionBatch txnBatch, Throwable cause) {
            super("Failed switching to next Txn in TxnBatch " + txnBatch, cause);
        }
    }
}

@@ -1,41 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.util.hive;

import org.apache.hadoop.conf.Configuration;

/**
 * A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary)
 */
public class ValidationResources {

    private final String configResources;
    private final Configuration configuration;

    public ValidationResources(String configResources, Configuration configuration) {
        this.configResources = configResources;
        this.configuration = configuration;
    }

    public String getConfigResources() {
        return configResources;
    }

    public Configuration getConfiguration() {
        return configuration;
    }
}

@@ -1,15 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.dbcp.hive.HiveConnectionPool

@@ -1,19 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.nifi.processors.hive.ConvertAvroToORC
org.apache.nifi.processors.hive.SelectHiveQL
org.apache.nifi.processors.hive.PutHiveQL
org.apache.nifi.processors.hive.PutHiveStreaming
org.apache.nifi.processors.hive.UpdateHiveTable

@@ -1,201 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.dbcp.hive;

import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.registry.VariableDescriptor;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.util.MockConfigurationContext;
import org.apache.nifi.util.MockVariableRegistry;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.UndeclaredThrowableException;
import java.security.PrivilegedExceptionAction;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.isA;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class HiveConnectionPoolTest {
    private UserGroupInformation userGroupInformation;
    private HiveConnectionPool hiveConnectionPool;
    private BasicDataSource basicDataSource;
    private ComponentLog componentLog;
    private KerberosProperties kerberosProperties;
    private File krb5conf = new File("src/test/resources/krb5.conf");

    @BeforeEach
    public void setup() throws Exception {
        // have to initialize this system property before anything else
        System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath());
        System.setProperty("java.security.krb5.realm", "nifi.com");
        System.setProperty("java.security.krb5.kdc", "nifi.kdc");

        userGroupInformation = mock(UserGroupInformation.class);
        basicDataSource = mock(BasicDataSource.class);
        componentLog = mock(ComponentLog.class);
        kerberosProperties = mock(KerberosProperties.class);
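
        // Have the mocked UGI run the supplied PrivilegedExceptionAction directly on the calling thread,
        // unwrapping checked exceptions the way a real UserGroupInformation.doAs() would.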
        when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> {
            try {
                return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run();
            } catch (IOException | Error | RuntimeException | InterruptedException e) {
                throw e;
            } catch (Throwable e) {
                throw new UndeclaredThrowableException(e);
            }
        });

        when(kerberosProperties.getKerberosKeytab()).thenReturn(new PropertyDescriptor.Builder()
                .name("Kerberos Keytab")
                .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
                .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
                .build());

        when(kerberosProperties.getKerberosPrincipal()).thenReturn(new PropertyDescriptor.Builder()
                .name("Kerberos Principal")
                .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
                .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
                .build());

        initPool();
    }

    private void initPool() throws Exception {
        hiveConnectionPool = new HiveConnectionPool();

        Field ugiField = HiveConnectionPool.class.getDeclaredField("ugi");
        ugiField.setAccessible(true);
        ugiField.set(hiveConnectionPool, userGroupInformation);

        Field dataSourceField = HiveConnectionPool.class.getDeclaredField("dataSource");
        dataSourceField.setAccessible(true);
        dataSourceField.set(hiveConnectionPool, basicDataSource);

        Field componentLogField = AbstractControllerService.class.getDeclaredField("logger");
        componentLogField.setAccessible(true);
        componentLogField.set(hiveConnectionPool, componentLog);

        Field kerberosPropertiesField = HiveConnectionPool.class.getDeclaredField("kerberosProperties");
        kerberosPropertiesField.setAccessible(true);
        kerberosPropertiesField.set(hiveConnectionPool, kerberosProperties);
    }

    @Test
    public void testGetConnectionSqlException() throws SQLException {
        SQLException sqlException = new SQLException("bad sql");
        when(basicDataSource.getConnection()).thenThrow(sqlException);

        ProcessException e = assertThrows(ProcessException.class, () -> hiveConnectionPool.getConnection());
        assertEquals(sqlException, e.getCause());
    }

    @Test
    public void testExpressionLanguageSupport() throws Exception {
        final String URL = "jdbc:hive2://localhost:10000/default";
        final String USER = "user";
        final String PASS = "pass";
        final int MAX_CONN = 7;
        final String MAX_CONN_LIFETIME = "1 sec";
        final String MAX_WAIT = "10 sec"; // 10000 milliseconds
        final String CONF = "/path/to/hive-site.xml";
        hiveConnectionPool = new HiveConnectionPool();

        Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
            put(HiveConnectionPool.DATABASE_URL, "${url}");
            put(HiveConnectionPool.DB_USER, "${username}");
            put(HiveConnectionPool.DB_PASSWORD, "${password}");
            put(HiveConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}");
            put(HiveConnectionPool.MAX_CONN_LIFETIME, "${maxconnlifetime}");
            put(HiveConnectionPool.MAX_WAIT_TIME, "${maxwait}");
            put(HiveConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}");
        }};

        MockVariableRegistry registry = new MockVariableRegistry();
        registry.setVariable(new VariableDescriptor("url"), URL);
        registry.setVariable(new VariableDescriptor("username"), USER);
        registry.setVariable(new VariableDescriptor("password"), PASS);
        registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN));
        registry.setVariable(new VariableDescriptor("maxconnlifetime"), MAX_CONN_LIFETIME);
        registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT);
        registry.setVariable(new VariableDescriptor("hiveconf"), CONF);

        MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
        hiveConnectionPool.onConfigured(context);

        Field dataSourceField = HiveConnectionPool.class.getDeclaredField("dataSource");
        dataSourceField.setAccessible(true);
        basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool);
        assertEquals(URL, basicDataSource.getUrl());
        assertEquals(USER, basicDataSource.getUsername());
        assertEquals(PASS, basicDataSource.getPassword());
        assertEquals(MAX_CONN, basicDataSource.getMaxTotal());
        assertEquals(1000L, basicDataSource.getMaxConnLifetimeMillis());
        assertEquals(10000L, basicDataSource.getMaxWaitMillis());
        assertEquals(URL, hiveConnectionPool.getConnectionURL());
    }

    @EnabledIfSystemProperty(
            named = "nifi.test.unstable",
            matches = "true",
            disabledReason = "Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run")
    @Test
    public void testKerberosAuthException() {
        final String URL = "jdbc:hive2://localhost:10000/default";
        final String conf = "src/test/resources/hive-site-security.xml";
        final String ktab = "src/test/resources/fake.keytab";
        final String kprinc = "bad@PRINCIPAL.COM";

        KerberosProperties kerbProperties = new KerberosProperties(krb5conf);

        Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
            put(HiveConnectionPool.DATABASE_URL, "${url}");
            put(HiveConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}");
            put(kerbProperties.getKerberosKeytab(), "${ktab}");
            put(kerbProperties.getKerberosPrincipal(), "${kprinc}");
        }};

        MockVariableRegistry registry = new MockVariableRegistry();
        registry.setVariable(new VariableDescriptor("url"), URL);
        registry.setVariable(new VariableDescriptor("conf"), conf);
        registry.setVariable(new VariableDescriptor("ktab"), ktab);
        registry.setVariable(new VariableDescriptor("kprinc"), kprinc);

        MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
        assertThrows(InitializationException.class, () -> hiveConnectionPool.onConfigured(context));
    }
}

@ -1,568 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.file.DataFileStream;
|
||||
import org.apache.avro.file.DataFileWriter;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericDatumWriter;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.io.DatumReader;
|
||||
import org.apache.avro.io.DatumWriter;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
|
||||
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
|
||||
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
|
||||
import org.apache.hadoop.hive.ql.io.orc.Reader;
|
||||
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
|
||||
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
|
||||
import org.apache.hadoop.io.DoubleWritable;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.apache.nifi.util.orc.TestNiFiOrcUtils;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
|
||||
/**
|
||||
* Unit tests for ConvertAvroToORC processor
|
||||
*/
|
||||
public class TestConvertAvroToORC {
|
||||
|
||||
private ConvertAvroToORC processor;
|
||||
private TestRunner runner;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
processor = new ConvertAvroToORC();
|
||||
runner = TestRunners.newTestRunner(processor);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_routing_to_failure_null_type() throws Exception {
|
||||
String testString = "Hello World";
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithNull(testString);
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test.avro");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, null BOOLEAN) STORED AS ORC",
|
||||
resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_routing_to_failure_empty_array_type() throws Exception {
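// As above, an array field with no usable element type still converts successfully;
// the generated DDL maps it to ARRAY<BOOLEAN>.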
|
||||
String testString = "Hello World";
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithEmptyArray(testString);
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test.avro");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (string STRING, emptyArray ARRAY<BOOLEAN>) STORED AS ORC",
|
||||
resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_routing_to_failure_fixed_type() throws Exception {
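// The Avro "fixed" type is not converted; the flow file is routed to failure with its
// original Avro content and filename preserved, which is verified by reading it back.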
|
||||
String testString = "Hello!";
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildAvroRecordWithFixed(testString);
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test.avro");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_FAILURE, 1);
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_FAILURE).get(0);
|
||||
assertEquals("test.avro", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
|
||||
final InputStream in = new ByteArrayInputStream(resultFlowFile.toByteArray());
|
||||
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||
assertTrue(dataFileReader.hasNext());
|
||||
GenericRecord testedRecord = dataFileReader.next();
|
||||
|
||||
assertNotNull(testedRecord.get("fixed"));
|
||||
assertArrayEquals(testString.getBytes(StandardCharsets.UTF_8), ((GenericData.Fixed) testedRecord.get("fixed")).bytes());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_primitive_record() throws Exception {
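// Converts three records of primitive Avro types into one ORC file, then verifies the
// generated Hive DDL, the record-count attribute, and selected field values by reading
// the ORC file back from disk.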
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
// Put another record in
|
||||
record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
|
||||
fileWriter.append(record);
|
||||
// And one more
|
||||
record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
|
||||
fileWriter.append(record);
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test.avro");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
|
||||
// Write the flow file out to disk, since the ORC Reader needs a path
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
|
||||
+ " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
|
||||
FileOutputStream fos = new FileOutputStream("target/test1.orc");
|
||||
fos.write(resultContents);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
|
||||
RecordReader rows = reader.rows();
|
||||
Object o = rows.next(null);
|
||||
assertNotNull(o);
|
||||
assertTrue(o instanceof OrcStruct);
|
||||
TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
|
||||
StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
|
||||
|
||||
// Check some fields in the first row
|
||||
Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
|
||||
assertTrue(intFieldObject instanceof IntWritable);
|
||||
assertEquals(10, ((IntWritable) intFieldObject).get());
|
||||
Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
|
||||
assertTrue(stringFieldObject instanceof Text);
|
||||
assertEquals("World", stringFieldObject.toString());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_complex_record() throws Exception {
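// Exercises complex Avro types (map, enum, union, array, decimal) and verifies the
// corresponding Hive DDL plus the map and decimal values read back from the ORC file.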
|
||||
|
||||
Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
|
||||
put("key1", 1.0);
|
||||
put("key2", 2.0);
|
||||
}};
|
||||
|
||||
BigDecimal sampleBigDecimal = new BigDecimal("12.34");
|
||||
ByteBuffer bigDecimalAsBytes = ByteBuffer.wrap(sampleBigDecimal.unscaledValue().toByteArray());
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20), bigDecimalAsBytes);
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
|
||||
// Put another record in
|
||||
Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
|
||||
put("key1", 3.0);
|
||||
put("key2", 4.0);
|
||||
}};
|
||||
|
||||
record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200), bigDecimalAsBytes);
|
||||
fileWriter.append(record);
|
||||
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
|
||||
// Write the flow file out to disk, since the ORC Reader needs a path
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " +
|
||||
"(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>, myDecimal DECIMAL(10,2))"
|
||||
+ " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
|
||||
FileOutputStream fos = new FileOutputStream("target/test1.orc");
|
||||
fos.write(resultContents);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
|
||||
RecordReader rows = reader.rows();
|
||||
Object o = rows.next(null);
|
||||
assertNotNull(o);
|
||||
assertTrue(o instanceof OrcStruct);
|
||||
TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
|
||||
StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
|
||||
|
||||
// Check some fields in the first row
|
||||
Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
|
||||
assertTrue(intFieldObject instanceof IntWritable);
|
||||
assertEquals(10, ((IntWritable) intFieldObject).get());
|
||||
|
||||
Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
|
||||
assertTrue(mapFieldObject instanceof Map);
|
||||
Map map = (Map) mapFieldObject;
|
||||
Object mapValue = map.get(new Text("key1"));
|
||||
assertNotNull(mapValue);
|
||||
assertTrue(mapValue instanceof DoubleWritable);
|
||||
assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
|
||||
|
||||
mapValue = map.get(new Text("key2"));
|
||||
assertNotNull(mapValue);
|
||||
assertTrue(mapValue instanceof DoubleWritable);
|
||||
assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
|
||||
|
||||
Object decimalFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myDecimal"));
|
||||
assertTrue(decimalFieldObject instanceof HiveDecimalWritable);
|
||||
assertEquals(sampleBigDecimal, ((HiveDecimalWritable) decimalFieldObject).getHiveDecimal().bigDecimalValue());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_complex_records_with_bigdecimals() throws Exception {
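// Verifies that two different decimal values round-trip through the Avro-to-ORC
// conversion and compare equal as BigDecimal after reading them back as HiveDecimalWritable.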
|
||||
|
||||
Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
|
||||
put("key1", 1.0);
|
||||
put("key2", 2.0);
|
||||
}};
|
||||
|
||||
|
||||
BigDecimal sampleBigDecimal1 = new BigDecimal("3500.12");
|
||||
BigDecimal sampleBigDecimal2 = new BigDecimal("0.01");
|
||||
|
||||
GenericData.Record record1 = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData1, "XYZ", 4L, Arrays.asList(100, 200), toByteBuffer(sampleBigDecimal1));
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record1.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record1.getSchema(), out);
|
||||
fileWriter.append(record1);
|
||||
fileWriter.append(TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData1, "XYZ", 4L, Arrays.asList(100, 200), toByteBuffer(sampleBigDecimal2)));
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
|
||||
// Write the flow file out to disk, since the ORC Reader needs a path
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
|
||||
FileOutputStream fos = new FileOutputStream("target/test1.orc");
|
||||
fos.write(resultContents);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
|
||||
RecordReader rows = reader.rows();
|
||||
TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
|
||||
StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
|
||||
|
||||
Object result1 = rows.next(null);
|
||||
assertNotNull(result1);
|
||||
Object decimalFieldObject1 = inspector.getStructFieldData(result1, inspector.getStructFieldRef("myDecimal"));
|
||||
assertEquals(sampleBigDecimal1, ((HiveDecimalWritable) decimalFieldObject1).getHiveDecimal().bigDecimalValue());
|
||||
|
||||
Object result2 = rows.next(null);
|
||||
assertNotNull(result2);
|
||||
Object decimalFieldObject2 = inspector.getStructFieldData(result2, inspector.getStructFieldRef("myDecimal"));
|
||||
assertEquals(sampleBigDecimal2, ((HiveDecimalWritable) decimalFieldObject2).getHiveDecimal().bigDecimalValue());
|
||||
}
|
||||
|
||||
private ByteBuffer toByteBuffer(BigDecimal sampleBigDecimal) {
|
||||
return ByteBuffer.wrap(sampleBigDecimal.unscaledValue().toByteArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_array_of_records() throws Exception {
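// Builds an Avro record containing an array of nested records (schema loaded from
// src/test/resources/array_of_records.avsc) and checks that the ORC output exposes it
// as ARRAY<STRUCT<name:STRING, age:INT>>.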
|
||||
final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
|
||||
List<GenericRecord> innerRecords = new LinkedList<>();
|
||||
|
||||
final GenericRecord outerRecord = new GenericData.Record(schema);
|
||||
|
||||
Schema arraySchema = schema.getField("records").schema();
|
||||
Schema innerRecordSchema = arraySchema.getElementType();
|
||||
final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
|
||||
innerRecord1.put("name", "Joe");
|
||||
innerRecord1.put("age", 42);
|
||||
|
||||
innerRecords.add(innerRecord1);
|
||||
|
||||
final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
|
||||
innerRecord2.put("name", "Mary");
|
||||
innerRecord2.put("age", 28);
|
||||
|
||||
innerRecords.add(innerRecord2);
|
||||
|
||||
GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
|
||||
outerRecord.put("records", array);
|
||||
|
||||
final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
|
||||
dataFileWriter.create(schema, out);
|
||||
dataFileWriter.append(outerRecord);
|
||||
}
|
||||
out.close();
|
||||
|
||||
// Build a flow file from the Avro record
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
|
||||
// Write the flow file out to disk, since the ORC Reader needs a path
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
|
||||
"(records ARRAY<STRUCT<name:STRING, age:INT>>)"
|
||||
+ " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
|
||||
FileOutputStream fos = new FileOutputStream("target/test1.orc");
|
||||
fos.write(resultContents);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
|
||||
RecordReader rows = reader.rows();
|
||||
Object o = rows.next(null);
|
||||
assertNotNull(o);
|
||||
assertTrue(o instanceof OrcStruct);
|
||||
StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));
|
||||
|
||||
// Verify the record contains an array
|
||||
Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
|
||||
assertTrue(arrayFieldObject instanceof ArrayList);
|
||||
ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
|
||||
assertEquals(2, arrayField.size());
|
||||
|
||||
// Verify the first element. Should be a record with two fields "name" and "age"
|
||||
Object element = arrayField.get(0);
|
||||
assertTrue(element instanceof OrcStruct);
|
||||
StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
|
||||
Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
|
||||
assertTrue(nameObject instanceof Text);
|
||||
assertEquals("Joe", nameObject.toString());
|
||||
Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
|
||||
assertTrue(ageObject instanceof IntWritable);
|
||||
assertEquals(42, ((IntWritable) ageObject).get());
|
||||
|
||||
// Verify the second element. Should be a record with two fields "name" and "age"
|
||||
element = arrayField.get(1);
|
||||
assertTrue(element instanceof OrcStruct);
|
||||
nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
|
||||
assertTrue(nameObject instanceof Text);
|
||||
assertEquals("Mary", nameObject.toString());
|
||||
ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
|
||||
assertTrue(ageObject instanceof IntWritable);
|
||||
assertEquals(28, ((IntWritable) ageObject).get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_onTrigger_nested_complex_record() throws Exception {
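// Covers nesting in both directions, a map of arrays and an array of maps, verified via
// the generated DDL and by inspecting the first ORC row.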
|
||||
|
||||
Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{
|
||||
put("key1", Arrays.asList(1.0, 2.0));
|
||||
put("key2", Arrays.asList(3.0, 4.0));
|
||||
}};
|
||||
|
||||
Map<String, String> arrayMap11 = new TreeMap<String, String>() {{
|
||||
put("key1", "v1");
|
||||
put("key2", "v2");
|
||||
}};
|
||||
Map<String, String> arrayMap12 = new TreeMap<String, String>() {{
|
||||
put("key3", "v3");
|
||||
put("key4", "v4");
|
||||
}};
|
||||
|
||||
GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));
|
||||
|
||||
DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
|
||||
DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
fileWriter.create(record.getSchema(), out);
|
||||
fileWriter.append(record);
|
||||
|
||||
// Put another record in
|
||||
Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{
|
||||
put("key1", Arrays.asList(-1.0, -2.0));
|
||||
put("key2", Arrays.asList(-3.0, -4.0));
|
||||
}};
|
||||
|
||||
Map<String, String> arrayMap21 = new TreeMap<String, String>() {{
|
||||
put("key1", "v-1");
|
||||
put("key2", "v-2");
|
||||
}};
|
||||
Map<String, String> arrayMap22 = new TreeMap<String, String>() {{
|
||||
put("key3", "v-3");
|
||||
put("key4", "v-4");
|
||||
}};
|
||||
|
||||
record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
|
||||
fileWriter.append(record);
|
||||
|
||||
fileWriter.flush();
|
||||
fileWriter.close();
|
||||
out.close();
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>() {{
|
||||
put(CoreAttributes.FILENAME.key(), "test");
|
||||
}};
|
||||
runner.enqueue(out.toByteArray(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
|
||||
|
||||
// Write the flow file out to disk, since the ORC Reader needs a path
|
||||
MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " +
|
||||
"(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
|
||||
+ " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
|
||||
assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
|
||||
assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
|
||||
byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
|
||||
FileOutputStream fos = new FileOutputStream("target/test1.orc");
|
||||
fos.write(resultContents);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.getLocal(conf);
|
||||
Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
|
||||
RecordReader rows = reader.rows();
|
||||
Object o = rows.next(null);
|
||||
assertNotNull(o);
|
||||
assertTrue(o instanceof OrcStruct);
|
||||
TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
|
||||
StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
|
||||
|
||||
|
||||
// check values
|
||||
Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
|
||||
assertTrue(myMapOfArray instanceof Map);
|
||||
Map map = (Map) myMapOfArray;
|
||||
Object mapValue = map.get(new Text("key1"));
|
||||
assertNotNull(mapValue);
|
||||
assertTrue(mapValue instanceof List);
|
||||
assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);
|
||||
|
||||
Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
|
||||
assertTrue(myArrayOfMap instanceof List);
|
||||
List list = (List) myArrayOfMap;
|
||||
Object el0 = list.get(0);
|
||||
assertNotNull(el0);
|
||||
assertTrue(el0 instanceof Map);
|
||||
assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
|
||||
}
|
||||
}
|
|
@ -1,292 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.util.MockProcessContext;
|
||||
import org.apache.nifi.util.MockProcessorInitializationContext;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHiveParser extends AbstractHiveQLProcessor {
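// Extends AbstractHiveQLProcessor so the tests can call its findTableNames() helper directly;
// onTrigger is overridden as a no-op. The boolean in TableName appears to flag whether the
// table is read as an input (true) or written to as an output (false).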
|
||||
|
||||
@BeforeEach
|
||||
public void initialize() {
|
||||
final MockProcessContext processContext = new MockProcessContext(this);
|
||||
final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext);
|
||||
initialize(initializationContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelect() {
|
||||
String query = "select a.empid, to_something(b.saraly) from " +
|
||||
"company.emp a inner join default.salary b where a.empid = b.empid";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(2, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||
assertTrue(tableNames.contains(new TableName("default", "salary", true)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelectPrepared() {
|
||||
String query = "select empid from company.emp a where a.firstName = ?";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void parseLongSelect() {
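// A long SELECT joining eight FROM entries; date_dim is referenced through three aliases
// but yields a single entry, so six distinct table names are expected.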
|
||||
String query = "select\n" +
|
||||
"\n" +
|
||||
" i_item_id,\n" +
|
||||
"\n" +
|
||||
" i_item_desc,\n" +
|
||||
"\n" +
|
||||
" s_state,\n" +
|
||||
"\n" +
|
||||
" count(ss_quantity) as store_sales_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(ss_quantity) as store_sales_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" +
|
||||
"\n" +
|
||||
" count(sr_return_quantity) as store_returns_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(sr_return_quantity) as store_returns_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" +
|
||||
"\n" +
|
||||
" count(cs_quantity) as catalog_sales_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(cs_quantity) as catalog_sales_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" +
|
||||
"\n" +
|
||||
"from\n" +
|
||||
"\n" +
|
||||
" store_sales,\n" +
|
||||
"\n" +
|
||||
" store_returns,\n" +
|
||||
"\n" +
|
||||
" catalog_sales,\n" +
|
||||
"\n" +
|
||||
" date_dim d1,\n" +
|
||||
"\n" +
|
||||
" date_dim d2,\n" +
|
||||
"\n" +
|
||||
" date_dim d3,\n" +
|
||||
"\n" +
|
||||
" store,\n" +
|
||||
"\n" +
|
||||
" item\n" +
|
||||
"\n" +
|
||||
"where\n" +
|
||||
"\n" +
|
||||
" d1.d_quarter_name = '2000Q1'\n" +
|
||||
"\n" +
|
||||
" and d1.d_date_sk = ss_sold_date_sk\n" +
|
||||
"\n" +
|
||||
" and i_item_sk = ss_item_sk\n" +
|
||||
"\n" +
|
||||
" and s_store_sk = ss_store_sk\n" +
|
||||
"\n" +
|
||||
" and ss_customer_sk = sr_customer_sk\n" +
|
||||
"\n" +
|
||||
" and ss_item_sk = sr_item_sk\n" +
|
||||
"\n" +
|
||||
" and ss_ticket_number = sr_ticket_number\n" +
|
||||
"\n" +
|
||||
" and sr_returned_date_sk = d2.d_date_sk\n" +
|
||||
"\n" +
|
||||
" and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||
"\n" +
|
||||
" and sr_customer_sk = cs_bill_customer_sk\n" +
|
||||
"\n" +
|
||||
" and sr_item_sk = cs_item_sk\n" +
|
||||
"\n" +
|
||||
" and cs_sold_date_sk = d3.d_date_sk\n" +
|
||||
"\n" +
|
||||
" and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||
"\n" +
|
||||
"group by i_item_id , i_item_desc , s_state\n" +
|
||||
"\n" +
|
||||
"order by i_item_id , i_item_desc , s_state\n" +
|
||||
"\n" +
|
||||
"limit 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(6, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
for (TableName tableName : tableNames) {
|
||||
if (tableName.equals(new TableName(null, "store_sales", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "store_returns", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "catalog_sales", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "date_dim", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "store", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "item", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
}
|
||||
assertEquals(6, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelectInsert() {
|
||||
String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'";
|
||||
|
||||
// The same database.tableName can appear two times for input and output.
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(2, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
tableNames.forEach(tableName -> {
|
||||
if (tableName.equals(new TableName("databaseA", "tableA", false))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName("databaseA", "tableA", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
});
|
||||
assertEquals(2, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseInsert() {
|
||||
String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(3, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
tableNames.forEach(tableName -> {
|
||||
if (tableName.equals(new TableName("databaseB", "tableB1", false))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "tableA1", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "tableA2", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
});
|
||||
assertEquals(3, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseUpdate() {
|
||||
String query = "update table_a set y = 'updated' where x > 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseDelete() {
|
||||
String query = "delete from table_a where x > 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseDDL() {
|
||||
String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" +
|
||||
"EmployeeID INT,FirstName STRING, Title STRING,\n" +
|
||||
"State STRING, Laptop STRING)\n" +
|
||||
"COMMENT 'Employee Names'\n" +
|
||||
"STORED AS ORC";
|
||||
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSetProperty() {
|
||||
String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSetRole() {
|
||||
String query = "set role all";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseShowRoles() {
|
||||
String query = "show roles";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseMsck() {
|
||||
String query = "msck repair table table_a";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseAddJar() {
|
||||
String query = "ADD JAR hdfs:///tmp/my_jar.jar";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,846 +0,0 @@
|
|||
package org.apache.nifi.processors.hive;
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.dbcp.DBCPService;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.DisabledOnOs;
|
||||
import org.junit.jupiter.api.condition.OS;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.sql.Types;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@DisabledOnOs(OS.WINDOWS)
|
||||
public class TestPutHiveQL {
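// These tests exercise PutHiveQL against an embedded Derby database supplied by the
// MockDBCPService used below (note the derby.stream.error.file property in setup()),
// rather than a live Hive instance.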
|
||||
private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)";
|
||||
private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))";
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDirectStatements(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
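// Happy path: an INSERT followed by an UPDATE, each sent as its own flow file, with the
// results verified through direct JDBC queries against the test database.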
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
|
||||
runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes());
|
||||
runner.run();
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("George", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
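// With the default (non-rollback) behavior, only the syntactically invalid statement is
// routed to failure; the remaining flow files are executed and routed to success.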
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3);
|
||||
runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS)
|
||||
.forEach(f -> f.assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadStatementRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
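// With Rollback On Failure enabled, the bad statement stops the batch: one flow file
// succeeds and the rest stay in the incoming queue instead of being routed to failure.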
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
runner.run();
|
||||
|
||||
// The 1st one should be routed to success, others should stay in queue.
|
||||
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailAtBeginning(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailAtBeginningRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadParameterType(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
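// Parameters are bound from hiveql.args.N.type / hiveql.args.N.value attributes; a type that
// does not match the column (VARCHAR "hello" for an INTEGER) fails only that flow file.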
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||
badAttributes.put("hiveql.args.1.value", "hello");
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadParameterValue(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3);
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertTrue(rs.next());
|
||||
assertTrue(rs.next());
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadNumberFormat(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
badAttributes.put("hiveql.args.1.value", "NOT_NUMBER");
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHiveQL.REL_SUCCESS, 3);
|
||||
runner.assertTransferCount(PutHiveQL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertTrue(rs.next());
|
||||
assertTrue(rs.next());
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUsingSqlDataTypesWithNegativeValues(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
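// JDBC type codes can be negative: "-5" is java.sql.Types.BIGINT, matching the BIGINT
// "code" column created above.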
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)");
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", "-5");
|
||||
attributes.put("hiveql.args.1.value", "84");
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS).get(0).assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStatementsWithPreparedParameters(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
|
||||
runner.clearTransferState();
|
||||
|
||||
attributes.clear();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.1.value", "George");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.2.value", "1");
|
||||
|
||||
runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("George", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMultipleStatementsWithinFlowFile(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
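// A single flow file may carry multiple semicolon-delimited statements; both the INSERT
// and the UPDATE execute in order and the flow file is routed to success.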
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// The script is split on semicolons and each statement is executed, so the FlowFile is routed to success
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
runner.getFlowFilesForRelationship(PutHiveQL.REL_SUCCESS)
|
||||
.forEach(f -> f.assertAttributeEquals(PutHiveQL.ATTR_OUTPUT_TABLES, "PERSONS"));
|
||||
|
||||
// Now we can check that the values were inserted by the multi-statement script.
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1), "Record ID mismatch");
|
||||
assertEquals("George", rs.getString(2), "Record NAME mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
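||||
// An escaped semicolon inside a string literal should be treated as part of the value, not as a statement delimiter.
|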
||||
@Test
|
||||
public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// The escaped semicolon is not treated as a statement delimiter, so both statements execute and the FlowFile is routed to success
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
|
||||
// Now we can check that the values were inserted by the multi-statement script.
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1), "Record ID mismatch");
|
||||
assertEquals("George\\;", rs.getString(2), "Record NAME mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
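||||
// Verifies that supplying a parameter type without a value binds NULL; the NULL CODE column is read back as 0.
|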
||||
@Test
|
||||
public void testWithNullParameter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(0, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
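||||
// A script whose second statement targets a nonexistent table should route the FlowFile to failure; the earlier INSERT still leaves a row in PERSONS.
|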
||||
@Test
|
||||
public void testInvalidStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// should fail because the second statement references a nonexistent table
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
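||||
// SQLExceptionService returns a connection whose prepareStatement throws a retryable SQLException (error code 30000), so the FlowFile should be routed to retry.
|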
||||
@Test
|
||||
public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final DBCPService service = new SQLExceptionService(null);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// the mocked connection throws a retryable SQLException, so the FlowFile is routed to retry.
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 1);
|
||||
}
|
||||
|
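||||
// Same retryable failure with Rollback On Failure enabled: the run throws a ProcessException and the FlowFile remains in the input queue.
|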
||||
@Test
|
||||
public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final DBCPService service = new SQLExceptionService(null);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 0);
|
||||
}
|
||||
|
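||||
// Simulates an SQLException with an unrecognized error code (2); the FlowFile should still be routed to retry.
|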
||||
@Test
|
||||
public void testUnknownFailure() throws InitializationException, ProcessException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final SQLExceptionService service = new SQLExceptionService(null);
|
||||
service.setErrorCode(2);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// the mocked connection throws an SQLException with an unrecognized error code, so the FlowFile is routed to retry.
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 1);
|
||||
}
|
||||
|
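||||
// Same unknown-error scenario (error code 0) with Rollback On Failure enabled: the run throws and the FlowFile remains queued.
|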
||||
@Test
|
||||
public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHiveQL.class);
|
||||
final SQLExceptionService service = new SQLExceptionService(null);
|
||||
service.setErrorCode(0);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||
runner.assertAllFlowFilesTransferred(PutHiveQL.REL_RETRY, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test-only HiveDBCPService backed by an embedded Derby database at the supplied location.
|
||||
*/
|
||||
private static class MockDBCPService extends AbstractControllerService implements HiveDBCPService {
|
||||
private final String dbLocation;
|
||||
|
||||
MockDBCPService(final String dbLocation) {
|
||||
this.dbLocation = dbLocation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
|
||||
return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true");
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return "jdbc:derby:" + dbLocation + ";create=true";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test-only HiveDBCPService whose getConnection() returns a mocked Connection that throws an SQLException with a configurable error code when preparing a statement.
|
||||
*/
|
||||
private static class SQLExceptionService extends AbstractControllerService implements HiveDBCPService {
|
||||
private final HiveDBCPService service;
|
||||
private int allowedBeforeFailure = 0;
|
||||
private int successful = 0;
|
||||
private int errorCode = 30000; // Default to a retryable exception code
|
||||
|
||||
SQLExceptionService(final HiveDBCPService service) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
if (++successful > allowedBeforeFailure) {
|
||||
final Connection conn = Mockito.mock(Connection.class);
|
||||
Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode));
|
||||
return conn;
|
||||
} else {
|
||||
return service.getConnection();
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return service != null ? service.getConnectionURL() : null;
|
||||
}
|
||||
|
||||
void setErrorCode(int errorCode) {
|
||||
this.errorCode = errorCode;
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
|
@@ -1,736 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.avro.file.DataFileStream;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.io.DatumReader;
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.dbcp.DBCPService;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.provenance.ProvenanceEventRecord;
|
||||
import org.apache.nifi.provenance.ProvenanceEventType;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.sql.Types;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.apache.nifi.processors.hive.SelectHiveQL.HIVEQL_OUTPUT_FORMAT;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestSelectHiveQL {
|
||||
|
||||
private static final Logger LOGGER;
|
||||
private final static String MAX_ROWS_KEY = "maxRows";
|
||||
private final int NUM_OF_ROWS = 100;
|
||||
|
||||
|
||||
static {
|
||||
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
|
||||
System.setProperty("org.slf4j.simpleLogger.showDateTime", "true");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHiveQL", "debug");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHiveQL", "debug");
|
||||
LOGGER = LoggerFactory.getLogger(TestSelectHiveQL.class);
|
||||
}
|
||||
|
||||
private final static String DB_LOCATION = "target/db";
|
||||
|
||||
private final static String QUERY_WITH_EL = "select "
|
||||
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||
+ " from persons PER"
|
||||
+ " where PER.ID > ${person.id}";
|
||||
|
||||
private final static String QUERY_WITHOUT_EL = "select "
|
||||
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||
+ " from persons PER"
|
||||
+ " where PER.ID > 10";
|
||||
|
||||
|
||||
@BeforeAll
|
||||
public static void setupClass() {
|
||||
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||
}
|
||||
|
||||
private TestRunner runner;
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws InitializationException {
|
||||
final DBCPService dbcp = new DBCPServiceSimpleImpl();
|
||||
final Map<String, String> dbcpProperties = new HashMap<>();
|
||||
|
||||
runner = TestRunners.newTestRunner(SelectHiveQL.class);
|
||||
runner.addControllerService("dbcp", dbcp, dbcpProperties);
|
||||
runner.enableControllerService(dbcp);
|
||||
runner.setProperty(SelectHiveQL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncomingConnectionWithNoFlowFile() throws InitializationException {
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons");
|
||||
runner.run();
|
||||
runner.assertTransferCount(SelectHiveQL.REL_SUCCESS, 0);
|
||||
runner.assertTransferCount(SelectHiveQL.REL_FAILURE, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
runner.setIncomingConnection(false);
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro");
|
||||
|
||||
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||
assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType());
|
||||
assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITH_EL, true, "Avro");
|
||||
|
||||
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||
assertEquals(4, provenanceEvents.size());
|
||||
|
||||
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||
assertEquals(ProvenanceEventType.FORK, provenance0.getEventType());
|
||||
|
||||
final ProvenanceEventRecord provenance1 = provenanceEvents.get(1);
|
||||
assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType());
|
||||
assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri());
|
||||
|
||||
final ProvenanceEventRecord provenance2 = provenanceEvents.get(2);
|
||||
assertEquals(ProvenanceEventType.FORK, provenance2.getEventType());
|
||||
|
||||
// The last one was removed as empty
|
||||
final ProvenanceEventRecord provenance3 = provenanceEvents.get(3);
|
||||
assertEquals(ProvenanceEventType.DROP, provenance3.getEventType());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testWithNullIntColumn() throws SQLException {
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_NULL_INT");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do, probably means the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
|
||||
|
||||
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
|
||||
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS);
|
||||
flowfiles.get(0).assertAttributeEquals(SelectHiveQL.RESULT_ROW_COUNT, "2");
|
||||
final long executionTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(flowfiles.get(0).getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithSqlException() throws SQLException {
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_NO_ROWS");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do, probably means the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_NO_ROWS (id integer)");
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
// Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS");
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPreQueriesNoIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
"select 'no exception' from persons; select exception from persons",
|
||||
null);
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPreQueriesWithIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||
"select 'no exception' from persons; select exception from persons",
|
||||
null);
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPostQueriesNoIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
null,
|
||||
"select 'no exception' from persons; select exception from persons");
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPostQueriesWithIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||
null,
|
||||
"select 'no exception' from persons; select exception from persons");
|
||||
|
||||
// with incoming connections, it should be rolled back
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithBadSQL() throws SQLException {
|
||||
final String BAD_SQL = "create table TEST_NO_ROWS (id integer)";
|
||||
|
||||
// Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent)
|
||||
runner.setIncomingConnection(true);
|
||||
// Enqueue the invalid (non-SELECT) statement so the processor reports an error
|
||||
runner.enqueue(BAD_SQL);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_FAILURE).get(0);
|
||||
flowFile.assertContentEquals(BAD_SQL);
|
||||
flowFile.assertAttributeEquals("parentIds", null);
|
||||
runner.clearTransferState();
|
||||
|
||||
// Test with no incoming flow file (an empty flow file is transferred)
|
||||
runner.setIncomingConnection(false);
|
||||
// Set the same invalid (non-SELECT) statement as the query property
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, BAD_SQL);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_FAILURE, 1);
|
||||
flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_FAILURE).get(0);
|
||||
flowFile.assertContentEquals("");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithCsv()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithAvro()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPreQieries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
"select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format.
|
||||
null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPostQieries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
null,
|
||||
// Real post-queries would be statements such as "set param=val", which the Derby driver
|
||||
// does not support, so any compilable query is used here instead.
|
||||
" select '4' from persons; \nselect '5' from persons");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPrePostQieries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
// Real pre/post queries would be statements such as "set param=val", which the Derby driver
|
||||
// does not support, so any compilable query is used here instead.
|
||||
"select '1' from persons; select '2' from persons",
|
||||
" select '4' from persons; \nselect '5' from persons");
|
||||
}
|
||||
|
||||
|
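||||
// Runs doOnTrigger and asserts the success FlowFile's row count, query timing attributes, and Avro or CSV content.
|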
||||
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null);
|
||||
}
|
||||
|
||||
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||
String preQueries, String postQueries)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries);
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS);
|
||||
MockFlowFile flowFile = flowfiles.get(0);
|
||||
final InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
|
||||
long recordsFromStream = 0;
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||
GenericRecord record = null;
|
||||
while (dataFileReader.hasNext()) {
|
||||
// Reuse record object by passing it to next(). This saves us from
|
||||
// allocating and garbage collecting many objects for files with
|
||||
// many items.
|
||||
record = dataFileReader.next(record);
|
||||
recordsFromStream++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
|
||||
String headerRow = br.readLine();
|
||||
// Derby capitalizes column names
|
||||
assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow);
|
||||
|
||||
// Validate rows
|
||||
String line;
|
||||
while ((line = br.readLine()) != null) {
|
||||
recordsFromStream++;
|
||||
String[] values = line.split(",");
|
||||
if (recordsFromStream < (NUM_OF_ROWS - 10)) {
|
||||
assertEquals(3, values.length);
|
||||
assertTrue(values[1].startsWith("\""));
|
||||
assertTrue(values[1].endsWith("\""));
|
||||
} else {
|
||||
assertEquals(2, values.length); // Middle value is null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final long executionTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(NUM_OF_ROWS - 10, recordsFromStream);
|
||||
assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHiveQL.RESULT_ROW_COUNT)));
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
flowFile.assertAttributeEquals(AbstractHiveQLProcessor.ATTR_INPUT_TABLES, "persons");
|
||||
}
|
||||
|
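||||
// Drops and recreates the Derby persons table, loads NUM_OF_ROWS rows (the last with a NULL code), configures the processor, optionally enqueues a FlowFile, and runs it once.
|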
||||
public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||
String preQueries, String postQueries)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((HiveDBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
final Statement stmt = con.createStatement();
|
||||
try {
|
||||
stmt.execute("drop table persons");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do here, the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table persons (id integer, name varchar(100), code integer)");
|
||||
Random rng = new Random(53496);
|
||||
stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")");
|
||||
for (int i = 2; i < NUM_OF_ROWS; i++) {
|
||||
stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")");
|
||||
}
|
||||
stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)");
|
||||
|
||||
LOGGER.info("test data loaded");
|
||||
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, query);
|
||||
runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat);
|
||||
if (preQueries != null) {
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_PRE_QUERY, preQueries);
|
||||
}
|
||||
if (postQueries != null) {
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_POST_QUERY, postQueries);
|
||||
}
|
||||
|
||||
if (incomingFlowFile) {
|
||||
// incoming FlowFile content is not used, but attributes are used
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("person.id", "10");
|
||||
runner.enqueue("Hello".getBytes(), attributes);
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(incomingFlowFile);
|
||||
runner.run();
|
||||
|
||||
return runner;
|
||||
}
|
||||
|
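||||
// 100 rows with Max Rows Per Flow File = 9 should yield 12 Avro FlowFiles: 11 with 9 records each and a final one with a single record.
|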
||||
@Test
|
||||
public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
InputStream in;
|
||||
MockFlowFile mff;
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
|
||||
runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||
runner.setVariable(MAX_ROWS_KEY, "9");
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 12);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
//ensure all but the last file have 9 records each
|
||||
for (int ff = 0; ff < 11; ff++) {
|
||||
mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(ff);
|
||||
final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
assertEquals(9, getNumberOfRecordsFromStream(in));
|
||||
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
}
|
||||
|
||||
//last file should have 1 record
|
||||
mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(11);
|
||||
final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
assertEquals(1, getNumberOfRecordsFromStream(in));
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
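||||
// Verifies that hiveql.args.* attributes on the incoming FlowFile bind the '?' parameter and are preserved on the result FlowFile.
|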
||||
@Test
|
||||
public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||
runner.setVariable(MAX_ROWS_KEY, "9");
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes );
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 1);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(0);
|
||||
final long executionTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(flowFile.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
// Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s)
|
||||
flowFile.assertAttributeEquals("hiveql.args.1.value", "1");
|
||||
flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
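||||
// Fragments the same 100-row table into 12 CSV FlowFiles; each fragment carries a header line (10 lines = 9 records + header).
|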
||||
@Test
|
||||
public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
InputStream in;
|
||||
MockFlowFile mff;
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV);
|
||||
|
||||
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap<String, String>() {{
|
||||
put(MAX_ROWS_KEY, "9");
|
||||
}});
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, 12);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
//ensure all but the last file have 9 records (10 lines = 9 records + header) each
|
||||
for (int ff = 0; ff < 11; ff++) {
|
||||
mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(ff);
|
||||
final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
assertEquals(10, br.lines().count());
|
||||
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
}
|
||||
|
||||
//last file should have 1 record (2 lines = 1 record + header)
|
||||
mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(11);
|
||||
final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
assertEquals(2, br.lines().count());
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
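||||
// With Max Fragments = 3, only the first three 9-record fragments are emitted even though more rows remain in the table.
|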
||||
@Test
|
||||
public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
InputStream in;
|
||||
MockFlowFile mff;
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
runner.setProperty(SelectHiveQL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
|
||||
runner.setProperty(SelectHiveQL.MAX_ROWS_PER_FLOW_FILE, "9");
|
||||
Integer maxFragments = 3;
|
||||
runner.setProperty(SelectHiveQL.MAX_FRAGMENTS, maxFragments.toString());
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHiveQL.REL_SUCCESS, maxFragments);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_ROW_COUNT);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_DURATION);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_EXECUTION_TIME);
|
||||
runner.assertAllFlowFilesContainAttribute(SelectHiveQL.REL_SUCCESS, SelectHiveQL.RESULT_QUERY_FETCH_TIME);
|
||||
|
||||
for (int i = 0; i < maxFragments; i++) {
|
||||
mff = runner.getFlowFilesForRelationship(SelectHiveQL.REL_SUCCESS).get(i);
|
||||
final long executionTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_EXECUTION_TIME));
|
||||
final long fetchTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_FETCH_TIME));
|
||||
final long durationTime = Long.parseLong(mff.getAttribute(SelectHiveQL.RESULT_QUERY_DURATION));
|
||||
|
||||
assertEquals(durationTime, fetchTime + executionTime);
|
||||
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
assertEquals(9, getNumberOfRecordsFromStream(in));
|
||||
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(i), mff.getAttribute("fragment.index"));
|
||||
assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count"));
|
||||
}
|
||||
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
||||
private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
|
||||
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||
GenericRecord record = null;
|
||||
long recordsFromStream = 0;
|
||||
while (dataFileReader.hasNext()) {
|
||||
// Reuse record object by passing it to next(). This saves us from
|
||||
// allocating and garbage collecting many objects for files with
|
||||
// many items.
|
||||
record = dataFileReader.next(record);
|
||||
recordsFromStream += 1;
|
||||
}
|
||||
|
||||
return recordsFromStream;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple implementation only for SelectHiveQL processor testing.
|
||||
*/
|
||||
private class DBCPServiceSimpleImpl extends AbstractControllerService implements HiveDBCPService {
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
|
||||
return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true");
|
||||
} catch (final Exception e) {
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return "jdbc:derby:" + DB_LOCATION + ";create=true";
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -1,449 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.dbcp.DBCPService;
|
||||
import org.apache.nifi.dbcp.hive.HiveDBCPService;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.schema.access.SchemaNotFoundException;
|
||||
import org.apache.nifi.serialization.RecordReader;
|
||||
import org.apache.nifi.serialization.SimpleRecordSchema;
|
||||
import org.apache.nifi.serialization.record.MockRecordParser;
|
||||
import org.apache.nifi.serialization.record.RecordField;
|
||||
import org.apache.nifi.serialization.record.RecordFieldType;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.DisabledOnOs;
|
||||
import org.junit.jupiter.api.condition.OS;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.stubbing.Answer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.BiFunction;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.mockito.ArgumentMatchers.anyInt;
|
||||
import static org.mockito.ArgumentMatchers.anyString;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
@DisabledOnOs(OS.WINDOWS)
|
||||
public class TestUpdateHiveTable {
|
||||
|
||||
private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml";
|
||||
private static final String TARGET_HIVE = "target/hive";
|
||||
|
||||
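||||
// Canned column names and result rows that emulate the output of Hive SHOW TABLES and DESC queries for the mocked JDBC connection.
|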
private static final String[] SHOW_TABLES_COLUMN_NAMES = new String[]{"tab_name"};
|
||||
private static final String[][] SHOW_TABLES_RESULTSET = new String[][]{
|
||||
new String[]{"messages"},
|
||||
new String[]{"users"},
|
||||
};
|
||||
|
||||
private static final String[] DESC_MESSAGES_TABLE_COLUMN_NAMES = new String[]{"id", "msg"};
|
||||
private static final String[][] DESC_MESSAGES_TABLE_RESULTSET = new String[][]{
|
||||
new String[]{"# col_name", "data_type", "comment"},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"id", "int", ""},
|
||||
new String[]{"msg", "string", ""},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"# Partition Information", null, null},
|
||||
new String[]{"# col_name", "data_type", "comment"},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"continent", "string", ""},
|
||||
new String[]{"country", "string", ""},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"# Detailed Table Information", null, null},
|
||||
new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages", null}
|
||||
};
|
||||
|
||||
private static final String[] DESC_USERS_TABLE_COLUMN_NAMES = new String[]{"name", "favorite_number", "favorite_color", "scale"};
|
||||
private static final String[][] DESC_USERS_TABLE_RESULTSET = new String[][]{
|
||||
new String[]{"name", "string", ""},
|
||||
new String[]{"favorite_number", "int", ""},
|
||||
new String[]{"favorite_color", "string", ""},
|
||||
new String[]{"scale", "double", ""},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"# Detailed Table Information", null, null},
|
||||
new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users", null}
|
||||
};
|
||||
private static final String[][] DESC_EXTERNAL_USERS_TABLE_RESULTSET = new String[][]{
|
||||
new String[]{"name", "string", ""},
|
||||
new String[]{"favorite_number", "int", ""},
|
||||
new String[]{"favorite_color", "string", ""},
|
||||
new String[]{"scale", "double", ""},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"# Detailed Table Information", null, null},
|
||||
new String[]{"Location:", "hdfs://mycluster:8020/path/to/users", null}
|
||||
};
|
||||
|
||||
private static final String[] DESC_NEW_TABLE_COLUMN_NAMES = DESC_USERS_TABLE_COLUMN_NAMES;
|
||||
private static final String[][] DESC_NEW_TABLE_RESULTSET = new String[][]{
|
||||
new String[]{"# col_name", "data_type", "comment"},
|
||||
new String[]{"name", "string", ""},
|
||||
new String[]{"favorite_number", "int", ""},
|
||||
new String[]{"favorite_color", "string", ""},
|
||||
new String[]{"scale", "double", ""},
|
||||
new String[]{"", null, null},
|
||||
new String[]{"# Detailed Table Information", null, null},
|
||||
new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable", null}
|
||||
};
|
||||
|
||||
private TestRunner runner;
|
||||
private MockUpdateHiveTable processor;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
|
||||
Configuration testConf = new Configuration();
|
||||
testConf.addResource(new Path(TEST_CONF_PATH));
|
||||
|
||||
// Delete any temp files from previous tests
|
||||
try {
|
||||
FileUtils.deleteDirectory(new File(TARGET_HIVE));
|
||||
} catch (IOException ioe) {
|
||||
// Do nothing, directory may not have existed
|
||||
}
|
||||
|
||||
processor = new MockUpdateHiveTable();
|
||||
}
|
||||
|
||||
private void configure(final UpdateHiveTable processor, final int numUsers) throws InitializationException {
|
||||
configure(processor, numUsers, false, -1);
|
||||
}
|
||||
|
||||
private void configure(final UpdateHiveTable processor, final int numUsers, boolean failOnCreateReader, int failAfter) throws InitializationException {
|
||||
configure(processor, numUsers, failOnCreateReader, failAfter, null);
|
||||
}
|
||||
|
||||
private void configure(final UpdateHiveTable processor, final int numUsers, final boolean failOnCreateReader, final int failAfter,
|
||||
final BiFunction<Integer, MockRecordParser, Void> recordGenerator) throws InitializationException {
|
||||
runner = TestRunners.newTestRunner(processor);
|
||||
MockRecordParser readerFactory = new MockRecordParser() {
|
||||
@Override
|
||||
public RecordReader createRecordReader(Map<String, String> variables, InputStream in, long inputLength, ComponentLog logger) throws IOException, SchemaNotFoundException {
|
||||
if (failOnCreateReader) {
|
||||
throw new SchemaNotFoundException("test");
|
||||
}
|
||||
return super.createRecordReader(variables, in, inputLength, logger);
|
||||
}
|
||||
};
|
||||
List<RecordField> fields = Arrays.asList(
|
||||
new RecordField("name", RecordFieldType.STRING.getDataType()),
|
||||
new RecordField("favorite_number", RecordFieldType.INT.getDataType()),
|
||||
new RecordField("favorite_color", RecordFieldType.STRING.getDataType()),
|
||||
new RecordField("scale", RecordFieldType.DOUBLE.getDataType())
|
||||
);
|
||||
final SimpleRecordSchema recordSchema = new SimpleRecordSchema(fields);
|
||||
for (final RecordField recordField : recordSchema.getFields()) {
|
||||
readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable());
|
||||
}
|
||||
|
||||
if (recordGenerator == null) {
|
||||
for (int i = 0; i < numUsers; i++) {
|
||||
readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0);
|
||||
}
|
||||
} else {
|
||||
recordGenerator.apply(numUsers, readerFactory);
|
||||
}
|
||||
|
||||
readerFactory.failAfter(failAfter);
|
||||
|
||||
runner.addControllerService("mock-reader-factory", readerFactory);
|
||||
runner.enableControllerService(readerFactory);
|
||||
|
||||
runner.setProperty(UpdateHiveTable.RECORD_READER, "mock-reader-factory");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetup(@TempDir java.nio.file.Path tempDir) throws Exception {
|
||||
configure(processor, 0);
|
||||
runner.assertNotValid();
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockHiveConnectionPool(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.assertNotValid();
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "users");
|
||||
runner.assertValid();
|
||||
runner.run();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNoStatementsExecuted() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "users");
|
||||
final MockHiveConnectionPool service = new MockHiveConnectionPool("test");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "continent, country");
|
||||
HashMap<String,String> attrs = new HashMap<>();
|
||||
attrs.put("continent", "Asia");
|
||||
attrs.put("country", "China");
|
||||
runner.enqueue(new byte[0], attrs);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1);
|
||||
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0);
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "users");
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users");
|
||||
assertTrue(service.getExecutedStatements().isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateManagedTable() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}");
|
||||
runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET);
|
||||
final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
Map<String, String> attrs = new HashMap<>();
|
||||
attrs.put("db.name", "default");
|
||||
attrs.put("table.name", "_newTable");
|
||||
runner.enqueue(new byte[0], attrs);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1);
|
||||
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0);
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "_newTable");
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable");
|
||||
List<String> statements = service.getExecutedStatements();
|
||||
assertEquals(1, statements.size());
|
||||
assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET",
|
||||
statements.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateManagedTableWithPartition() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}");
|
||||
runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS);
|
||||
runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "age int");
|
||||
runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET);
|
||||
final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
Map<String, String> attrs = new HashMap<>();
|
||||
attrs.put("db.name", "default");
|
||||
attrs.put("table.name", "_newTable");
|
||||
attrs.put("age", "23");
|
||||
runner.enqueue(new byte[0], attrs);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1);
|
||||
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0);
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "_newTable");
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable");
|
||||
List<String> statements = service.getExecutedStatements();
|
||||
assertEquals(1, statements.size());
|
||||
assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) PARTITIONED BY (`age` int) STORED AS PARQUET",
|
||||
statements.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateExternalTable() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "${table.name}");
|
||||
runner.setProperty(UpdateHiveTable.CREATE_TABLE, UpdateHiveTable.CREATE_IF_NOT_EXISTS);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_MANAGEMENT_STRATEGY, UpdateHiveTable.EXTERNAL_TABLE);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_STORAGE_FORMAT, UpdateHiveTable.PARQUET);
|
||||
final MockHiveConnectionPool service = new MockHiveConnectionPool("ext_users");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.assertNotValid(); // Needs location specified
|
||||
runner.setProperty(UpdateHiveTable.EXTERNAL_TABLE_LOCATION, "/path/to/users");
|
||||
runner.assertValid();
|
||||
Map<String, String> attrs = new HashMap<>();
|
||||
attrs.put("db.name", "default");
|
||||
attrs.put("table.name", "ext_users");
|
||||
runner.enqueue(new byte[0], attrs);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1);
|
||||
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0);
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "ext_users");
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/path/to/users");
|
||||
List<String> statements = service.getExecutedStatements();
|
||||
assertEquals(1, statements.size());
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS `ext_users` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET "
|
||||
+ "LOCATION '/path/to/users'",
|
||||
statements.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddColumnsAndPartition() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "messages");
|
||||
final MockHiveConnectionPool service = new MockHiveConnectionPool("test");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(UpdateHiveTable.PARTITION_CLAUSE, "continent, country");
|
||||
HashMap<String,String> attrs = new HashMap<>();
|
||||
attrs.put("continent", "Asia");
|
||||
attrs.put("country", "China");
|
||||
runner.enqueue(new byte[0], attrs);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 1);
|
||||
final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHiveTable.REL_SUCCESS).get(0);
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_TABLE, "messages");
|
||||
flowFile.assertAttributeEquals(UpdateHiveTable.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages/continent=Asia/country=China");
|
||||
List<String> statements = service.getExecutedStatements();
|
||||
assertEquals(2, statements.size());
|
||||
// All columns from users table/data should be added to the table, and a new partition should be added
|
||||
assertEquals("ALTER TABLE `messages` ADD COLUMNS (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE)",
|
||||
statements.get(0));
|
||||
assertEquals("ALTER TABLE `messages` ADD IF NOT EXISTS PARTITION (`continent`='Asia', `country`='China')",
|
||||
statements.get(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMissingPartitionValues() throws Exception {
|
||||
configure(processor, 1);
|
||||
runner.setProperty(UpdateHiveTable.TABLE_NAME, "messages");
|
||||
final DBCPService service = new MockHiveConnectionPool("test");
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(UpdateHiveTable.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue(new byte[0]);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_SUCCESS, 0);
|
||||
runner.assertTransferCount(UpdateHiveTable.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
private static final class MockUpdateHiveTable extends UpdateHiveTable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple implementation only for testing purposes
|
||||
*/
|
||||
private static class MockHiveConnectionPool extends AbstractControllerService implements HiveDBCPService {
|
||||
private final String dbLocation;
|
||||
|
||||
private final List<String> executedStatements = new ArrayList<>();
|
||||
|
||||
MockHiveConnectionPool(final String dbLocation) {
|
||||
this.dbLocation = dbLocation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
Connection conn = mock(Connection.class);
|
||||
Statement s = mock(Statement.class);
|
||||
when(conn.createStatement()).thenReturn(s);
|
||||
when(s.executeQuery(anyString())).thenAnswer((Answer<ResultSet>) invocation -> {
|
||||
final String query = invocation.getArgument(0);
|
||||
if ("SHOW TABLES".equals(query)) {
|
||||
return new MockResultSet(SHOW_TABLES_COLUMN_NAMES, SHOW_TABLES_RESULTSET).createResultSet();
|
||||
} else if ("DESC FORMATTED `messages`".equals(query)) {
|
||||
return new MockResultSet(DESC_MESSAGES_TABLE_COLUMN_NAMES, DESC_MESSAGES_TABLE_RESULTSET).createResultSet();
|
||||
} else if ("DESC FORMATTED `users`".equals(query)) {
|
||||
return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_USERS_TABLE_RESULTSET).createResultSet();
|
||||
} else if ("DESC FORMATTED `ext_users`".equals(query)) {
|
||||
return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_EXTERNAL_USERS_TABLE_RESULTSET).createResultSet();
|
||||
} else if ("DESC FORMATTED `_newTable`".equals(query)) {
|
||||
return new MockResultSet(DESC_NEW_TABLE_COLUMN_NAMES, DESC_NEW_TABLE_RESULTSET).createResultSet();
|
||||
} else {
|
||||
return new MockResultSet(new String[]{}, new String[][]{new String[]{}}).createResultSet();
|
||||
}
|
||||
});
|
||||
when(s.execute(anyString())).thenAnswer((Answer<Boolean>) invocation -> {
|
||||
executedStatements.add(invocation.getArgument(0));
|
||||
return false;
|
||||
});
|
||||
return conn;
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return "jdbc:fake:" + dbLocation;
|
||||
}
|
||||
|
||||
List<String> getExecutedStatements() {
|
||||
return executedStatements;
|
||||
}
|
||||
}
|
||||
|
||||
private static class MockResultSet {
|
||||
String[] colNames;
|
||||
String[][] data;
|
||||
int currentRow;
|
||||
|
||||
MockResultSet(String[] colNames, String[][] data) {
|
||||
this.colNames = colNames;
|
||||
this.data = data;
|
||||
currentRow = 0;
|
||||
}
|
||||
|
||||
ResultSet createResultSet() throws SQLException {
|
||||
ResultSet rs = mock(ResultSet.class);
|
||||
when(rs.next()).thenAnswer((Answer<Boolean>) invocation -> (data != null) && (++currentRow <= data.length));
|
||||
when(rs.getString(anyInt())).thenAnswer((Answer<String>) invocation -> {
|
||||
final int index = invocation.getArgument(0);
|
||||
if (index < 1) {
|
||||
throw new SQLException("Columns start with index 1");
|
||||
}
|
||||
if (currentRow > data.length) {
|
||||
throw new SQLException("This result set is already closed");
|
||||
}
|
||||
return data[currentRow - 1][index - 1];
|
||||
});
|
||||
|
||||
return rs;
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -1,137 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.util.hive;

import com.google.common.util.concurrent.UncheckedExecutionException;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.hcatalog.streaming.HiveEndPoint;
import org.apache.hive.hcatalog.streaming.InvalidTable;
import org.apache.hive.hcatalog.streaming.RecordWriter;
import org.apache.hive.hcatalog.streaming.StreamingConnection;
import org.apache.hive.hcatalog.streaming.StreamingException;
import org.apache.hive.hcatalog.streaming.TransactionBatch;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.stubbing.Answer;

import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.security.PrivilegedExceptionAction;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.isA;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class HiveWriterTest {
    private HiveEndPoint hiveEndPoint;
    private int txnsPerBatch;
    private boolean autoCreatePartitions;
    private int callTimeout;
    private ExecutorService executorService;
    private UserGroupInformation userGroupInformation;
    private HiveConf hiveConf;
    private HiveWriter hiveWriter;
    private StreamingConnection streamingConnection;
    private RecordWriter recordWriter;
    private Callable<RecordWriter> recordWriterCallable;
    private TransactionBatch transactionBatch;

    @BeforeEach
    public void setup() throws Exception {
        hiveEndPoint = mock(HiveEndPoint.class);
        txnsPerBatch = 100;
        autoCreatePartitions = true;
        callTimeout = 0;
        executorService = mock(ExecutorService.class);
        streamingConnection = mock(StreamingConnection.class);
        transactionBatch = mock(TransactionBatch.class);
        userGroupInformation = mock(UserGroupInformation.class);
        hiveConf = mock(HiveConf.class);
        recordWriter = mock(RecordWriter.class);
        recordWriterCallable = mock(Callable.class);
        when(recordWriterCallable.call()).thenReturn(recordWriter);

        when(hiveEndPoint.newConnection(autoCreatePartitions, hiveConf, userGroupInformation)).thenReturn(streamingConnection);
        when(streamingConnection.fetchTransactionBatch(txnsPerBatch, recordWriter)).thenReturn(transactionBatch);
        when(executorService.submit(isA(Callable.class))).thenAnswer(invocation -> {
            Future future = mock(Future.class);
            Answer<Object> answer = i -> ((Callable) invocation.getArguments()[0]).call();
            when(future.get()).thenAnswer(answer);
            when(future.get(anyLong(), any(TimeUnit.class))).thenAnswer(answer);
            return future;
        });
        when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> {
            try {
                try {
                    return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run();
                } catch (UncheckedExecutionException e) {
                    // Creation of strict json writer will fail due to external deps, this gives us chance to catch it
                    for (StackTraceElement stackTraceElement : e.getStackTrace()) {
                        if (stackTraceElement.toString().startsWith("org.apache.hive.hcatalog.streaming.StrictJsonWriter.<init>(")) {
                            return recordWriterCallable.call();
                        }
                    }
                    throw e;
                }
            } catch (IOException | Error | RuntimeException | InterruptedException e) {
                throw e;
            } catch (Throwable e) {
                throw new UndeclaredThrowableException(e);
            }
        });

        initWriter();
    }

    private void initWriter() throws Exception {
        hiveWriter = new HiveWriter(hiveEndPoint, txnsPerBatch, autoCreatePartitions, callTimeout, executorService, userGroupInformation, hiveConf);
    }

    @Test
    public void testNormal() {
        assertNotNull(hiveWriter);
    }

    @Test
    public void testNewConnectionInvalidTable() throws Exception {
        hiveEndPoint = mock(HiveEndPoint.class);
        InvalidTable invalidTable = new InvalidTable("badDb", "badTable");
        when(hiveEndPoint.newConnection(autoCreatePartitions, hiveConf, userGroupInformation)).thenThrow(invalidTable);
        HiveWriter.ConnectFailure e = assertThrows(HiveWriter.ConnectFailure.class, () -> initWriter());
        assertEquals(invalidTable, e.getCause());
    }

    @Test
    public void testRecordWriterStreamingException() throws Exception {
        recordWriterCallable = mock(Callable.class);
        StreamingException streamingException = new StreamingException("Test Exception");
        when(recordWriterCallable.call()).thenThrow(streamingException);
        HiveWriter.ConnectFailure e = assertThrows(HiveWriter.ConnectFailure.class, () -> initWriter());
        assertEquals(streamingException, e.getCause());
    }
}
@@ -1,467 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.orc;
|
||||
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.SchemaBuilder;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.util.Utf8;
|
||||
import org.apache.hadoop.hive.ql.io.orc.NiFiOrcUtils;
|
||||
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
|
||||
import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
|
||||
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
|
||||
import org.apache.hadoop.io.DoubleWritable;
|
||||
import org.apache.hadoop.io.FloatWritable;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Unit tests for the NiFiOrcUtils helper class
|
||||
*/
|
||||
public class TestNiFiOrcUtils {
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_primitive() throws Exception {
|
||||
// Expected ORC types
|
||||
TypeInfo[] expectedTypes = {
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("int"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("bigint"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("boolean"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("float"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("double"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("binary"),
|
||||
TypeInfoFactory.getPrimitiveTypeInfo("string")
|
||||
};
|
||||
|
||||
// Build a fake Avro record with all types
|
||||
Schema testSchema = buildPrimitiveAvroSchema();
|
||||
List<Schema.Field> fields = testSchema.getFields();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
assertEquals(expectedTypes[i], NiFiOrcUtils.getOrcField(fields.get(i).schema()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_union_optional_type() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("union").type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
|
||||
assertEquals(TypeInfoCreator.createBoolean(), orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_union() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
|
||||
assertEquals(
|
||||
TypeInfoFactory.getUnionTypeInfo(Arrays.asList(
|
||||
TypeInfoCreator.createInt(),
|
||||
TypeInfoCreator.createBoolean())),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_map() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("map").type().map().values().doubleType().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
|
||||
assertEquals(
|
||||
TypeInfoFactory.getMapTypeInfo(
|
||||
TypeInfoCreator.createString(),
|
||||
TypeInfoCreator.createDouble()),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_nested_map() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("map").type().map().values().map().values().doubleType().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("map").schema());
|
||||
assertEquals(
|
||||
TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(),
|
||||
TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createDouble())),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_array() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("array").type().array().items().longType().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
|
||||
assertEquals(
|
||||
TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createLong()),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_complex_array() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("array").type().array().items().map().values().floatType().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("array").schema());
|
||||
assertEquals(
|
||||
TypeInfoFactory.getListTypeInfo(TypeInfoFactory.getMapTypeInfo(TypeInfoCreator.createString(), TypeInfoCreator.createFloat())),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_record() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("int").type().intType().noDefault();
|
||||
builder.name("long").type().longType().longDefault(1L);
|
||||
builder.name("array").type().array().items().stringType().noDefault();
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema);
|
||||
assertEquals(
|
||||
TypeInfoFactory.getStructTypeInfo(
|
||||
Arrays.asList("int", "long", "array"),
|
||||
Arrays.asList(
|
||||
TypeInfoCreator.createInt(),
|
||||
TypeInfoCreator.createLong(),
|
||||
TypeInfoFactory.getListTypeInfo(TypeInfoCreator.createString()))),
|
||||
orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getOrcField_enum() throws Exception {
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
|
||||
builder.name("enumField").type().enumeration("enum").symbols("a", "b", "c").enumDefault("a");
|
||||
Schema testSchema = builder.endRecord();
|
||||
TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("enumField").schema());
|
||||
assertEquals(TypeInfoCreator.createString(), orcType);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType() throws Exception {
|
||||
// Expected ORC types
|
||||
TypeInfo[] expectedTypes = {
|
||||
TypeInfoCreator.createInt(),
|
||||
TypeInfoCreator.createLong(),
|
||||
TypeInfoCreator.createBoolean(),
|
||||
TypeInfoCreator.createFloat(),
|
||||
TypeInfoCreator.createDouble(),
|
||||
TypeInfoCreator.createBinary(),
|
||||
TypeInfoCreator.createString(),
|
||||
};
|
||||
|
||||
Schema testSchema = buildPrimitiveAvroSchema();
|
||||
List<Schema.Field> fields = testSchema.getFields();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
assertEquals(expectedTypes[i], NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(fields.get(i).schema().getType()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getPrimitiveOrcTypeFromPrimitiveAvroType_badType() throws Exception {
|
||||
Schema.Type nonPrimitiveType = Schema.Type.ARRAY;
|
||||
assertThrows(IllegalArgumentException.class, () -> NiFiOrcUtils.getPrimitiveOrcTypeFromPrimitiveAvroType(nonPrimitiveType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getWritable() throws Exception {
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1) instanceof IntWritable);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1L) instanceof LongWritable);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0f) instanceof FloatWritable);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, 1.0) instanceof DoubleWritable);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, BigDecimal.valueOf(1.0D)) instanceof HiveDecimalWritable);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, new int[]{1, 2, 3}) instanceof List);
|
||||
assertTrue(NiFiOrcUtils.convertToORCObject(null, Arrays.asList(1, 2, 3)) instanceof List);
|
||||
Map<String, Float> map = new HashMap<>();
|
||||
map.put("Hello", 1.0f);
|
||||
map.put("World", 2.0f);
|
||||
|
||||
Object convMap = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("map<string,float>"), map);
|
||||
assertTrue(convMap instanceof Map);
|
||||
((Map) convMap).forEach((key, value) -> {
|
||||
assertTrue(key instanceof Text);
|
||||
assertTrue(value instanceof FloatWritable);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getHiveTypeFromAvroType_primitive() throws Exception {
|
||||
// Expected ORC types
|
||||
String[] expectedTypes = {
|
||||
"INT",
|
||||
"BIGINT",
|
||||
"BOOLEAN",
|
||||
"FLOAT",
|
||||
"DOUBLE",
|
||||
"BINARY",
|
||||
"STRING",
|
||||
};
|
||||
|
||||
Schema testSchema = buildPrimitiveAvroSchema();
|
||||
List<Schema.Field> fields = testSchema.getFields();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_getHiveTypeFromAvroType_complex() throws Exception {
|
||||
// Expected ORC types
|
||||
String[] expectedTypes = {
|
||||
"INT",
|
||||
"MAP<STRING, DOUBLE>",
|
||||
"STRING",
|
||||
"UNIONTYPE<BIGINT, FLOAT>",
|
||||
"ARRAY<INT>",
|
||||
"DECIMAL(10,2)"
|
||||
};
|
||||
|
||||
Schema testSchema = buildComplexAvroSchema();
|
||||
List<Schema.Field> fields = testSchema.getFields();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
assertEquals(expectedTypes[i], NiFiOrcUtils.getHiveTypeFromAvroType(fields.get(i).schema()));
|
||||
}
|
||||
|
||||
assertEquals("STRUCT<myInt:INT, myMap:MAP<STRING, DOUBLE>, myEnum:STRING, myLongOrFloat:UNIONTYPE<BIGINT, FLOAT>, myIntList:ARRAY<INT>, myDecimal:DECIMAL(10,2)>",
|
||||
NiFiOrcUtils.getHiveTypeFromAvroType(testSchema));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_generateHiveDDL_primitive() throws Exception {
|
||||
Schema avroSchema = buildPrimitiveAvroSchema();
|
||||
String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable");
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
|
||||
+ " STORED AS ORC", ddl);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_generateHiveDDL_complex() throws Exception {
|
||||
Schema avroSchema = buildComplexAvroSchema();
|
||||
String ddl = NiFiOrcUtils.generateHiveDDL(avroSchema, "myHiveTable");
|
||||
assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS myHiveTable "
|
||||
+ "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>, myDecimal DECIMAL(10,2))"
|
||||
+ " STORED AS ORC", ddl);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_convertToORCObject() {
|
||||
Schema schema = SchemaBuilder.enumeration("myEnum").symbols("x", "y", "z");
|
||||
List<Object> objects = Arrays.asList(new Utf8("Hello"), new GenericData.EnumSymbol(schema, "x"));
|
||||
objects.forEach((avroObject) -> {
|
||||
Object o = NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,string>"), avroObject);
|
||||
assertTrue(o instanceof UnionObject);
|
||||
UnionObject uo = (UnionObject) o;
|
||||
assertTrue(uo.getObject() instanceof Text);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_convertToORCObjectBadUnion() {
|
||||
assertThrows(IllegalArgumentException.class, () -> NiFiOrcUtils.convertToORCObject(TypeInfoUtils.getTypeInfoFromTypeString("uniontype<bigint,long>"), "Hello"));
|
||||
}
|
||||
|
||||
|
||||
//////////////////
|
||||
// Helper methods
|
||||
//////////////////
|
||||
|
||||
public static Schema buildPrimitiveAvroSchema() {
|
||||
// Build a fake Avro record with all primitive types
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
|
||||
builder.name("int").type().intType().noDefault();
|
||||
builder.name("long").type().longType().longDefault(1L);
|
||||
builder.name("boolean").type().booleanType().booleanDefault(true);
|
||||
builder.name("float").type().floatType().floatDefault(0.0f);
|
||||
builder.name("double").type().doubleType().doubleDefault(0.0);
|
||||
builder.name("bytes").type().bytesType().noDefault();
|
||||
builder.name("string").type().stringType().stringDefault("default");
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static Schema buildAvroSchemaWithNull() {
|
||||
// Build a fake Avro record which contains null
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
|
||||
builder.name("string").type().stringType().stringDefault("default");
|
||||
builder.name("null").type().nullType().noDefault();
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static Schema buildAvroSchemaWithEmptyArray() {
|
||||
// Build a fake Avro record which contains empty array
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
|
||||
builder.name("string").type().stringType().stringDefault("default");
|
||||
builder.name("emptyArray").type().array().items().nullType().noDefault();
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static Schema buildAvroSchemaWithFixed() {
|
||||
// Build a fake Avro record which contains null
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("test.record").namespace("any.data").fields();
|
||||
builder.name("fixed").type().fixed("fixedField").size(6).fixedDefault("123456");
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static GenericData.Record buildPrimitiveAvroRecord(int i, long l, boolean b, float f, double d, ByteBuffer bytes, String string) {
|
||||
Schema schema = buildPrimitiveAvroSchema();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("int", i);
|
||||
row.put("long", l);
|
||||
row.put("boolean", b);
|
||||
row.put("float", f);
|
||||
row.put("double", d);
|
||||
row.put("bytes", bytes);
|
||||
row.put("string", string);
|
||||
return row;
|
||||
}
|
||||
|
||||
public static TypeInfo buildPrimitiveOrcSchema() {
|
||||
return TypeInfoFactory.getStructTypeInfo(Arrays.asList("int", "long", "boolean", "float", "double", "bytes", "string"),
|
||||
Arrays.asList(
|
||||
TypeInfoCreator.createInt(),
|
||||
TypeInfoCreator.createLong(),
|
||||
TypeInfoCreator.createBoolean(),
|
||||
TypeInfoCreator.createFloat(),
|
||||
TypeInfoCreator.createDouble(),
|
||||
TypeInfoCreator.createBinary(),
|
||||
TypeInfoCreator.createString()));
|
||||
}
|
||||
|
||||
public static Schema buildComplexAvroSchema() {
|
||||
// Build a fake Avro record with nested types
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("complex.record").namespace("any.data").fields();
|
||||
builder.name("myInt").type().unionOf().nullType().and().intType().endUnion().nullDefault();
|
||||
builder.name("myMap").type().map().values().doubleType().noDefault();
|
||||
builder.name("myEnum").type().enumeration("myEnum").symbols("ABC", "DEF", "XYZ").enumDefault("ABC");
|
||||
builder.name("myLongOrFloat").type().unionOf().longType().and().floatType().endUnion().noDefault();
|
||||
builder.name("myIntList").type().array().items().intType().noDefault();
|
||||
builder.name("myDecimal").type().bytesBuilder()
|
||||
.prop("logicalType", "decimal")
|
||||
.prop("precision", "10")
|
||||
.prop("scale", "2")
|
||||
.endBytes().noDefault();
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static GenericData.Record buildComplexAvroRecord(Integer i, Map<String, Double> m, String e, Object unionVal, List<Integer> intArray, ByteBuffer decimal) {
|
||||
Schema schema = buildComplexAvroSchema();
|
||||
Schema enumSchema = schema.getField("myEnum").schema();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("myInt", i);
|
||||
row.put("myMap", m);
|
||||
row.put("myEnum", new GenericData.EnumSymbol(enumSchema, e));
|
||||
row.put("myLongOrFloat", unionVal);
|
||||
row.put("myIntList", intArray);
|
||||
row.put("myDecimal", decimal);
|
||||
return row;
|
||||
}
|
||||
|
||||
public static GenericData.Record buildAvroRecordWithNull(String string) {
|
||||
Schema schema = buildAvroSchemaWithNull();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("string", string);
|
||||
row.put("null", null);
|
||||
return row;
|
||||
}
|
||||
|
||||
public static GenericData.Record buildAvroRecordWithEmptyArray(String string) {
|
||||
Schema schema = buildAvroSchemaWithEmptyArray();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("string", string);
|
||||
row.put("emptyArray", Collections.emptyList());
|
||||
return row;
|
||||
}
|
||||
|
||||
public static GenericData.Record buildAvroRecordWithFixed(String string) {
|
||||
Schema schema = buildAvroSchemaWithFixed();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("fixed", new GenericData.Fixed(schema, string.getBytes(StandardCharsets.UTF_8)));
|
||||
return row;
|
||||
}
|
||||
|
||||
public static TypeInfo buildComplexOrcSchema() {
|
||||
return TypeInfoUtils.getTypeInfoFromTypeString("struct<myInt:int,myMap:map<string,double>,myEnum:string,myLongOrFloat:uniontype<int>,myIntList:array<int>,myDecimal:decimal(10,2)>");
|
||||
}
|
||||
|
||||
public static Schema buildNestedComplexAvroSchema() {
|
||||
// Build a fake Avro record with nested complex types
|
||||
final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("nested.complex.record").namespace("any.data").fields();
|
||||
builder.name("myMapOfArray").type().map().values().array().items().doubleType().noDefault();
|
||||
builder.name("myArrayOfMap").type().array().items().map().values().stringType().noDefault();
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static GenericData.Record buildNestedComplexAvroRecord(Map<String, List<Double>> m, List<Map<String, String>> a) {
|
||||
Schema schema = buildNestedComplexAvroSchema();
|
||||
GenericData.Record row = new GenericData.Record(schema);
|
||||
row.put("myMapOfArray", m);
|
||||
row.put("myArrayOfMap", a);
|
||||
return row;
|
||||
}
|
||||
|
||||
public static TypeInfo buildNestedComplexOrcSchema() {
|
||||
return TypeInfoUtils.getTypeInfoFromTypeString("struct<myMapOfArray:map<string,array<double>>,myArrayOfMap:array<map<string,string>>>");
|
||||
}
|
||||
|
||||
private static class TypeInfoCreator {
|
||||
static TypeInfo createInt() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("int");
|
||||
}
|
||||
|
||||
static TypeInfo createLong() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("bigint");
|
||||
}
|
||||
|
||||
static TypeInfo createBoolean() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("boolean");
|
||||
}
|
||||
|
||||
static TypeInfo createFloat() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("float");
|
||||
}
|
||||
|
||||
static TypeInfo createDouble() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("double");
|
||||
}
|
||||
|
||||
static TypeInfo createBinary() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("binary");
|
||||
}
|
||||
|
||||
static TypeInfo createString() {
|
||||
return TypeInfoFactory.getPrimitiveTypeInfo("string");
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -1,38 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
{
  "namespace" : "org.apache.nifi",
  "name" : "outer_record",
  "type" : "record",
  "fields" : [ {
    "name" : "records",
    "type" : {
      "type" : "array",
      "items" : {
        "type" : "record",
        "name" : "inner_record",
        "fields" : [ {
          "name" : "name",
          "type" : "string"
        }, {
          "name" : "age",
          "type" : "int"
        } ]
      }
    }
  } ]
}
@@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
    <property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
    </property>
</configuration>
@@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
</configuration>
@@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>KERBEROS</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
</configuration>
@@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>file:///</value>
    </property>
</configuration>
@@ -1,10 +0,0 @@
[libdefaults]
 default_realm = EXAMPLE.COM
 dns_lookup_kdc = false
 dns_lookup_realm = false

[realms]
 EXAMPLE.COM = {
  kdc = kerberos.example.com
  admin_server = kerberos.example.com
 }
@@ -1,26 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
{"namespace": "example.avro",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number", "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]},
     {"name": "scale", "type": ["double", "null"]}
 ]
}
@@ -1,31 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.dbcp.hive;


import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;

/**
 * Definition for Hive 1.1 Database Connection Pooling Service.
 *
 */
@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.")
public interface Hive_1_1DBCPService extends HiveDBCPService {
    public String getConnectionURL();
}
@@ -1,47 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-hive-bundle</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-hive_1_1-nar</artifactId>
    <packaging>nar</packaging>
    <properties>
        <maven.javadoc.skip>true</maven.javadoc.skip>
        <source.skip>true</source.skip>
        <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
        <hadoop.version>${hive11.hadoop.version}</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-services-api-nar</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <type>nar</type>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive_1_1-processors</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
    </dependencies>
</project>
@ -1,225 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-hive-bundle</artifactId>
        <version>2.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-hive_1_1-processors</artifactId>
    <packaging>jar</packaging>

    <properties>
        <!-- Need to override hadoop.version here, for Hive and hadoop-client transitive dependencies -->
        <hadoop.version>${hive11.hadoop.version}</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-put-pattern</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-security-kerberos</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-dbcp-service-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hive-services-api</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-kerberos-credentials-service-api</artifactId>
            <scope>provided</scope>
        </dependency>
        <!-- Override groovy-all:2.1.6 from Hive -->
        <dependency>
            <groupId>org.codehaus.groovy</groupId>
            <artifactId>groovy-all</artifactId>
            <version>2.4.21</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive11.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.json</groupId>
                    <artifactId>json</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.mockito</groupId>
                    <artifactId>mockito-all</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-streaming</artifactId>
            <version>${hive11.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive.hcatalog</groupId>
            <artifactId>hive-hcatalog-core</artifactId>
            <version>${hive11.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.code.findbugs</groupId>
                    <artifactId>jsr305</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>apache-log4j-extras</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hadoop-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-hadoop-record-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-record-serialization-service-api</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-record</artifactId>
        </dependency>
        <dependency>
            <groupId>com.github.stephenc.findbugs</groupId>
            <artifactId>findbugs-annotations</artifactId>
            <version>1.3.9-1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-text</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-dbcp2</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>log4j-over-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.nifi</groupId>
            <artifactId>nifi-mock-record-utils</artifactId>
            <version>2.0.0-SNAPSHOT</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
@ -1,453 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.dbcp.hive;

import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.jdbc.HiveDriver;
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.DeprecationNotice;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnDisabled;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.resource.ResourceCardinality;
import org.apache.nifi.components.resource.ResourceType;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.controller.ControllerServiceInitializationContext;
import org.apache.nifi.dbcp.DBCPValidator;
import org.apache.nifi.expression.AttributeExpression;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.hadoop.SecurityUtil;
import org.apache.nifi.kerberos.KerberosCredentialsService;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.security.krb.KerberosKeytabUser;
import org.apache.nifi.security.krb.KerberosLoginException;
import org.apache.nifi.security.krb.KerberosPasswordUser;
import org.apache.nifi.security.krb.KerberosUser;
import org.apache.nifi.util.hive.AuthenticationFailedException;
import org.apache.nifi.util.hive.HiveConfigurator;
import org.apache.nifi.util.hive.ValidationResources;

import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.security.PrivilegedExceptionAction;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Implementation for Database Connection Pooling Service used for Apache Hive 1.1
 * connections. Apache DBCP is used for connection pooling functionality.
 */
@RequiresInstanceClassLoading
@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"})
@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.")
@DeprecationNotice(classNames = "org.apache.nifi.dbcp.hive.Hive3ConnectionPool")
public class Hive_1_1ConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService {

    private static final String DEFAULT_MAX_CONN_LIFETIME = "-1";

    public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder()
            .name("hive-db-connect-url")
            .displayName("Database Connection URL")
            .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters."
                    + " The exact syntax of a database connection URL is specified by the Hive documentation. For example, the server principal is often included "
                    + "as a connection parameter when connecting to a secure Hive server.")
            .defaultValue(null)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .required(true)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder()
            .name("hive-config-resources")
            .displayName("Hive Configuration Resources")
            .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop "
                    + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication "
                    + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.")
            .required(false)
            .identifiesExternalResource(ResourceCardinality.MULTIPLE, ResourceType.FILE)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder()
            .name("hive-db-user")
            .displayName("Database User")
            .description("Database user name")
            .defaultValue(null)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder()
            .name("hive-db-password")
            .displayName("Password")
            .description("The password for the database user")
            .defaultValue(null)
            .required(false)
            .sensitive(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder()
            .name("hive-max-wait-time")
            .displayName("Max Wait Time")
            .description("The maximum amount of time that the pool will wait (when there are no available connections) "
                    + " for a connection to be returned before failing, or -1 to wait indefinitely. ")
            .defaultValue("500 millis")
            .required(true)
            .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder()
            .name("hive-max-total-connections")
            .displayName("Max Total Connections")
            .description("The maximum number of active connections that can be allocated from this pool at the same time, "
                    + "or negative for no limit.")
            .defaultValue("8")
            .required(true)
            .addValidator(StandardValidators.INTEGER_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor MAX_CONN_LIFETIME = new PropertyDescriptor.Builder()
            .displayName("Max Connection Lifetime")
            .name("hive-max-conn-lifetime")
            .description("The maximum lifetime in milliseconds of a connection. After this time is exceeded the " +
                    "connection pool will invalidate the connection. A value of zero or -1 " +
                    "means the connection has an infinite lifetime.")
            .defaultValue(DEFAULT_MAX_CONN_LIFETIME)
            .required(true)
            .addValidator(DBCPValidator.CUSTOM_TIME_PERIOD_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder()
            .name("Validation-query")
            .displayName("Validation query")
            .description("Validation query used to validate connections before returning them. "
                    + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. "
                    + "NOTE: Using validation may have a performance penalty.")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder()
            .name("kerberos-credentials-service")
            .displayName("Kerberos Credentials Service")
            .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos")
            .identifiesControllerService(KerberosCredentialsService.class)
            .required(false)
            .build();

    static final PropertyDescriptor KERBEROS_PRINCIPAL = new PropertyDescriptor.Builder()
            .name("kerberos-principal")
            .displayName("Kerberos Principal")
            .description("The principal to use when specifying the principal and password directly in the processor for authenticating via Kerberos.")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING))
            .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY)
            .build();

    static final PropertyDescriptor KERBEROS_PASSWORD = new PropertyDescriptor.Builder()
            .name("kerberos-password")
            .displayName("Kerberos Password")
            .description("The password to use when specifying the principal and password directly in the processor for authenticating via Kerberos.")
            .required(false)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .sensitive(true)
            .build();


    private List<PropertyDescriptor> properties;

    private String connectionUrl = "unknown";

    // Holder of cached Configuration information so validation does not reload the same config over and over
    private final AtomicReference<ValidationResources> validationResourceHolder = new AtomicReference<>();

    private volatile BasicDataSource dataSource;

    private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
    private volatile UserGroupInformation ugi;
    private final AtomicReference<KerberosUser> kerberosUserReference = new AtomicReference<>();

    @Override
    protected void init(final ControllerServiceInitializationContext context) {
        List<PropertyDescriptor> props = new ArrayList<>();
        props.add(DATABASE_URL);
        props.add(HIVE_CONFIGURATION_RESOURCES);
        props.add(DB_USER);
        props.add(DB_PASSWORD);
        props.add(MAX_WAIT_TIME);
        props.add(MAX_TOTAL_CONNECTIONS);
        props.add(MAX_CONN_LIFETIME);
        props.add(VALIDATION_QUERY);
        props.add(KERBEROS_CREDENTIALS_SERVICE);
        props.add(KERBEROS_PRINCIPAL);
        props.add(KERBEROS_PASSWORD);

        properties = props;
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    @Override
    protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
        boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet();

        final List<ValidationResult> problems = new ArrayList<>();

        if (confFileProvided) {
            final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
            final String explicitPrincipal = validationContext.getProperty(KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue();
            final String explicitPassword = validationContext.getProperty(KERBEROS_PASSWORD).getValue();

            final String resolvedPrincipal;
            final String resolvedKeytab;
            if (credentialsService != null) {
                resolvedPrincipal = credentialsService.getPrincipal();
                resolvedKeytab = credentialsService.getKeytab();
            } else {
                resolvedPrincipal = explicitPrincipal;
                resolvedKeytab = null;
            }

            final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
            problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, explicitPassword, validationResourceHolder, getLogger()));

            if (credentialsService != null && (explicitPrincipal != null || explicitPassword != null)) {
                problems.add(new ValidationResult.Builder()
                        .subject(KERBEROS_CREDENTIALS_SERVICE.getDisplayName())
                        .valid(false)
                        .explanation("kerberos principal/password and kerberos credential service cannot be configured at the same time")
                        .build());
            }
        }

        return problems;
    }

    /**
     * Configures connection pool by creating an instance of the
     * {@link BasicDataSource} based on configuration provided with
     * {@link ConfigurationContext}.
     * <p>
     * This operation makes no guarantees that the actual connection could be
     * made since the underlying system may still go off-line during normal
     * operation of the connection pool.
     * <p/>
     * As of Apache NiFi 1.5.0, due to changes made to
     * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking
     * {@link HiveConfigurator#authenticate(Configuration, String, String)}
     * to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to
     * relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from
     * {@link Hive_1_1ConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent
     * threads from requesting concurrent relogins. For more information, please read the documentation for
     * {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
     * <p/>
     * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
     * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
     * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
     * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
     * {@link UserGroupInformation} instance. One of these threads could leave the
     * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
     * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
     * authentication attempts that would leave the Hive controller service in an unrecoverable state.
     *
     * @see SecurityUtil#loginKerberos(Configuration, String, String)
     * @see HiveConfigurator#authenticate(Configuration, String, String)
     * @see HiveConfigurator#authenticate(Configuration, String, String, long)
     * @param context the configuration context
     * @throws InitializationException if unable to create a database connection
     */
    @OnEnabled
    public void onConfigured(final ConfigurationContext context) throws InitializationException {

        ComponentLog log = getLogger();

        final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue();
        final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
        final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue();

        // add any dynamic properties to the Hive configuration
        for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
            final PropertyDescriptor descriptor = entry.getKey();
            if (descriptor.isDynamic()) {
                hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue());
            }
        }

        final String drv = HiveDriver.class.getName();
        if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
            final String explicitPrincipal = context.getProperty(KERBEROS_PRINCIPAL).evaluateAttributeExpressions().getValue();
            final String explicitPassword = context.getProperty(KERBEROS_PASSWORD).getValue();
            final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);

            final String resolvedPrincipal;
            final String resolvedKeytab;
            if (credentialsService != null) {
                resolvedPrincipal = credentialsService.getPrincipal();
                resolvedKeytab = credentialsService.getKeytab();
            } else {
                resolvedPrincipal = explicitPrincipal;
                resolvedKeytab = null;
            }

            if (resolvedKeytab != null) {
                kerberosUserReference.set(new KerberosKeytabUser(resolvedPrincipal, resolvedKeytab));
                log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab});
            } else if (explicitPassword != null) {
                kerberosUserReference.set(new KerberosPasswordUser(resolvedPrincipal, explicitPassword));
                log.info("Hive Security Enabled, logging in as principal {} with password", new Object[] {resolvedPrincipal});
            } else {
                throw new InitializationException("Unable to authenticate with Kerberos, no keytab or password was provided");
            }

            try {
                ugi = hiveConfigurator.authenticate(hiveConfig, kerberosUserReference.get());
            } catch (AuthenticationFailedException ae) {
                log.error(ae.getMessage(), ae);
                throw new InitializationException(ae);
            }

            getLogger().info("Successfully logged in as principal " + resolvedPrincipal);
        }

        final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue();
        final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue();
        final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
        final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger();
        final long maxConnectionLifetimeMillis = extractMillisWithInfinite(context.getProperty(MAX_CONN_LIFETIME).evaluateAttributeExpressions());

        dataSource = new BasicDataSource();
        dataSource.setDriverClassName(drv);

        connectionUrl = context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue();

        dataSource.setMaxWaitMillis(maxWaitMillis);
        dataSource.setMaxTotal(maxTotal);
        dataSource.setMaxConnLifetimeMillis(maxConnectionLifetimeMillis);

        if (validationQuery != null && !validationQuery.isEmpty()) {
            dataSource.setValidationQuery(validationQuery);
            dataSource.setTestOnBorrow(true);
        }

        dataSource.setUrl(connectionUrl);
        dataSource.setUsername(user);
        dataSource.setPassword(passw);
    }
||||
|
||||
/**
|
||||
* Shutdown pool, close all open connections.
|
||||
*/
|
||||
@OnDisabled
|
||||
public void shutdown() {
|
||||
try {
|
||||
if(dataSource != null) {
|
||||
dataSource.close();
|
||||
}
|
||||
} catch (final SQLException e) {
|
||||
throw new ProcessException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
if (ugi != null) {
|
||||
/*
|
||||
* Explicitly check the TGT and relogin if necessary with the KerberosUser instance. No synchronization
|
||||
* is necessary in the client code, since AbstractKerberosUser's checkTGTAndRelogin method is synchronized.
|
||||
*/
|
||||
getLogger().trace("getting UGI instance");
|
||||
if (kerberosUserReference.get() != null) {
|
||||
// if there's a KerberosUser associated with this UGI, check the TGT and relogin if it is close to expiring
|
||||
KerberosUser kerberosUser = kerberosUserReference.get();
|
||||
getLogger().debug("kerberosUser is " + kerberosUser);
|
||||
try {
|
||||
getLogger().debug("checking TGT on kerberosUser " + kerberosUser);
|
||||
kerberosUser.checkTGTAndRelogin();
|
||||
} catch (final KerberosLoginException e) {
|
||||
throw new ProcessException("Unable to relogin with kerberos credentials for " + kerberosUser.getPrincipal(), e);
|
||||
}
|
||||
} else {
|
||||
getLogger().debug("kerberosUser was null, will not refresh TGT with KerberosUser");
|
||||
// no synchronization is needed for UserGroupInformation.checkTGTAndReloginFromKeytab; UGI handles the synchronization internally
|
||||
ugi.checkTGTAndReloginFromKeytab();
|
||||
}
|
||||
try {
|
||||
return ugi.doAs((PrivilegedExceptionAction<Connection>) () -> dataSource.getConnection());
|
||||
} catch (UndeclaredThrowableException e) {
|
||||
Throwable cause = e.getCause();
|
||||
if (cause instanceof SQLException) {
|
||||
throw (SQLException) cause;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
getLogger().info("Simple Authentication");
|
||||
return dataSource.getConnection();
|
||||
}
|
||||
} catch (SQLException | IOException | InterruptedException e) {
|
||||
getLogger().error("Error getting Hive connection", e);
|
||||
throw new ProcessException(e);
|
||||
}
|
||||
}
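/*
 * Illustrative sketch, not part of the original class: how a caller might use this pool.
 * The variable name and query below are hypothetical; the point is that callers only invoke
 * getConnection(), and the Kerberos TGT check/relogin described above happens inside it.
 *
 *     try (final Connection conn = hiveConnectionPool.getConnection();
 *          final Statement stmt = conn.createStatement();
 *          final ResultSet rs = stmt.executeQuery("SELECT 1")) {
 *         // consume the result set; no explicit relogin handling is needed here
 *     }
 */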
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HiveConnectionPool[id=" + getIdentifier() + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return connectionUrl;
|
||||
}
|
||||
|
||||
private long extractMillisWithInfinite(PropertyValue prop) {
|
||||
if (prop.getValue() == null || DEFAULT_MAX_CONN_LIFETIME.equals(prop.getValue())) {
|
||||
return -1;
|
||||
} else {
|
||||
return prop.asTimePeriod(TimeUnit.MILLISECONDS);
|
||||
}
|
||||
}
|
||||
}
@@ -1,344 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.antlr.runtime.tree.CommonTree;
|
||||
import org.apache.hadoop.hive.ql.parse.ASTNode;
|
||||
import org.apache.hadoop.hive.ql.parse.ParseDriver;
|
||||
import org.apache.hadoop.hive.ql.parse.ParseException;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.AbstractSessionFactoryProcessor;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.stream.io.StreamUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Date;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLDataException;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Time;
|
||||
import java.sql.Timestamp;
|
||||
import java.sql.Types;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* An abstract base class for HiveQL processors to share common data, methods, etc.
|
||||
*/
|
||||
public abstract class AbstractHive_1_1QLProcessor extends AbstractSessionFactoryProcessor {
|
||||
|
||||
protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type");
|
||||
protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+");
|
||||
static String ATTR_INPUT_TABLES = "query.input.tables";
|
||||
static String ATTR_OUTPUT_TABLES = "query.output.tables";
|
||||
|
||||
|
||||
public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
|
||||
.name("Hive Database Connection Pooling Service")
|
||||
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
|
||||
.required(true)
|
||||
.identifiesControllerService(Hive_1_1DBCPService.class)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder()
|
||||
.name("hive-charset")
|
||||
.displayName("Character Set")
|
||||
.description("Specifies the character set of the record data.")
|
||||
.required(true)
|
||||
.defaultValue("UTF-8")
|
||||
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||
.build();
|
||||
|
||||
/**
|
||||
* Determines the HiveQL statement that should be executed for the given FlowFile
|
||||
*
|
||||
* @param session the session that can be used to access the given FlowFile
|
||||
* @param flowFile the FlowFile whose HiveQL statement should be executed
|
||||
* @return the HiveQL that is associated with the given FlowFile
|
||||
*/
|
||||
protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) {
|
||||
// Read the HiveQL from the FlowFile's content
|
||||
final byte[] buffer = new byte[(int) flowFile.getSize()];
|
||||
session.read(flowFile, new InputStreamCallback() {
|
||||
@Override
|
||||
public void process(final InputStream in) throws IOException {
|
||||
StreamUtils.fillBuffer(in, buffer);
|
||||
}
|
||||
});
|
||||
|
||||
// Return the HiveQL statement read from the FlowFile content, decoded with the given character set.
|
||||
return new String(buffer, charset);
|
||||
}
|
||||
|
||||
private class ParameterHolder {
|
||||
String attributeName;
|
||||
int jdbcType;
|
||||
String value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes.
|
||||
*
|
||||
* @param stmt the statement to set the parameters on
|
||||
* @param attributes the attributes from which to derive parameter indices, values, and types
|
||||
* @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called
|
||||
*/
|
||||
protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map<String, String> attributes) throws SQLException {
|
||||
|
||||
Map<Integer, ParameterHolder> parmMap = new TreeMap<Integer, ParameterHolder>();
|
||||
|
||||
for (final Map.Entry<String, String> entry : attributes.entrySet()) {
|
||||
final String key = entry.getKey();
|
||||
final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key);
|
||||
if (matcher.matches()) {
|
||||
final int parameterIndex = Integer.parseInt(matcher.group(1));
|
||||
if (parameterIndex >= base && parameterIndex < base + paramCount) {
|
||||
final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches();
|
||||
if (!isNumeric) {
|
||||
throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType");
|
||||
}
|
||||
|
||||
final String valueAttrName = "hiveql.args." + parameterIndex + ".value";
|
||||
|
||||
ParameterHolder ph = new ParameterHolder();
|
||||
int realIndexLoc = parameterIndex - base + 1;
|
||||
|
||||
ph.jdbcType = Integer.parseInt(entry.getValue());
|
||||
ph.value = attributes.get(valueAttrName);
|
||||
ph.attributeName = valueAttrName;
|
||||
|
||||
parmMap.put(realIndexLoc, ph);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Now that we've retrieved the correct number of parameters and they are sorted, let's set them.
|
||||
for (final Map.Entry<Integer, ParameterHolder> entry : parmMap.entrySet()) {
|
||||
final Integer index = entry.getKey();
|
||||
final ParameterHolder ph = entry.getValue();
|
||||
|
||||
try {
|
||||
setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType);
|
||||
} catch (final NumberFormatException nfe) {
|
||||
throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe);
|
||||
}
|
||||
}
|
||||
return base + paramCount;
|
||||
}
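/*
 * Illustrative sketch, not in the original source: the FlowFile attribute convention that
 * setParameters() consumes, shown with hypothetical values. For a statement containing two '?'
 * placeholders and base == 1, the incoming FlowFile would carry attributes such as:
 *
 *     hiveql.args.1.type  = "12"    (java.sql.Types.VARCHAR)
 *     hiveql.args.1.value = "Mark"
 *     hiveql.args.2.type  = "4"     (java.sql.Types.INTEGER)
 *     hiveql.args.2.value = "42"
 *
 * setParameters(1, stmt, 2, flowFile.getAttributes()) then binds "Mark" as parameter 1 and 42
 * as parameter 2, and returns 3 as the base index for the next statement.
 */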
|
||||
|
||||
/**
|
||||
* Determines how to map the given value to the appropriate JDBC data type and sets the parameter on the
|
||||
* provided PreparedStatement
|
||||
*
|
||||
* @param stmt the PreparedStatement to set the parameter on
|
||||
* @param attrName the name of the attribute that the parameter is coming from - for logging purposes
|
||||
* @param parameterIndex the index of the HiveQL parameter to set
|
||||
* @param parameterValue the value of the HiveQL parameter to set
|
||||
* @param jdbcType the JDBC Type of the HiveQL parameter to set
|
||||
* @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter
|
||||
*/
|
||||
protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException {
|
||||
if (parameterValue == null) {
|
||||
stmt.setNull(parameterIndex, jdbcType);
|
||||
} else {
|
||||
try {
|
||||
switch (jdbcType) {
|
||||
case Types.BIT:
|
||||
case Types.BOOLEAN:
|
||||
stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue));
|
||||
break;
|
||||
case Types.TINYINT:
|
||||
stmt.setByte(parameterIndex, Byte.parseByte(parameterValue));
|
||||
break;
|
||||
case Types.SMALLINT:
|
||||
stmt.setShort(parameterIndex, Short.parseShort(parameterValue));
|
||||
break;
|
||||
case Types.INTEGER:
|
||||
stmt.setInt(parameterIndex, Integer.parseInt(parameterValue));
|
||||
break;
|
||||
case Types.BIGINT:
|
||||
stmt.setLong(parameterIndex, Long.parseLong(parameterValue));
|
||||
break;
|
||||
case Types.REAL:
|
||||
stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue));
|
||||
break;
|
||||
case Types.FLOAT:
|
||||
case Types.DOUBLE:
|
||||
stmt.setDouble(parameterIndex, Double.parseDouble(parameterValue));
|
||||
break;
|
||||
case Types.DECIMAL:
|
||||
case Types.NUMERIC:
|
||||
stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue));
|
||||
break;
|
||||
case Types.DATE:
|
||||
stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.TIME:
|
||||
stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.TIMESTAMP:
|
||||
stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue)));
|
||||
break;
|
||||
case Types.CHAR:
|
||||
case Types.VARCHAR:
|
||||
case Types.LONGNVARCHAR:
|
||||
case Types.LONGVARCHAR:
|
||||
stmt.setString(parameterIndex, parameterValue);
|
||||
break;
|
||||
default:
|
||||
stmt.setObject(parameterIndex, parameterValue, jdbcType);
|
||||
break;
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
// Log which attribute/parameter had an error, then rethrow to be handled at the top level
|
||||
getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
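/*
 * Illustrative sketch with hypothetical attribute values: an attribute pair of type "93"
 * (java.sql.Types.TIMESTAMP) and value "1672531200000" (epoch milliseconds) is mapped by the
 * switch above to:
 *
 *     stmt.setTimestamp(parameterIndex, new Timestamp(1672531200000L));
 *
 * while CHAR/VARCHAR parameters are passed through unchanged via setString().
 */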
|
||||
|
||||
protected static class TableName {
|
||||
private final String database;
|
||||
private final String table;
|
||||
private final boolean input;
|
||||
|
||||
TableName(String database, String table, boolean input) {
|
||||
this.database = database;
|
||||
this.table = table;
|
||||
this.input = input;
|
||||
}
|
||||
|
||||
public String getDatabase() {
|
||||
return database;
|
||||
}
|
||||
|
||||
public String getTable() {
|
||||
return table;
|
||||
}
|
||||
|
||||
public boolean isInput() {
|
||||
return input;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return database == null || database.isEmpty() ? table : database + '.' + table;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
TableName tableName = (TableName) o;
|
||||
|
||||
if (input != tableName.input) return false;
|
||||
if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false;
|
||||
return table.equals(tableName.table);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = database != null ? database.hashCode() : 0;
|
||||
result = 31 * result + table.hashCode();
|
||||
result = 31 * result + (input ? 1 : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
protected Set<TableName> findTableNames(final String query) {
|
||||
final ASTNode node;
|
||||
try {
|
||||
node = new ParseDriver().parse(normalize(query));
|
||||
} catch (ParseException e) {
|
||||
// If failed to parse the query, just log a message, but continue.
|
||||
getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e);
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
final HashSet<TableName> tableNames = new HashSet<>();
|
||||
findTableNames(node, tableNames);
|
||||
return tableNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize query.
|
||||
* Hive resolves prepared statement parameters before executing a query,
|
||||
* see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail.
|
||||
* HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one.
|
||||
* In this normalize method, '?' is replaced to 'x' to avoid that.
|
||||
*/
|
||||
private String normalize(String query) {
|
||||
return query.replace('?', 'x');
|
||||
}
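/*
 * Illustrative example with a hypothetical query: normalize() exists only so that the Hive
 * parser does not reject JDBC bind markers, e.g.
 *
 *     normalize("SELECT a FROM src WHERE b = ?")  returns  "SELECT a FROM src WHERE b = x"
 *
 * The rewritten query is used solely for table-name extraction and is never executed.
 */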
|
||||
|
||||
private void findTableNames(final Object obj, final Set<TableName> tableNames) {
|
||||
if (!(obj instanceof CommonTree)) {
|
||||
return;
|
||||
}
|
||||
final CommonTree tree = (CommonTree) obj;
|
||||
final int childCount = tree.getChildCount();
|
||||
if ("TOK_TABNAME".equals(tree.getText())) {
|
||||
final TableName tableName;
|
||||
final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText());
|
||||
switch (childCount) {
|
||||
case 1 :
|
||||
tableName = new TableName(null, tree.getChild(0).getText(), isInput);
|
||||
break;
|
||||
case 2:
|
||||
tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount);
|
||||
}
|
||||
// If parent is TOK_TABREF, then it is an input table.
|
||||
tableNames.add(tableName);
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < childCount; i++) {
|
||||
findTableNames(tree.getChild(i), tableNames);
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, String> toQueryTableAttributes(Set<TableName> tableNames) {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
for (TableName tableName : tableNames) {
|
||||
final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES;
|
||||
if (attributes.containsKey(attributeName)) {
|
||||
attributes.put(attributeName, attributes.get(attributeName) + "," + tableName);
|
||||
} else {
|
||||
attributes.put(attributeName, tableName.toString());
|
||||
}
|
||||
}
|
||||
return attributes;
|
||||
}
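/*
 * Illustrative example with hypothetical table names: for a query that reads from default.src
 * and writes to default.dst, toQueryTableAttributes(findTableNames(query)) would produce:
 *
 *     query.input.tables  = "default.src"
 *     query.output.tables = "default.dst"
 *
 * Multiple tables of the same kind are joined with commas into a single attribute value.
 */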
|
||||
}
@@ -1,300 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.ErrorTypes;
|
||||
import org.apache.nifi.processor.util.pattern.ExceptionHandler;
|
||||
import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection;
|
||||
import org.apache.nifi.processor.util.pattern.Put;
|
||||
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||
import org.apache.nifi.processor.util.pattern.RoutingResult;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.SQLNonTransientException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@SeeAlso(SelectHive_1_1QL.class)
|
||||
@InputRequirement(Requirement.INPUT_REQUIRED)
|
||||
@Tags({"sql", "hive", "put", "database", "update", "insert"})
|
||||
@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command "
|
||||
+ "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes "
|
||||
+ "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be "
|
||||
+ "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.")
|
||||
@ReadsAttributes({
|
||||
@ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer "
|
||||
+ "that represents the JDBC Type of the parameter."),
|
||||
@ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as "
|
||||
+ "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.")
|
||||
})
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||
+ "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."),
|
||||
@WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, "
|
||||
+ "and contains the target table names in 'databaseName.tableName' format.")
|
||||
})
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.PutHive3QL")
|
||||
public class PutHive_1_1QL extends AbstractHive_1_1QLProcessor {
|
||||
|
||||
public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("hive-batch-size")
|
||||
.displayName("Batch Size")
|
||||
.description("The preferred number of FlowFiles to put to the database in a single transaction")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
|
||||
.defaultValue("100")
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder()
|
||||
.name("statement-delimiter")
|
||||
.displayName("Statement Delimiter")
|
||||
.description("Statement Delimiter used to separate SQL statements in a multiple statement script")
|
||||
.required(true)
|
||||
.defaultValue(";")
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("A FlowFile is routed to this relationship after the database is successfully updated")
|
||||
.build();
|
||||
public static final Relationship REL_RETRY = new Relationship.Builder()
|
||||
.name("retry")
|
||||
.description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
|
||||
.build();
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, "
|
||||
+ "such as an invalid query or an integrity constraint violation")
|
||||
.build();
|
||||
|
||||
|
||||
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||
private final static Set<Relationship> relationships;
|
||||
|
||||
/*
|
||||
* Will ensure that the list of property descriptors is built only once.
|
||||
* Will also create a Set of relationships
|
||||
*/
|
||||
static {
|
||||
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||
_propertyDescriptors.add(HIVE_DBCP_SERVICE);
|
||||
_propertyDescriptors.add(BATCH_SIZE);
|
||||
_propertyDescriptors.add(CHARSET);
|
||||
_propertyDescriptors.add(STATEMENT_DELIMITER);
|
||||
_propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE);
|
||||
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
_relationships.add(REL_RETRY);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
private Put<FunctionContext, Connection> process;
|
||||
private ExceptionHandler<FunctionContext> exceptionHandler;
|
||||
|
||||
@OnScheduled
|
||||
public void constructProcess() {
|
||||
exceptionHandler = new ExceptionHandler<>();
|
||||
exceptionHandler.mapException(e -> {
|
||||
if (e instanceof SQLNonTransientException) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (e instanceof SQLException) {
|
||||
// Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes
|
||||
int errorCode = ((SQLException) e).getErrorCode();
|
||||
getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode});
|
||||
if (errorCode >= 10000 && errorCode < 20000) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (errorCode >= 20000 && errorCode < 30000) {
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else if (errorCode >= 30000 && errorCode < 40000) {
|
||||
return ErrorTypes.TemporalInputFailure;
|
||||
} else if (errorCode >= 40000 && errorCode < 50000) {
|
||||
// These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes
|
||||
// a ProcessException, we'll route to failure via an InvalidInput error type.
|
||||
return ErrorTypes.InvalidInput;
|
||||
} else {
|
||||
// Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure
|
||||
// or rolled back depending on the user's setting of Rollback On Failure.
|
||||
return ErrorTypes.TemporalFailure;
|
||||
}
|
||||
} else {
|
||||
return ErrorTypes.UnknownFailure;
|
||||
}
|
||||
});
|
||||
exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger()));
|
||||
|
||||
process = new Put<>();
|
||||
process.setLogger(getLogger());
|
||||
process.initConnection(initConnection);
|
||||
process.fetchFlowFiles(fetchFlowFiles);
|
||||
process.putFlowFile(putFlowFile);
|
||||
process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
private class FunctionContext extends RollbackOnFailure {
|
||||
final Charset charset;
|
||||
final String statementDelimiter;
|
||||
final long startNanos = System.nanoTime();
|
||||
|
||||
String connectionUrl;
|
||||
|
||||
|
||||
private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) {
|
||||
super(rollbackOnFailure, false);
|
||||
this.charset = charset;
|
||||
this.statementDelimiter = statementDelimiter;
|
||||
}
|
||||
}
|
||||
|
||||
private InitConnection<FunctionContext, Connection> initConnection = (context, session, fc, ffs) -> {
|
||||
final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class);
|
||||
final Connection connection = dbcpService.getConnection(ffs == null || ffs.isEmpty() ? Collections.emptyMap() : ffs.get(0).getAttributes());
|
||||
fc.connectionUrl = dbcpService.getConnectionURL();
|
||||
return connection;
|
||||
};
|
||||
|
||||
private FetchFlowFiles<FunctionContext> fetchFlowFiles = (context, session, functionContext, result) -> {
|
||||
final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
|
||||
return session.get(batchSize);
|
||||
};
|
||||
|
||||
private Put.PutFlowFile<FunctionContext, Connection> putFlowFile = (context, session, fc, conn, flowFile, result) -> {
|
||||
final String script = getHiveQL(session, flowFile, fc.charset);
|
||||
String regex = "(?<!\\\\)" + Pattern.quote(fc.statementDelimiter);
|
||||
|
||||
String[] hiveQLs = script.split(regex);
|
||||
|
||||
final Set<TableName> tableNames = new HashSet<>();
|
||||
exceptionHandler.execute(fc, flowFile, input -> {
|
||||
int loc = 1;
|
||||
for (String hiveQLStr: hiveQLs) {
|
||||
getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr});
|
||||
|
||||
final String hiveQL = hiveQLStr.trim();
|
||||
if (!StringUtils.isEmpty(hiveQL)) {
|
||||
try (final PreparedStatement stmt = conn.prepareStatement(hiveQL)) {
|
||||
|
||||
// Get ParameterMetadata
|
||||
// Hive JDBC Doesn't support this yet:
|
||||
// ParameterMetaData pmd = stmt.getParameterMetaData();
|
||||
// int paramCount = pmd.getParameterCount();
|
||||
int paramCount = StringUtils.countMatches(hiveQL, "?");
|
||||
|
||||
if (paramCount > 0) {
|
||||
loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes());
|
||||
}
|
||||
|
||||
// Parse hiveQL and extract input/output tables
|
||||
try {
|
||||
tableNames.addAll(findTableNames(hiveQL));
|
||||
} catch (Exception e) {
|
||||
// If failed to parse the query, just log a warning message, but continue.
|
||||
getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e);
|
||||
}
|
||||
|
||||
// Execute the statement
|
||||
stmt.execute();
|
||||
fc.proceed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit a Provenance SEND event
|
||||
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos);
|
||||
|
||||
final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames));
|
||||
session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true);
|
||||
result.routeTo(flowFile, REL_SUCCESS);
|
||||
|
||||
}, onFlowFileError(context, session, result));
|
||||
|
||||
};
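/*
 * Illustrative example with a hypothetical script: with the default Statement Delimiter of ";",
 * the negative-lookbehind regex built in putFlowFile splits
 *
 *     "CREATE TABLE t (x INT); INSERT INTO t VALUES (1)"
 *
 * into two statements, while a delimiter escaped as "\;" inside a statement is not split on.
 * Each non-empty statement is prepared and executed in order, and loc advances past the
 * parameters consumed by each statement.
 */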
|
||||
|
||||
private OnError<FunctionContext, FlowFile> onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) {
|
||||
OnError<FunctionContext, FlowFile> onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY);
|
||||
onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> {
|
||||
switch (r.destination()) {
|
||||
case Failure:
|
||||
getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e);
|
||||
break;
|
||||
case Retry:
|
||||
getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry",
|
||||
new Object[] {i, e}, e);
|
||||
break;
|
||||
case Self:
|
||||
getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e);
|
||||
break;
|
||||
}
|
||||
});
|
||||
return RollbackOnFailure.createOnError(onFlowFileError);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean();
|
||||
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||
final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue();
|
||||
final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter);
|
||||
RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext));
|
||||
}
|
||||
}
@@ -1,554 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.PartialFunctions;
|
||||
import org.apache.nifi.util.StopWatch;
|
||||
import org.apache.nifi.util.hive.CsvOutputOptions;
|
||||
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO;
|
||||
|
||||
@EventDriven
|
||||
@InputRequirement(Requirement.INPUT_ALLOWED)
|
||||
@Tags({"hive", "sql", "select", "jdbc", "query", "database"})
|
||||
@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format."
|
||||
+ " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on "
|
||||
+ "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. "
|
||||
+ "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the "
|
||||
+ "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.")
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."),
|
||||
@WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."),
|
||||
@WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."),
|
||||
@WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set "
|
||||
+ "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."),
|
||||
@WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of "
|
||||
+ "FlowFiles produced by a single ResultSet. This can be used in conjunction with the "
|
||||
+ "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."),
|
||||
@WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of "
|
||||
+ "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be "
|
||||
+ "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order "
|
||||
+ "FlowFiles were produced"),
|
||||
@WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.")
|
||||
})
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.SelectHive3QL")
|
||||
public class SelectHive_1_1QL extends AbstractHive_1_1QLProcessor {
|
||||
|
||||
public static final String RESULT_ROW_COUNT = "selecthiveql.row.count";
|
||||
|
||||
// Relationships
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("Successfully created FlowFile from HiveQL query result set.")
|
||||
.build();
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.")
|
||||
.build();
|
||||
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-pre-query")
|
||||
.displayName("HiveQL Pre-Query")
|
||||
.description("A semicolon-delimited list of queries executed before the main SQL query is executed. "
|
||||
+ "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. "
|
||||
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-query")
|
||||
.displayName("HiveQL Select Query")
|
||||
.description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder()
|
||||
.name("hive-post-query")
|
||||
.displayName("HiveQL Post-Query")
|
||||
.description("A semicolon-delimited list of queries executed after the main SQL query is executed. "
|
||||
+ "Note, the results/outputs of these queries will be suppressed if successfully executed.")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder()
|
||||
.name("hive-fetch-size")
|
||||
.displayName("Fetch Size")
|
||||
.description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be "
|
||||
+ "honored and/or exact. If the value specified is zero, then the hint is ignored.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-rows")
|
||||
.displayName("Max Rows Per Flow File")
|
||||
.description("The maximum number of result rows that will be included in a single FlowFile. " +
|
||||
"This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder()
|
||||
.name("hive-max-frags")
|
||||
.displayName("Maximum Number of Fragments")
|
||||
.description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. " +
|
||||
"This prevents OutOfMemoryError when this processor ingests huge table.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder()
|
||||
.name("csv-header")
|
||||
.displayName("CSV Header")
|
||||
.description("Include Header in Output")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder()
|
||||
.name("csv-alt-header")
|
||||
.displayName("Alternate CSV Header")
|
||||
.description("Comma separated list of header fields")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder()
|
||||
.name("csv-delimiter")
|
||||
.displayName("CSV Delimiter")
|
||||
.description("CSV Delimiter used to separate fields")
|
||||
.required(true)
|
||||
.defaultValue(",")
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder()
|
||||
.name("csv-quote")
|
||||
.displayName("CSV Quote")
|
||||
.description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder()
|
||||
.name("csv-escape")
|
||||
.displayName("CSV Escape")
|
||||
.description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.")
|
||||
.required(true)
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("true")
|
||||
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder()
|
||||
.name("hive-output-format")
|
||||
.displayName("Output Format")
|
||||
.description("How to represent the records coming from Hive (Avro, CSV, e.g.)")
|
||||
.required(true)
|
||||
.allowableValues(AVRO, CSV)
|
||||
.defaultValue(AVRO)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.NONE)
|
||||
.build();
|
||||
|
||||
private final static List<PropertyDescriptor> propertyDescriptors;
|
||||
private final static Set<Relationship> relationships;
|
||||
|
||||
/*
|
||||
* Will ensure that the list of property descriptors is built only once.
|
||||
* Will also create a Set of relationships
|
||||
*/
|
||||
static {
|
||||
List<PropertyDescriptor> _propertyDescriptors = new ArrayList<>();
|
||||
_propertyDescriptors.add(HIVE_DBCP_SERVICE);
|
||||
_propertyDescriptors.add(HIVEQL_PRE_QUERY);
|
||||
_propertyDescriptors.add(HIVEQL_SELECT_QUERY);
|
||||
_propertyDescriptors.add(HIVEQL_POST_QUERY);
|
||||
_propertyDescriptors.add(FETCH_SIZE);
|
||||
_propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE);
|
||||
_propertyDescriptors.add(MAX_FRAGMENTS);
|
||||
_propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT);
|
||||
_propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_HEADER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_DELIMITER);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_QUOTE);
|
||||
_propertyDescriptors.add(HIVEQL_CSV_ESCAPE);
|
||||
_propertyDescriptors.add(CHARSET);
|
||||
propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
@OnScheduled
|
||||
public void setup(ProcessContext context) {
|
||||
// If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization
|
||||
if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) {
|
||||
final String errorString = "Either the Select Query must be specified or there must be an incoming connection "
|
||||
+ "providing flowfile(s) containing a SQL select query";
|
||||
getLogger().error(errorString);
|
||||
throw new ProcessException(errorString);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session));
|
||||
}
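/*
 * Illustrative sketch with hypothetical values: the fragment attributes documented in this
 * class's @WritesAttributes. With 'Max Rows Per Flow File' set to 100, a 250-row result set
 * yields three FlowFiles sharing one fragment.identifier:
 *
 *     FlowFile 1: selecthiveql.row.count=100, fragment.index=0
 *     FlowFile 2: selecthiveql.row.count=100, fragment.index=1
 *     FlowFile 3: selecthiveql.row.count=50,  fragment.index=2
 *
 * fragment.count, per the documentation above, records the total number of FlowFiles produced
 * from the same result set so downstream processors can tell when all fragments have arrived.
 */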
|
||||
|
||||
private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null);
|
||||
FlowFile flowfile = null;
|
||||
|
||||
// If we have no FlowFile, and all incoming connections are self-loops then we can continue on.
|
||||
// However, if we have no FlowFile and we have connections coming from other Processors, then
|
||||
// we know that we should run only if we have a FlowFile.
|
||||
if (context.hasIncomingConnection()) {
|
||||
if (fileToProcess == null && context.hasNonLoopConnection()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
final ComponentLog logger = getLogger();
|
||||
final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class);
|
||||
final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
|
||||
|
||||
List<String> preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||
List<String> postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue());
|
||||
|
||||
final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet());
|
||||
|
||||
// Source the SQL
|
||||
String hqlStatement;
|
||||
|
||||
if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) {
|
||||
hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
} else {
|
||||
// If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query.
|
||||
// If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled.
|
||||
final StringBuilder queryContents = new StringBuilder();
|
||||
session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset)));
|
||||
hqlStatement = queryContents.toString();
|
||||
}
|
||||
|
||||
|
||||
final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||
final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger();
|
||||
final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet()
|
||||
? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger()
|
||||
: 0;
|
||||
final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue();
|
||||
final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean();
|
||||
final StopWatch stopWatch = new StopWatch(true);
|
||||
final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean();
|
||||
final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue();
|
||||
final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean();
|
||||
final boolean escape = context.getProperty(HIVEQL_CSV_ESCAPE).asBoolean();
|
||||
final String fragmentIdentifier = UUID.randomUUID().toString();
|
||||
|
||||
try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes());
|
||||
final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement())
|
||||
) {
|
||||
Pair<String,SQLException> failure = executeConfigStatements(con, preQueries);
|
||||
if (failure != null) {
|
||||
// In case of failure, assign the failed config query to "hqlStatement" so the existing error handling below applies
|
||||
hqlStatement = failure.getLeft();
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw failure.getRight();
|
||||
}
|
||||
if (fetchSize != null && fetchSize > 0) {
|
||||
try {
|
||||
st.setFetchSize(fetchSize);
|
||||
} catch (SQLException se) {
|
||||
// Not all drivers support this, just log the error (at debug level) and move on
|
||||
logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se);
|
||||
}
|
||||
}
|
||||
|
||||
final List<FlowFile> resultSetFlowFiles = new ArrayList<>();
|
||||
try {
|
||||
logger.debug("Executing query {}", new Object[]{hqlStatement});
|
||||
if (flowbased) {
|
||||
// Hive JDBC Doesn't Support this yet:
|
||||
// ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData();
|
||||
// int paramCount = pmd.getParameterCount();
|
||||
|
||||
// Alternate way to determine number of params in SQL.
|
||||
int paramCount = StringUtils.countMatches(hqlStatement, "?");
|
||||
|
||||
if (paramCount > 0) {
|
||||
setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes());
|
||||
}
|
||||
}
|
||||
|
||||
final ResultSet resultSet;
|
||||
|
||||
try {
|
||||
resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement));
|
||||
} catch (SQLException se) {
|
||||
// If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw se;
|
||||
}
|
||||
|
||||
int fragmentIndex = 0;
|
||||
String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null;
|
||||
while (true) {
|
||||
final AtomicLong nrOfRows = new AtomicLong(0L);
|
||||
flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess);
|
||||
if (baseFilename == null) {
|
||||
baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key());
|
||||
}
|
||||
try {
|
||||
flowfile = session.write(flowfile, out -> {
|
||||
try {
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro));
|
||||
} else if (CSV.equals(outputFormat)) {
|
||||
CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile);
|
||||
nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options));
|
||||
} else {
|
||||
nrOfRows.set(0L);
|
||||
throw new ProcessException("Unsupported output format: " + outputFormat);
|
||||
}
|
||||
} catch (final SQLException | RuntimeException e) {
|
||||
throw new ProcessException("Error during database query or conversion of records.", e);
|
||||
}
|
||||
});
|
||||
} catch (ProcessException e) {
|
||||
// Add flowfile to results before rethrowing so it will be removed from session in outer catch
|
||||
resultSetFlowFiles.add(flowfile);
|
||||
throw e;
|
||||
}
|
||||
|
||||
if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) {
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
// Set attribute for how many rows were selected
|
||||
attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
|
||||
|
||||
try {
|
||||
// Set input/output table names by parsing the query
|
||||
attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement)));
|
||||
} catch (Exception e) {
|
||||
// If failed to parse the query, just log a warning message, but continue.
|
||||
getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e);
|
||||
}
|
||||
|
||||
// Set MIME type on output document and add extension to filename
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY);
|
||||
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".avro");
|
||||
} else if (CSV.equals(outputFormat)) {
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
|
||||
attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv");
|
||||
}
|
||||
|
||||
if (maxRowsPerFlowFile > 0) {
|
||||
attributes.put("fragment.identifier", fragmentIdentifier);
|
||||
attributes.put("fragment.index", String.valueOf(fragmentIndex));
|
||||
}
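// Illustrative sketch (row counts assumed): with Max Rows Per Flow File = 1000 and a 2500-row result set,
// three FlowFiles share one fragment.identifier (a random UUID) with fragment.index 0, 1 and 2, and
// fragment.count is set to 3 on each of them once the loop below finishes.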
|
||||
|
||||
flowfile = session.putAllAttributes(flowfile, attributes);
|
||||
|
||||
logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'",
|
||||
new Object[]{flowfile, nrOfRows.get()});
|
||||
|
||||
if (context.hasIncomingConnection()) {
|
||||
// If the flow file came from an incoming connection, issue a Fetch provenance event
|
||||
session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(),
|
||||
"Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||
} else {
|
||||
// If we created a flow file from rows received from Hive, issue a Receive provenance event
|
||||
session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS));
|
||||
}
|
||||
resultSetFlowFiles.add(flowfile);
|
||||
} else {
|
||||
// If there were no rows returned (and the first flow file has already been sent), we're done processing, so remove the flowfile and carry on
|
||||
session.remove(flowfile);
|
||||
if (!resultSetFlowFiles.isEmpty()) {
|
||||
flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size() - 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
fragmentIndex++;
|
||||
if (maxFragments > 0 && fragmentIndex >= maxFragments) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < resultSetFlowFiles.size(); i++) {
|
||||
// Set count on all FlowFiles
|
||||
if (maxRowsPerFlowFile > 0) {
|
||||
resultSetFlowFiles.set(i,
|
||||
session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex)));
|
||||
}
|
||||
}
|
||||
|
||||
} catch (final SQLException e) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
failure = executeConfigStatements(con, postQueries);
|
||||
if (failure != null) {
|
||||
hqlStatement = failure.getLeft();
|
||||
if (resultSetFlowFiles != null) {
|
||||
resultSetFlowFiles.forEach(ff -> session.remove(ff));
|
||||
}
|
||||
flowfile = (fileToProcess == null) ? session.create() : fileToProcess;
|
||||
fileToProcess = null;
|
||||
throw failure.getRight();
|
||||
}
|
||||
|
||||
session.transfer(resultSetFlowFiles, REL_SUCCESS);
|
||||
if (fileToProcess != null) {
|
||||
session.remove(fileToProcess);
|
||||
}
|
||||
} catch (final ProcessException | SQLException e) {
|
||||
logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e});
|
||||
if (flowfile == null) {
|
||||
// This can happen if any exceptions occur while setting up the connection, statement, etc.
|
||||
logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure",
|
||||
new Object[]{hqlStatement, e});
|
||||
context.yield();
|
||||
} else {
|
||||
if (context.hasIncomingConnection()) {
|
||||
logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure",
|
||||
new Object[]{hqlStatement, flowfile, e});
|
||||
flowfile = session.penalize(flowfile);
|
||||
} else {
|
||||
logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure",
|
||||
new Object[]{hqlStatement, e});
|
||||
context.yield();
|
||||
}
|
||||
session.transfer(flowfile, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Executes the given queries using the provided connection.
|
||||
* Returns null on success, or a Pair of the failed query and its SQLException on failure.
|
||||
*/
|
||||
protected Pair<String, SQLException> executeConfigStatements(final Connection con, final List<String> configQueries) {
|
||||
if (configQueries == null || configQueries.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for (String confSQL : configQueries) {
|
||||
try (final Statement st = con.createStatement()) {
|
||||
st.execute(confSQL);
|
||||
} catch (SQLException e) {
|
||||
return Pair.of(confSQL, e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
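// Usage sketch (statements assumed): executeConfigStatements(con,
//     Arrays.asList("set hive.exec.dynamic.partition=true", "set tez.queue.name=etl"))
// runs each statement on its own Statement and returns null, or Pair.of(failedSql, sqlException)
// for the first statement that throws.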
|
||||
|
||||
protected List<String> getQueries(final String value) {
|
||||
if (value == null || value.trim().isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
final List<String> queries = new LinkedList<>();
|
||||
for (String query : value.split(";")) {
|
||||
if (query.trim().length() > 0) {
|
||||
queries.add(query.trim());
|
||||
}
|
||||
}
|
||||
return queries;
|
||||
}
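// Example (input assumed): getQueries(" set a=1 ; set b=2; ") returns ["set a=1", "set b=2"],
// while getQueries("   ") returns null, so blank pre-/post-query properties mean "nothing to run".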
|
||||
}
|
|
@@ -1,853 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttribute;
|
||||
import org.apache.nifi.annotation.behavior.ReadsAttributes;
|
||||
import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.DeprecationNotice;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.components.AllowableValue;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.ValidationContext;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.components.Validator;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.expression.ExpressionLanguageScope;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processor.util.pattern.DiscontinuedException;
|
||||
import org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException;
|
||||
import org.apache.nifi.serialization.MalformedRecordException;
|
||||
import org.apache.nifi.serialization.RecordReader;
|
||||
import org.apache.nifi.serialization.RecordReaderFactory;
|
||||
import org.apache.nifi.serialization.RecordSetWriter;
|
||||
import org.apache.nifi.serialization.RecordSetWriterFactory;
|
||||
import org.apache.nifi.serialization.SimpleRecordSchema;
|
||||
import org.apache.nifi.serialization.WriteResult;
|
||||
import org.apache.nifi.serialization.record.DataType;
|
||||
import org.apache.nifi.serialization.record.MapRecord;
|
||||
import org.apache.nifi.serialization.record.Record;
|
||||
import org.apache.nifi.serialization.record.RecordField;
|
||||
import org.apache.nifi.serialization.record.RecordFieldType;
|
||||
import org.apache.nifi.serialization.record.RecordSchema;
|
||||
import org.apache.nifi.serialization.record.type.ArrayDataType;
|
||||
import org.apache.nifi.serialization.record.type.ChoiceDataType;
|
||||
import org.apache.nifi.serialization.record.type.MapDataType;
|
||||
import org.apache.nifi.serialization.record.type.RecordDataType;
|
||||
import org.apache.nifi.util.StringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
@Tags({"hive", "metadata", "jdbc", "database", "table"})
|
||||
@CapabilityDescription("This processor uses a Hive JDBC connection and incoming records to generate any Hive 1.1 table changes needed to support the incoming records.")
|
||||
@ReadsAttributes({
|
||||
@ReadsAttribute(attribute = "hive.table.management.strategy", description = "This attribute is read if the 'Table Management Strategy' property is configured "
|
||||
+ "to use the value of this attribute. The value of this attribute should correspond (ignoring case) to a valid option of the 'Table Management Strategy' property.")
|
||||
})
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute = "output.table", description = "This attribute is written on the flow files routed to the 'success' "
|
||||
+ "and 'failure' relationships, and contains the target table name."),
|
||||
@WritesAttribute(attribute = "output.path", description = "This attribute is written on the flow files routed to the 'success' "
|
||||
+ "and 'failure' relationships, and contains the path on the file system to the table (or partition location if the table is partitioned)."),
|
||||
@WritesAttribute(attribute = "mime.type", description = "Sets the mime.type attribute to the MIME Type specified by the Record Writer, only if a Record Writer is specified "
|
||||
+ "and Update Field Names is 'true'."),
|
||||
@WritesAttribute(attribute = "record.count", description = "Sets the number of records in the FlowFile, only if a Record Writer is specified and Update Field Names is 'true'.")
|
||||
})
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@RequiresInstanceClassLoading
|
||||
@DeprecationNotice(classNames = "org.apache.nifi.processors.hive.UpdateHive3Table")
|
||||
public class UpdateHive_1_1Table extends AbstractProcessor {
|
||||
|
||||
static final String TEXTFILE = "TEXTFILE";
|
||||
static final String SEQUENCEFILE = "SEQUENCEFILE";
|
||||
static final String ORC = "ORC";
|
||||
static final String PARQUET = "PARQUET";
|
||||
static final String AVRO = "AVRO";
|
||||
static final String RCFILE = "RCFILE";
|
||||
|
||||
static final AllowableValue TEXTFILE_STORAGE = new AllowableValue(TEXTFILE, TEXTFILE, "Stored as plain text files. TEXTFILE is the default file format, unless the configuration "
|
||||
+ "parameter hive.default.fileformat has a different setting.");
|
||||
static final AllowableValue SEQUENCEFILE_STORAGE = new AllowableValue(SEQUENCEFILE, SEQUENCEFILE, "Stored as compressed Sequence Files.");
|
||||
static final AllowableValue ORC_STORAGE = new AllowableValue(ORC, ORC, "Stored as ORC file format. Supports ACID Transactions & Cost-based Optimizer (CBO). "
|
||||
+ "Stores column-level metadata.");
|
||||
static final AllowableValue PARQUET_STORAGE = new AllowableValue(PARQUET, PARQUET, "Stored as Parquet format for the Parquet columnar storage format.");
|
||||
static final AllowableValue AVRO_STORAGE = new AllowableValue(AVRO, AVRO, "Stored as Avro format.");
|
||||
static final AllowableValue RCFILE_STORAGE = new AllowableValue(RCFILE, RCFILE, "Stored as Record Columnar File format.");
|
||||
|
||||
static final AllowableValue CREATE_IF_NOT_EXISTS = new AllowableValue("Create If Not Exists", "Create If Not Exists",
|
||||
"Create a table with the given schema if it does not already exist");
|
||||
static final AllowableValue FAIL_IF_NOT_EXISTS = new AllowableValue("Fail If Not Exists", "Fail If Not Exists",
|
||||
"If the target does not already exist, log an error and route the flowfile to failure");
|
||||
|
||||
static final String TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE = "hive.table.management.strategy";
|
||||
static final AllowableValue MANAGED_TABLE = new AllowableValue("Managed", "Managed",
|
||||
"Any tables created by this processor will be managed tables (see Hive documentation for details).");
|
||||
static final AllowableValue EXTERNAL_TABLE = new AllowableValue("External", "External",
|
||||
"Any tables created by this processor will be external tables located at the `External Table Location` property value.");
|
||||
static final AllowableValue ATTRIBUTE_DRIVEN_TABLE = new AllowableValue("Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute",
|
||||
"Use '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' Attribute",
|
||||
"Inspects the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' FlowFile attribute to determine the table management strategy. The value "
|
||||
+ "of this attribute must be a case-insensitive match to one of the other allowable values (Managed, External, e.g.).");
|
||||
|
||||
static final String ATTR_OUTPUT_TABLE = "output.table";
|
||||
static final String ATTR_OUTPUT_PATH = "output.path";
|
||||
|
||||
// Properties
|
||||
static final PropertyDescriptor RECORD_READER = new PropertyDescriptor.Builder()
|
||||
.name("record-reader")
|
||||
.displayName("Record Reader")
|
||||
.description("The service for reading incoming flow files. The reader is only used to determine the schema of the records, the actual records will not be processed.")
|
||||
.identifiesControllerService(RecordReaderFactory.class)
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder()
|
||||
.name("hive11-dbcp-service")
|
||||
.displayName("Hive Database Connection Pooling Service")
|
||||
.description("The Hive Controller Service that is used to obtain connection(s) to the Hive database")
|
||||
.required(true)
|
||||
.identifiesControllerService(Hive_1_1DBCPService.class)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_NAME = new PropertyDescriptor.Builder()
|
||||
.name("hive11-table-name")
|
||||
.displayName("Table Name")
|
||||
.description("The name of the database table to update. If the table does not exist, then it will either be created or an error thrown, depending "
|
||||
+ "on the value of the Create Table property.")
|
||||
.required(true)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor CREATE_TABLE = new PropertyDescriptor.Builder()
|
||||
.name("hive11-create-table")
|
||||
.displayName("Create Table Strategy")
|
||||
.description("Specifies how to process the target table when it does not exist (create it, fail, e.g.).")
|
||||
.required(true)
|
||||
.addValidator(Validator.VALID)
|
||||
.allowableValues(CREATE_IF_NOT_EXISTS, FAIL_IF_NOT_EXISTS)
|
||||
.defaultValue(FAIL_IF_NOT_EXISTS.getValue())
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_MANAGEMENT_STRATEGY = new PropertyDescriptor.Builder()
|
||||
.name("hive11-create-table-management")
|
||||
.displayName("Create Table Management Strategy")
|
||||
.description("Specifies (when a table is to be created) whether the table is a managed table or an external table. Note that when External is specified, the "
|
||||
+ "'External Table Location' property must be specified. If the '" + TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE + "' value is selected, 'External Table Location' "
|
||||
+ "must still be specified, but can contain Expression Language or be set to the empty string, and is ignored when the attribute evaluates to 'Managed'.")
|
||||
.required(true)
|
||||
.addValidator(Validator.VALID)
|
||||
.allowableValues(MANAGED_TABLE, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE)
|
||||
.defaultValue(MANAGED_TABLE.getValue())
|
||||
.dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor UPDATE_FIELD_NAMES = new PropertyDescriptor.Builder()
|
||||
.name("hive11-update-field-names")
|
||||
.displayName("Update Field Names")
|
||||
.description("This property indicates whether to update the output schema such that the field names are set to the exact column names from the specified "
|
||||
+ "table. This should be used if the incoming record field names may not match the table's column names in terms of upper- and lower-case. For example, this property should be "
|
||||
+ "set to true if the output FlowFile (and target table storage) is Avro format, as Hive/Impala expects the field names to match the column names exactly.")
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("false")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor RECORD_WRITER_FACTORY = new PropertyDescriptor.Builder()
|
||||
.name("hive11-record-writer")
|
||||
.displayName("Record Writer")
|
||||
.description("Specifies the Controller Service to use for writing results to a FlowFile. The Record Writer should use Inherit Schema to emulate the inferred schema behavior, i.e. "
|
||||
+ "an explicit schema need not be defined in the writer, and will be supplied by the same logic used to infer the schema from the column types. If Create Table Strategy is set "
|
||||
+ "'Create If Not Exists', the Record Writer's output format must match the Record Reader's format in order for the data to be placed in the created table location. Note that "
|
||||
+ "this property is only used if 'Update Field Names' is set to true and the field names do not all match the column names exactly. If no "
|
||||
+ "update is needed for any field names (or 'Update Field Names' is false), the Record Writer is not used and instead the input FlowFile is routed to success or failure "
|
||||
+ "without modification.")
|
||||
.identifiesControllerService(RecordSetWriterFactory.class)
|
||||
.dependsOn(UPDATE_FIELD_NAMES, "true")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor EXTERNAL_TABLE_LOCATION = new PropertyDescriptor.Builder()
|
||||
.name("hive11-external-table-location")
|
||||
.displayName("External Table Location")
|
||||
.description("Specifies (when an external table is to be created) the file path (in HDFS, e.g.) to store table data.")
|
||||
.required(true)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
|
||||
.dependsOn(TABLE_MANAGEMENT_STRATEGY, EXTERNAL_TABLE, ATTRIBUTE_DRIVEN_TABLE)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor TABLE_STORAGE_FORMAT = new PropertyDescriptor.Builder()
|
||||
.name("hive11-storage-format")
|
||||
.displayName("Create Table Storage Format")
|
||||
.description("If a table is to be created, the specified storage format will be used.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.allowableValues(TEXTFILE_STORAGE, SEQUENCEFILE_STORAGE, ORC_STORAGE, PARQUET_STORAGE, AVRO_STORAGE, RCFILE_STORAGE)
|
||||
.defaultValue(TEXTFILE)
|
||||
.dependsOn(CREATE_TABLE, CREATE_IF_NOT_EXISTS)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor QUERY_TIMEOUT = new PropertyDescriptor.Builder()
|
||||
.name("hive11query-timeout")
|
||||
.displayName("Query Timeout")
|
||||
.description("Sets the number of seconds the driver will wait for a query to execute. "
|
||||
+ "A value of 0 means no timeout. NOTE: Non-zero values may not be supported by the driver.")
|
||||
.defaultValue("0")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.INTEGER_VALIDATOR)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.build();
|
||||
|
||||
static final PropertyDescriptor PARTITION_CLAUSE = new PropertyDescriptor.Builder()
|
||||
.name("hive11-partition-clause")
|
||||
.displayName("Partition Clause")
|
||||
.description("Specifies a comma-separated list of attribute names and optional data types corresponding to the partition columns of the target table. Simply put, if the table is "
|
||||
+ "partitioned or is to be created with partitions, each partition name should be an attribute on the FlowFile and listed in this property. This assumes all incoming records "
|
||||
+ "belong to the same partition and the partition columns are not fields in the record. An example of specifying this field is if PartitionRecord "
|
||||
+ "is upstream and two partition columns 'name' (of type string) and 'age' (of type integer) are used, then this property can be set to 'name string, age int'. The data types "
|
||||
+ "are optional and if partition(s) are to be created they will default to string type if not specified. For non-string primitive types, specifying the data type for existing "
|
||||
+ "partition columns is helpful for interpreting the partition value(s). If the table exists, the data types need not be specified "
|
||||
+ "(and are ignored in that case). This property must be set if the table is partitioned, and there must be an attribute for each partition column in the table. "
|
||||
+ "The values of the attributes will be used as the partition values, and the resulting output.path attribute value will reflect the location of the partition in the filesystem "
|
||||
+ "(for use downstream in processors such as PutHDFS).")
|
||||
.required(false)
|
||||
.expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
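// Example property value (column names assumed): "name string, age int" declares two partition columns;
// a bare "dt" is also accepted and defaults to string. Each listed name must be present as a FlowFile attribute.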
|
||||
|
||||
// Relationships
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("A FlowFile containing records routed to this relationship after the record has been successfully transmitted to Hive.")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("A FlowFile containing records routed to this relationship if the record could not be transmitted to Hive.")
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> propertyDescriptors;
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(ProcessorInitializationContext context) {
|
||||
List<PropertyDescriptor> props = new ArrayList<>();
|
||||
props.add(RECORD_READER);
|
||||
props.add(HIVE_DBCP_SERVICE);
|
||||
props.add(TABLE_NAME);
|
||||
props.add(PARTITION_CLAUSE);
|
||||
props.add(CREATE_TABLE);
|
||||
props.add(TABLE_MANAGEMENT_STRATEGY);
|
||||
props.add(EXTERNAL_TABLE_LOCATION);
|
||||
props.add(TABLE_STORAGE_FORMAT);
|
||||
props.add(UPDATE_FIELD_NAMES);
|
||||
props.add(RECORD_WRITER_FACTORY);
|
||||
props.add(QUERY_TIMEOUT);
|
||||
|
||||
propertyDescriptors = Collections.unmodifiableList(props);
|
||||
|
||||
Set<Relationship> _relationships = new HashSet<>();
|
||||
_relationships.add(REL_SUCCESS);
|
||||
_relationships.add(REL_FAILURE);
|
||||
relationships = Collections.unmodifiableSet(_relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return propertyDescriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<ValidationResult> customValidate(ValidationContext validationContext) {
|
||||
List<ValidationResult> validationResults = new ArrayList<>(super.customValidate(validationContext));
|
||||
final boolean recordWriterFactorySet = validationContext.getProperty(RECORD_WRITER_FACTORY).isSet();
|
||||
final boolean createIfNotExists = validationContext.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue());
|
||||
final boolean updateFieldNames = validationContext.getProperty(UPDATE_FIELD_NAMES).asBoolean();
|
||||
|
||||
if (!recordWriterFactorySet && updateFieldNames) {
|
||||
validationResults.add(new ValidationResult.Builder().subject(RECORD_WRITER_FACTORY.getDisplayName())
|
||||
.explanation("Record Writer must be set if 'Update Field Names' is true").valid(false).build());
|
||||
}
|
||||
final String tableManagementStrategy = validationContext.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue();
|
||||
final boolean managedTable;
|
||||
if (!ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) {
|
||||
managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy);
|
||||
// Ensure valid configuration for external tables
|
||||
if (createIfNotExists && !managedTable && !validationContext.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) {
|
||||
validationResults.add(new ValidationResult.Builder().subject(EXTERNAL_TABLE_LOCATION.getDisplayName())
|
||||
.explanation("External Table Location must be set when Table Management Strategy is set to External").valid(false).build());
|
||||
}
|
||||
}
|
||||
return validationResults;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
|
||||
|
||||
FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
|
||||
final RecordSetWriterFactory recordWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
|
||||
final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
|
||||
final String partitionClauseString = context.getProperty(PARTITION_CLAUSE).evaluateAttributeExpressions(flowFile).getValue();
|
||||
List<String> partitionClauseElements = null;
|
||||
if (!StringUtils.isEmpty(partitionClauseString)) {
|
||||
partitionClauseElements = Arrays.stream(partitionClauseString.split(",")).filter(Objects::nonNull).map(String::trim).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
final ComponentLog log = getLogger();
|
||||
|
||||
try {
|
||||
final RecordReader reader;
|
||||
|
||||
try (final InputStream in = session.read(flowFile)) {
|
||||
// if we fail to create the RecordReader then we want to route to failure, so we need to
|
||||
// handle this separately from the other IOExceptions which normally route to retry
|
||||
try {
|
||||
reader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
|
||||
} catch (Exception e) {
|
||||
throw new RecordReaderFactoryException("Unable to create RecordReader", e);
|
||||
}
|
||||
} catch (RecordReaderFactoryException rrfe) {
|
||||
log.error(
|
||||
"Failed to create {} for {} - routing to failure",
|
||||
new Object[]{RecordReader.class.getSimpleName(), flowFile},
|
||||
rrfe
|
||||
);
|
||||
// Since we are wrapping the exceptions above there should always be a cause
|
||||
// but it's possible it might not have a message. This handles that by logging
|
||||
// the name of the class thrown.
|
||||
Throwable c = rrfe.getCause();
|
||||
if (c != null) {
|
||||
session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown");
|
||||
} else {
|
||||
session.putAttribute(flowFile, "record.error.message", rrfe.getClass().getCanonicalName() + " Thrown");
|
||||
}
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
final RecordSchema recordSchema = reader.getSchema();
|
||||
|
||||
final boolean createIfNotExists = context.getProperty(CREATE_TABLE).getValue().equals(CREATE_IF_NOT_EXISTS.getValue());
|
||||
final boolean updateFieldNames = context.getProperty(UPDATE_FIELD_NAMES).asBoolean();
|
||||
if (recordWriterFactory == null && updateFieldNames) {
|
||||
throw new ProcessException("Record Writer must be set if 'Update Field Names' is true");
|
||||
}
|
||||
final String tableManagementStrategy = context.getProperty(TABLE_MANAGEMENT_STRATEGY).getValue();
|
||||
final boolean managedTable;
|
||||
if (ATTRIBUTE_DRIVEN_TABLE.getValue().equals(tableManagementStrategy)) {
|
||||
String tableManagementStrategyAttribute = flowFile.getAttribute(TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE);
|
||||
if (MANAGED_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) {
|
||||
managedTable = true;
|
||||
} else if (EXTERNAL_TABLE.getValue().equalsIgnoreCase(tableManagementStrategyAttribute)) {
|
||||
managedTable = false;
|
||||
} else {
|
||||
log.error("The '{}' attribute either does not exist or has invalid value: {}. Must be one of (ignoring case): Managed, External. "
|
||||
+ "Routing flowfile to failure",
|
||||
new Object[]{TABLE_MANAGEMENT_STRATEGY_ATTRIBUTE, tableManagementStrategyAttribute});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
managedTable = MANAGED_TABLE.getValue().equals(tableManagementStrategy);
|
||||
}
|
||||
|
||||
// Ensure valid configuration for external tables
|
||||
if (createIfNotExists && !managedTable && !context.getProperty(EXTERNAL_TABLE_LOCATION).isSet()) {
|
||||
throw new IOException("External Table Location must be set when Table Management Strategy is set to External");
|
||||
}
|
||||
final String externalTableLocation = managedTable ? null : context.getProperty(EXTERNAL_TABLE_LOCATION).evaluateAttributeExpressions(flowFile).getValue();
|
||||
if (!managedTable && StringUtils.isEmpty(externalTableLocation)) {
|
||||
log.error("External Table Location has invalid value: {}. Routing flowfile to failure", new Object[]{externalTableLocation});
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
final String storageFormat = context.getProperty(TABLE_STORAGE_FORMAT).getValue();
|
||||
final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class);
|
||||
try (final Connection connection = dbcpService.getConnection()) {
|
||||
final Map<String,String> attributes = new HashMap<>(flowFile.getAttributes());
|
||||
OutputMetadataHolder outputMetadataHolder = checkAndUpdateTableSchema(attributes, connection, recordSchema, tableName, partitionClauseElements,
|
||||
createIfNotExists, externalTableLocation, storageFormat, updateFieldNames);
|
||||
if (outputMetadataHolder != null) {
|
||||
// The output schema changed (i.e. field names were updated), so write out the corresponding FlowFile
|
||||
try {
|
||||
final FlowFile inputFlowFile = flowFile;
|
||||
flowFile = session.write(flowFile, (in, out) -> {
|
||||
|
||||
// if we fail to create the RecordReader then we want to route to failure, so we need to
|
||||
// handle this separately from the other IOExceptions which normally route to retry
|
||||
final RecordReader recordReader;
|
||||
final RecordSetWriter recordSetWriter;
|
||||
try {
|
||||
recordReader = recordReaderFactory.createRecordReader(inputFlowFile, in, getLogger());
|
||||
recordSetWriter = recordWriterFactory.createWriter(getLogger(), outputMetadataHolder.getOutputSchema(), out, attributes);
|
||||
} catch (Exception e) {
|
||||
if (e instanceof IOException) {
|
||||
throw (IOException) e;
|
||||
}
|
||||
throw new IOException(new RecordReaderFactoryException("Unable to create RecordReader", e));
|
||||
}
|
||||
|
||||
WriteResult writeResult = updateRecords(recordSchema, outputMetadataHolder, recordReader, recordSetWriter);
|
||||
recordSetWriter.flush();
|
||||
recordSetWriter.close();
|
||||
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
|
||||
attributes.put(CoreAttributes.MIME_TYPE.key(), recordSetWriter.getMimeType());
|
||||
attributes.putAll(writeResult.getAttributes());
|
||||
});
|
||||
} catch (final Exception e) {
|
||||
getLogger().error("Failed to process {}; will route to failure", new Object[]{flowFile, e});
|
||||
// Since we are wrapping the exceptions above there should always be a cause
|
||||
// but it's possible it might not have a message. This handles that by logging
|
||||
// the name of the class thrown.
|
||||
Throwable c = e.getCause();
|
||||
if (c != null) {
|
||||
session.putAttribute(flowFile, "record.error.message", (c.getLocalizedMessage() != null) ? c.getLocalizedMessage() : c.getClass().getCanonicalName() + " Thrown");
|
||||
} else {
|
||||
session.putAttribute(flowFile, "record.error.message", e.getClass().getCanonicalName() + " Thrown");
|
||||
}
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
attributes.put(ATTR_OUTPUT_TABLE, tableName);
|
||||
flowFile = session.putAllAttributes(flowFile, attributes);
|
||||
session.getProvenanceReporter().invokeRemoteProcess(flowFile, dbcpService.getConnectionURL());
|
||||
session.transfer(flowFile, REL_SUCCESS);
|
||||
}
|
||||
} catch (IOException | SQLException e) {
|
||||
|
||||
flowFile = session.putAttribute(flowFile, ATTR_OUTPUT_TABLE, tableName);
|
||||
log.error("Exception while processing {} - routing to failure", new Object[]{flowFile}, e);
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
|
||||
} catch (DiscontinuedException e) {
|
||||
// The input FlowFile processing is discontinued. Keep it in the input queue.
|
||||
getLogger().warn("Discontinued processing for {} due to {}", new Object[]{flowFile, e}, e);
|
||||
session.transfer(flowFile, Relationship.SELF);
|
||||
} catch (Throwable t) {
|
||||
throw (t instanceof ProcessException) ? (ProcessException) t : new ProcessException(t);
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized OutputMetadataHolder checkAndUpdateTableSchema(Map<String,String> attributes, final Connection conn, final RecordSchema schema,
|
||||
final String tableName, List<String> partitionClause, final boolean createIfNotExists,
|
||||
final String externalTableLocation, final String storageFormat, final boolean updateFieldNames) throws IOException {
|
||||
// Read in the current table metadata, compare it to the reader's schema, and
|
||||
// add any columns from the schema that are missing in the table
|
||||
try (Statement s = conn.createStatement()) {
|
||||
// Determine whether the table exists
|
||||
ResultSet tables = s.executeQuery("SHOW TABLES");
|
||||
List<String> tableNames = new ArrayList<>();
|
||||
String hiveTableName;
|
||||
while (tables.next() && StringUtils.isNotEmpty(hiveTableName = tables.getString(1))) {
|
||||
tableNames.add(hiveTableName);
|
||||
}
|
||||
|
||||
List<String> columnsToAdd = new ArrayList<>();
|
||||
String outputPath;
|
||||
boolean tableCreated = false;
|
||||
if (!tableNames.contains(tableName) && createIfNotExists) {
|
||||
StringBuilder createTableStatement = new StringBuilder();
|
||||
for (RecordField recordField : schema.getFields()) {
|
||||
String recordFieldName = recordField.getFieldName();
|
||||
// The table does not exist yet, so every field in the schema becomes a column
|
||||
columnsToAdd.add("`" + recordFieldName + "` " + getHiveTypeFromFieldType(recordField.getDataType(), true));
|
||||
getLogger().debug("Adding column " + recordFieldName + " to table " + tableName);
|
||||
}
|
||||
|
||||
// Handle partition clause
|
||||
if (partitionClause == null) {
|
||||
partitionClause = Collections.emptyList();
|
||||
}
|
||||
List<String> validatedPartitionClause = new ArrayList<>(partitionClause.size());
|
||||
for (String partition : partitionClause) {
|
||||
String[] partitionInfo = partition.split(" ");
|
||||
if (partitionInfo.length != 2) {
|
||||
validatedPartitionClause.add("`" + partitionInfo[0] + "` string");
|
||||
} else {
|
||||
validatedPartitionClause.add("`" + partitionInfo[0] + "` " + partitionInfo[1]);
|
||||
}
|
||||
}
|
||||
|
||||
createTableStatement.append("CREATE ")
|
||||
.append(externalTableLocation == null ? "" : "EXTERNAL ")
|
||||
.append("TABLE IF NOT EXISTS `")
|
||||
.append(tableName)
|
||||
.append("` (")
|
||||
.append(String.join(", ", columnsToAdd))
|
||||
.append(") ")
|
||||
.append(validatedPartitionClause.isEmpty() ? "" : "PARTITIONED BY (" + String.join(", ", validatedPartitionClause) + ") ")
|
||||
.append("STORED AS ")
|
||||
.append(storageFormat)
|
||||
.append(externalTableLocation == null ? "" : " LOCATION '" + externalTableLocation + "'");
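// Sketch of a generated statement (table, columns and location assumed):
// CREATE EXTERNAL TABLE IF NOT EXISTS `users` (`name` STRING, `age` INT)
//   PARTITIONED BY (`dt` string) STORED AS ORC LOCATION '/warehouse/users'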
|
||||
|
||||
String createTableSql = createTableStatement.toString();
|
||||
|
||||
if (StringUtils.isNotEmpty(createTableSql)) {
|
||||
// Perform the table create
|
||||
getLogger().info("Executing Hive DDL: " + createTableSql);
|
||||
s.execute(createTableSql);
|
||||
}
|
||||
|
||||
tableCreated = true;
|
||||
}
|
||||
|
||||
// Process the table (columns, partitions, location, etc.)
|
||||
List<String> hiveColumns = new ArrayList<>();
|
||||
|
||||
String describeTable = "DESC FORMATTED `" + tableName + "`";
|
||||
ResultSet tableInfo = s.executeQuery(describeTable);
|
||||
// Result is 3 columns, col_name, data_type, comment. Check the first row for a header and skip if so, otherwise add column name
|
||||
tableInfo.next();
|
||||
String columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
// If the column was a header, check for a blank line to follow and skip it, otherwise add the column name
|
||||
if (columnName.startsWith("#")) {
|
||||
tableInfo.next();
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName)) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
}
|
||||
|
||||
// Collect all column names
|
||||
while (tableInfo.next() && StringUtils.isNotEmpty(columnName = tableInfo.getString(1))) {
|
||||
hiveColumns.add(columnName);
|
||||
}
|
||||
|
||||
// Collect all partition columns
|
||||
boolean moreRows = true;
|
||||
boolean headerFound = false;
|
||||
while (moreRows && !headerFound) {
|
||||
String line = tableInfo.getString(1);
|
||||
if ("# Partition Information".equals(line)) {
|
||||
headerFound = true;
|
||||
} else if ("# Detailed Table Information".equals(line)) {
|
||||
// Not partitioned, exit the loop with headerFound = false
|
||||
break;
|
||||
}
|
||||
moreRows = tableInfo.next();
|
||||
}
|
||||
|
||||
List<String> partitionColumns = new ArrayList<>();
|
||||
List<String> partitionColumnsEqualsValueList = new ArrayList<>();
|
||||
List<String> partitionColumnsLocationList = new ArrayList<>();
|
||||
if (headerFound) {
|
||||
// If the table is partitioned, construct the partition=value strings for each partition column
|
||||
String partitionColumnName;
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName) && !columnName.startsWith("#")) {
|
||||
partitionColumns.add(columnName);
|
||||
}
|
||||
// If the column was a header, check for a blank line to follow and skip it, otherwise add the column name
|
||||
if (columnName.startsWith("#")) {
|
||||
tableInfo.next();
|
||||
columnName = tableInfo.getString(1);
|
||||
if (StringUtils.isNotEmpty(columnName)) {
|
||||
partitionColumns.add(columnName);
|
||||
}
|
||||
}
|
||||
while (tableInfo.next() && StringUtils.isNotEmpty(partitionColumnName = tableInfo.getString(1))) {
|
||||
partitionColumns.add(partitionColumnName);
|
||||
}
|
||||
|
||||
final int partitionColumnsSize = partitionColumns.size();
|
||||
final int partitionClauseSize = (partitionClause == null) ? 0 : partitionClause.size();
|
||||
if (partitionClauseSize != partitionColumnsSize) {
|
||||
throw new IOException("Found " + partitionColumnsSize + " partition columns but " + partitionClauseSize + " partition values were supplied");
|
||||
}
|
||||
|
||||
for (int i = 0; i < partitionClauseSize; i++) {
|
||||
String partitionName = partitionClause.get(i).split(" ")[0];
|
||||
String partitionValue = attributes.get(partitionName);
|
||||
if (StringUtils.isEmpty(partitionValue)) {
|
||||
throw new IOException("No value found for partition value attribute '" + partitionName + "'");
|
||||
}
|
||||
if (!partitionColumns.contains(partitionName)) {
|
||||
throw new IOException("Cannot add partition '" + partitionName + "' to existing table");
|
||||
}
|
||||
partitionColumnsEqualsValueList.add("`" + partitionName + "`='" + partitionValue + "'");
|
||||
// Add unquoted version for the output path
|
||||
partitionColumnsLocationList.add(partitionName + "=" + partitionValue);
|
||||
}
|
||||
}
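// Sketch (attribute values assumed): for partition columns name and age with FlowFile attributes
// name=alice and age=30, partitionColumnsEqualsValueList becomes ["`name`='alice'", "`age`='30'"]
// and partitionColumnsLocationList yields the output.path suffix "name=alice/age=30".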
|
||||
|
||||
// Get table location
|
||||
moreRows = true;
|
||||
headerFound = false;
|
||||
while (moreRows && !headerFound) {
|
||||
String line = tableInfo.getString(1);
|
||||
if (line.startsWith("Location:")) {
|
||||
headerFound = true;
|
||||
continue; // Don't do a next() here, need to get the second column value
|
||||
}
|
||||
moreRows = tableInfo.next();
|
||||
}
|
||||
String tableLocation = tableInfo.getString(2);
|
||||
|
||||
String alterTableSql;
|
||||
// If the table wasn't newly created, alter it accordingly
|
||||
if (!tableCreated) {
|
||||
StringBuilder alterTableStatement = new StringBuilder();
|
||||
// Handle new columns
|
||||
for (RecordField recordField : schema.getFields()) {
|
||||
String recordFieldName = recordField.getFieldName().toLowerCase();
|
||||
if (!hiveColumns.contains(recordFieldName) && !partitionColumns.contains(recordFieldName)) {
|
||||
// The field does not exist in the table (and is not a partition column), add it
|
||||
columnsToAdd.add("`" + recordFieldName + "` " + getHiveTypeFromFieldType(recordField.getDataType(), true));
|
||||
getLogger().info("Adding column " + recordFieldName + " to table " + tableName);
|
||||
}
|
||||
}
|
||||
|
||||
if (!columnsToAdd.isEmpty()) {
|
||||
alterTableStatement.append("ALTER TABLE `")
|
||||
.append(tableName)
|
||||
.append("` ADD COLUMNS (")
|
||||
.append(String.join(", ", columnsToAdd))
|
||||
.append(")");
|
||||
|
||||
alterTableSql = alterTableStatement.toString();
|
||||
if (StringUtils.isNotEmpty(alterTableSql)) {
|
||||
// Perform the table update
|
||||
getLogger().info("Executing Hive DDL: " + alterTableSql);
|
||||
s.execute(alterTableSql);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outputPath = tableLocation;
|
||||
|
||||
// Handle new partition values
|
||||
if (!partitionColumnsEqualsValueList.isEmpty()) {
|
||||
alterTableSql = "ALTER TABLE `" +
|
||||
tableName +
|
||||
"` ADD IF NOT EXISTS PARTITION (" +
|
||||
String.join(", ", partitionColumnsEqualsValueList) +
|
||||
")";
|
||||
if (StringUtils.isNotEmpty(alterTableSql)) {
|
||||
// Perform the table update
|
||||
getLogger().info("Executing Hive DDL: " + alterTableSql);
|
||||
s.execute(alterTableSql);
|
||||
}
|
||||
// Add attribute for HDFS location of the partition values
|
||||
outputPath = tableLocation + "/" + String.join("/", partitionColumnsLocationList);
|
||||
}
|
||||
|
||||
// If updating field names, return a new RecordSchema, otherwise return null
|
||||
OutputMetadataHolder outputMetadataHolder;
|
||||
if (updateFieldNames) {
|
||||
List<RecordField> inputRecordFields = schema.getFields();
|
||||
List<RecordField> outputRecordFields = new ArrayList<>();
|
||||
Map<String,String> fieldMap = new HashMap<>();
|
||||
boolean needsUpdating = false;
|
||||
|
||||
for (RecordField inputRecordField : inputRecordFields) {
|
||||
final String inputRecordFieldName = inputRecordField.getFieldName();
|
||||
boolean found = false;
|
||||
for (String hiveColumnName : hiveColumns) {
|
||||
if (inputRecordFieldName.equalsIgnoreCase(hiveColumnName)) {
|
||||
// Set a flag if the field name doesn't match the column name exactly. This overall flag will determine whether
|
||||
// the records need updating (if true) or not (if false)
|
||||
if (!inputRecordFieldName.equals(hiveColumnName)) {
|
||||
needsUpdating = true;
|
||||
}
|
||||
fieldMap.put(inputRecordFieldName, hiveColumnName);
|
||||
outputRecordFields.add(new RecordField(hiveColumnName, inputRecordField.getDataType(), inputRecordField.getDefaultValue(), inputRecordField.isNullable()));
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
// If the input field doesn't match any Hive column, map its name to itself so it passes through unchanged
|
||||
fieldMap.put(inputRecordFieldName, inputRecordFieldName);
|
||||
}
|
||||
}
|
||||
outputMetadataHolder = needsUpdating ? new OutputMetadataHolder(new SimpleRecordSchema(outputRecordFields), fieldMap)
|
||||
: null;
|
||||
} else {
|
||||
outputMetadataHolder = null;
|
||||
}
|
||||
attributes.put(ATTR_OUTPUT_PATH, outputPath);
|
||||
return outputMetadataHolder;
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static String getHiveTypeFromFieldType(DataType rawDataType, boolean hiveFieldNames) {
|
||||
if (rawDataType == null) {
|
||||
throw new IllegalArgumentException("Field type is null");
|
||||
}
|
||||
RecordFieldType dataType = rawDataType.getFieldType();
|
||||
|
||||
if (RecordFieldType.INT.equals(dataType)) {
|
||||
return "INT";
|
||||
}
|
||||
if (RecordFieldType.LONG.equals(dataType)) {
|
||||
return "BIGINT";
|
||||
}
|
||||
if (RecordFieldType.BOOLEAN.equals(dataType)) {
|
||||
return "BOOLEAN";
|
||||
}
|
||||
if (RecordFieldType.DOUBLE.equals(dataType)) {
|
||||
return "DOUBLE";
|
||||
}
|
||||
if (RecordFieldType.FLOAT.equals(dataType)) {
|
||||
return "FLOAT";
|
||||
}
|
||||
if (RecordFieldType.DECIMAL.equals(dataType)) {
|
||||
return "DECIMAL";
|
||||
}
|
||||
if (RecordFieldType.STRING.equals(dataType) || RecordFieldType.ENUM.equals(dataType)) {
|
||||
return "STRING";
|
||||
}
|
||||
if (RecordFieldType.DATE.equals(dataType)) {
|
||||
return "DATE";
|
||||
}
|
||||
if (RecordFieldType.TIME.equals(dataType)) {
|
||||
return "INT";
|
||||
}
|
||||
if (RecordFieldType.TIMESTAMP.equals(dataType)) {
|
||||
return "TIMESTAMP";
|
||||
}
|
||||
if (RecordFieldType.ARRAY.equals(dataType)) {
|
||||
ArrayDataType arrayDataType = (ArrayDataType) rawDataType;
|
||||
if (RecordFieldType.BYTE.getDataType().equals(arrayDataType.getElementType())) {
|
||||
return "BINARY";
|
||||
}
|
||||
return "ARRAY<" + getHiveTypeFromFieldType(arrayDataType.getElementType(), hiveFieldNames) + ">";
|
||||
}
|
||||
if (RecordFieldType.MAP.equals(dataType)) {
|
||||
MapDataType mapDataType = (MapDataType) rawDataType;
|
||||
return "MAP<STRING, " + getHiveTypeFromFieldType(mapDataType.getValueType(), hiveFieldNames) + ">";
|
||||
}
|
||||
if (RecordFieldType.CHOICE.equals(dataType)) {
|
||||
ChoiceDataType choiceDataType = (ChoiceDataType) rawDataType;
|
||||
List<DataType> unionFieldSchemas = choiceDataType.getPossibleSubTypes();
|
||||
|
||||
if (unionFieldSchemas != null) {
|
||||
// Ignore null types in union
|
||||
List<String> hiveFields = unionFieldSchemas.stream()
|
||||
.map((it) -> getHiveTypeFromFieldType(it, hiveFieldNames))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// Flatten the field if the union only has one non-null element
|
||||
return (hiveFields.size() == 1)
|
||||
? hiveFields.get(0)
|
||||
: "UNIONTYPE<" + org.apache.commons.lang3.StringUtils.join(hiveFields, ", ") + ">";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (RecordFieldType.RECORD.equals(dataType)) {
|
||||
RecordDataType recordDataType = (RecordDataType) rawDataType;
|
||||
List<RecordField> recordFields = recordDataType.getChildSchema().getFields();
|
||||
if (recordFields != null) {
|
||||
List<String> hiveFields = recordFields.stream().map(
|
||||
recordField -> ("`" + (hiveFieldNames ? recordField.getFieldName().toLowerCase() : recordField.getFieldName()) + "`:"
|
||||
+ getHiveTypeFromFieldType(recordField.getDataType(), hiveFieldNames))).collect(Collectors.toList());
|
||||
return "STRUCT<" + org.apache.commons.lang3.StringUtils.join(hiveFields, ", ") + ">";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
throw new IllegalArgumentException("Error converting Avro type " + dataType.name() + " to Hive type");
|
||||
}
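// A few example mappings (inputs assumed):
//   MAP of INT                        -> "MAP<STRING, INT>"
//   ARRAY of BYTE                     -> "BINARY"
//   RECORD {id: LONG, name: STRING}   -> "STRUCT<`id`:BIGINT, `name`:STRING>" (names lowercased when hiveFieldNames is true)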
|
||||
|
||||
private synchronized WriteResult updateRecords(final RecordSchema inputRecordSchema, final OutputMetadataHolder outputMetadataHolder,
|
||||
final RecordReader reader, final RecordSetWriter writer) throws IOException {
|
||||
try {
|
||||
writer.beginRecordSet();
|
||||
Record inputRecord;
|
||||
while ((inputRecord = reader.nextRecord()) != null) {
|
||||
List<RecordField> inputRecordFields = inputRecordSchema.getFields();
|
||||
Map<String,Object> outputRecordFields = new HashMap<>(inputRecordFields.size());
|
||||
// Copy values from input field name to output field name
|
||||
for (Map.Entry<String, String> mapping : outputMetadataHolder.getFieldMap().entrySet()) {
|
||||
outputRecordFields.put(mapping.getValue(), inputRecord.getValue(mapping.getKey()));
|
||||
}
|
||||
Record outputRecord = new MapRecord(outputMetadataHolder.getOutputSchema(), outputRecordFields);
|
||||
writer.write(outputRecord);
|
||||
}
|
||||
return writer.finishRecordSet();
|
||||
|
||||
} catch (MalformedRecordException mre) {
|
||||
throw new IOException("Error reading records: "+mre.getMessage(), mre);
|
||||
}
|
||||
}
|
||||
|
||||
private static class OutputMetadataHolder {
|
||||
private final RecordSchema outputSchema;
|
||||
private final Map<String,String> fieldMap;
|
||||
|
||||
public OutputMetadataHolder(RecordSchema outputSchema, Map<String, String> fieldMap) {
|
||||
this.outputSchema = outputSchema;
|
||||
this.fieldMap = fieldMap;
|
||||
}
|
||||
|
||||
public RecordSchema getOutputSchema() {
|
||||
return outputSchema;
|
||||
}
|
||||
|
||||
public Map<String, String> getFieldMap() {
|
||||
return fieldMap;
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -1,23 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
public class AuthenticationFailedException extends Exception {
|
||||
public AuthenticationFailedException(String reason, Exception cause) {
|
||||
super(reason, cause);
|
||||
}
|
||||
}
|
|
@@ -1,63 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
public class CsvOutputOptions {
|
||||
|
||||
private boolean header = true;
|
||||
private String altHeader = null;
|
||||
private String delimiter = ",";
|
||||
private boolean quote = false;
|
||||
private boolean escape = true;
|
||||
|
||||
private int maxRowsPerFlowFile = 0;
|
||||
|
||||
public boolean isHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public String getAltHeader() {
|
||||
return altHeader;
|
||||
}
|
||||
|
||||
|
||||
public String getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
|
||||
public boolean isQuote() {
|
||||
return quote;
|
||||
}
|
||||
|
||||
public boolean isEscape() {
|
||||
return escape;
|
||||
}
|
||||
|
||||
public int getMaxRowsPerFlowFile() {
|
||||
return maxRowsPerFlowFile;
|
||||
}
|
||||
|
||||
public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) {
|
||||
this.header = header;
|
||||
this.altHeader = altHeader;
|
||||
this.delimiter = delimiter;
|
||||
this.quote = quote;
|
||||
this.escape = escape;
|
||||
this.maxRowsPerFlowFile = maxRowsPerFlowFile;
|
||||
}
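// Construction sketch mirroring the field defaults above (argument order: header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile):
//   new CsvOutputOptions(true, null, ",", false, true, 0)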
|
||||
}
|
|
@@ -1,136 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.hadoop.KerberosProperties;
|
||||
import org.apache.nifi.hadoop.SecurityUtil;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.security.krb.KerberosUser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
public class HiveConfigurator {
|
||||
|
||||
public Collection<ValidationResult> validate(String configFiles, String principal, String keyTab, String password,
|
||||
AtomicReference<ValidationResources> validationResourceHolder, ComponentLog log) {
|
||||
|
||||
final List<ValidationResult> problems = new ArrayList<>();
|
||||
ValidationResources resources = validationResourceHolder.get();
|
||||
|
||||
// if no resources in the holder, or if the holder has different resources loaded,
|
||||
// then load the Configuration and set the new resources in the holder
|
||||
if (resources == null || !configFiles.equals(resources.getConfigResources())) {
|
||||
log.debug("Reloading validation resources");
|
||||
resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles));
|
||||
validationResourceHolder.set(resources);
|
||||
}
|
||||
|
||||
final Configuration hiveConfig = resources.getConfiguration();
|
||||
|
||||
problems.addAll(KerberosProperties.validatePrincipalWithKeytabOrPassword(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, password, log));
|
||||
|
||||
return problems;
|
||||
}
|
||||
|
||||
public HiveConf getConfigurationFromFiles(final String configFiles) {
|
||||
final HiveConf hiveConfig = new HiveConf();
|
||||
if (StringUtils.isNotBlank(configFiles)) {
|
||||
for (final String configFile : configFiles.split(",")) {
|
||||
hiveConfig.addResource(new Path(configFile.trim()));
|
||||
}
|
||||
}
|
||||
return hiveConfig;
|
||||
}
|
||||
|
||||
public void preload(Configuration configuration) {
|
||||
try {
|
||||
FileSystem.get(configuration).close();
|
||||
UserGroupInformation.setConfiguration(configuration);
|
||||
} catch (IOException ioe) {
|
||||
// Suppress exception as future uses of this configuration will fail
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Acquires a {@link UserGroupInformation} using the given {@link Configuration} and {@link KerberosUser}.
|
||||
* @see SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)
|
||||
* @param hiveConfig The Configuration to apply to the acquired UserGroupInformation
|
||||
* @param kerberosUser The KerberosUser to authenticate
|
||||
* @return A UserGroupInformation instance created using the Subject of the given KerberosUser
|
||||
* @throws AuthenticationFailedException if authentication fails
|
||||
*/
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, KerberosUser kerberosUser) throws AuthenticationFailedException {
|
||||
try {
|
||||
return SecurityUtil.getUgiForKerberosUser(hiveConfig, kerberosUser);
|
||||
} catch (IOException ioe) {
|
||||
throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* As of Apache NiFi 1.5.0, due to changes made to
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this
|
||||
* class to authenticate a principal with Kerberos, Hive controller services no longer
|
||||
* attempt relogins explicitly. For more information, please read the documentation for
|
||||
* {@link SecurityUtil#loginKerberos(Configuration, String, String)}.
|
||||
* <p/>
|
||||
* In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by
|
||||
* {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive
|
||||
* controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions
|
||||
* with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same
|
||||
* {@link UserGroupInformation} instance. One of these threads could leave the
|
||||
* {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state
|
||||
* while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed
|
||||
* authentication attempts that would leave the Hive controller service in an unrecoverable state.
|
||||
*
|
||||
* @see SecurityUtil#loginKerberos(Configuration, String, String)
|
||||
* @deprecated Use {@link SecurityUtil#getUgiForKerberosUser(Configuration, KerberosUser)}
|
||||
*/
|
||||
@Deprecated
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException {
|
||||
UserGroupInformation ugi;
|
||||
try {
|
||||
ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab);
|
||||
} catch (IOException ioe) {
|
||||
throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe);
|
||||
}
|
||||
return ugi;
|
||||
}
|
||||
|
||||
/**
|
||||
* As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper
|
||||
* method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a
|
||||
* {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins.
|
||||
*
|
||||
* @see HiveConfigurator#authenticate(Configuration, String, String)
|
||||
*/
|
||||
@Deprecated
|
||||
public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException {
|
||||
return authenticate(hiveConfig, principal, keyTab);
|
||||
}
|
||||
}
|
|
@ -1,462 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.SchemaBuilder;
|
||||
import org.apache.avro.SchemaBuilder.FieldAssembler;
|
||||
import org.apache.avro.file.DataFileWriter;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericDatumWriter;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.io.DatumWriter;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.text.StringEscapeUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hive.conf.HiveConf;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.ResultSetMetaData;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static java.sql.Types.ARRAY;
|
||||
import static java.sql.Types.BIGINT;
|
||||
import static java.sql.Types.BINARY;
|
||||
import static java.sql.Types.BIT;
|
||||
import static java.sql.Types.BLOB;
|
||||
import static java.sql.Types.BOOLEAN;
|
||||
import static java.sql.Types.CHAR;
|
||||
import static java.sql.Types.CLOB;
|
||||
import static java.sql.Types.DATE;
|
||||
import static java.sql.Types.DECIMAL;
|
||||
import static java.sql.Types.DOUBLE;
|
||||
import static java.sql.Types.FLOAT;
|
||||
import static java.sql.Types.INTEGER;
|
||||
import static java.sql.Types.JAVA_OBJECT;
|
||||
import static java.sql.Types.LONGNVARCHAR;
|
||||
import static java.sql.Types.LONGVARBINARY;
|
||||
import static java.sql.Types.LONGVARCHAR;
|
||||
import static java.sql.Types.NCHAR;
|
||||
import static java.sql.Types.NUMERIC;
|
||||
import static java.sql.Types.NVARCHAR;
|
||||
import static java.sql.Types.OTHER;
|
||||
import static java.sql.Types.REAL;
|
||||
import static java.sql.Types.ROWID;
|
||||
import static java.sql.Types.SMALLINT;
|
||||
import static java.sql.Types.SQLXML;
|
||||
import static java.sql.Types.STRUCT;
|
||||
import static java.sql.Types.TIME;
|
||||
import static java.sql.Types.TIMESTAMP;
|
||||
import static java.sql.Types.TINYINT;
|
||||
import static java.sql.Types.VARBINARY;
|
||||
import static java.sql.Types.VARCHAR;
|
||||
|
||||
/**
|
||||
* JDBC / HiveQL common functions.
|
||||
*/
|
||||
public class HiveJdbcCommon {
|
||||
|
||||
public static final String AVRO = "Avro";
|
||||
public static final String CSV = "CSV";
|
||||
|
||||
public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary";
|
||||
public static final String CSV_MIME_TYPE = "text/csv";
|
||||
|
||||
|
||||
public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder()
|
||||
.name("hive-normalize-avro")
|
||||
.displayName("Normalize Table/Column Names")
|
||||
.description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. For example, colons and periods "
|
||||
+ "will be changed to underscores in order to build a valid Avro record.")
|
||||
.allowableValues("true", "false")
|
||||
.defaultValue("false")
|
||||
.required(true)
|
||||
.build();
|
||||
|
||||
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException {
|
||||
return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null);
|
||||
}
|
||||
|
||||
|
||||
public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback)
|
||||
throws SQLException, IOException {
|
||||
final Schema schema = createSchema(rs, recordName, convertNames);
|
||||
final GenericRecord rec = new GenericData.Record(schema);
|
||||
|
||||
final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
|
||||
try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
|
||||
dataFileWriter.create(schema, outStream);
|
||||
|
||||
final ResultSetMetaData meta = rs.getMetaData();
|
||||
final int nrOfColumns = meta.getColumnCount();
|
||||
long nrOfRows = 0;
|
||||
while (rs.next()) {
|
||||
if (callback != null) {
|
||||
callback.processRow(rs);
|
||||
}
|
||||
for (int i = 1; i <= nrOfColumns; i++) {
|
||||
final int javaSqlType = meta.getColumnType(i);
|
||||
Object value = rs.getObject(i);
|
||||
|
||||
if (value == null) {
|
||||
rec.put(i - 1, null);
|
||||
|
||||
} else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) {
|
||||
// bytes requires little bit different handling
|
||||
ByteBuffer bb = null;
|
||||
if (value instanceof byte[]) {
|
||||
bb = ByteBuffer.wrap((byte[]) value);
|
||||
} else if (value instanceof ByteBuffer) {
|
||||
bb = (ByteBuffer) value;
|
||||
}
|
||||
if (bb != null) {
|
||||
rec.put(i - 1, bb);
|
||||
} else {
|
||||
throw new IOException("Could not process binary object of type " + value.getClass().getName());
|
||||
}
|
||||
|
||||
} else if (value instanceof Byte) {
|
||||
// tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT
|
||||
// But value is returned by JDBC as java.lang.Byte
|
||||
// (at least H2 JDBC works this way)
|
||||
// direct put to avro record results:
|
||||
// org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte
|
||||
rec.put(i - 1, ((Byte) value).intValue());
|
||||
|
||||
} else if (value instanceof BigDecimal || value instanceof BigInteger) {
|
||||
// Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38"
|
||||
rec.put(i - 1, value.toString());
|
||||
|
||||
} else if (value instanceof Number) {
|
||||
// Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned
|
||||
// when the JDBC type is 6 (Float) for example.
|
||||
if (javaSqlType == FLOAT) {
|
||||
value = rs.getFloat(i);
|
||||
} else if (javaSqlType == DOUBLE) {
|
||||
value = rs.getDouble(i);
|
||||
} else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) {
|
||||
value = rs.getInt(i);
|
||||
}
|
||||
|
||||
rec.put(i - 1, value);
|
||||
|
||||
} else if (value instanceof Boolean) {
|
||||
rec.put(i - 1, value);
|
||||
} else if (value instanceof java.sql.SQLXML) {
|
||||
rec.put(i - 1, ((java.sql.SQLXML) value).getString());
|
||||
} else {
|
||||
// The different types that we support are numbers (int, long, double, float),
|
||||
// as well as boolean values and Strings. Since Avro doesn't provide
|
||||
// timestamp types, we want to convert those to Strings. So we will cast anything other
|
||||
// than numbers or booleans to strings by using the toString() method.
|
||||
rec.put(i - 1, value.toString());
|
||||
}
|
||||
}
|
||||
dataFileWriter.append(rec);
|
||||
nrOfRows += 1;
|
||||
|
||||
if (maxRows > 0 && nrOfRows == maxRows)
|
||||
break;
|
||||
}
|
||||
|
||||
return nrOfRows;
|
||||
}
|
||||
}
|
||||
|
||||
public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException {
|
||||
return createSchema(rs, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an Avro schema from a result set. If the table/record name is known a priori and provided, use that as a
|
||||
* fallback for the record name if it cannot be retrieved from the result set, and finally fall back to a default value.
|
||||
*
|
||||
* @param rs The result set to convert to Avro
|
||||
* @param recordName The a priori record name to use if it cannot be determined from the result set.
|
||||
* @param convertNames Whether to convert column/table names to be legal Avro names
|
||||
* @return A Schema object representing the result set converted to an Avro record
|
||||
* @throws SQLException if any error occurs during conversion
|
||||
*/
|
||||
public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException {
|
||||
final ResultSetMetaData meta = rs.getMetaData();
|
||||
final int nrOfColumns = meta.getColumnCount();
|
||||
String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName;
|
||||
try {
|
||||
if (nrOfColumns > 0) {
|
||||
// Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column
|
||||
String firstColumnNameFromMeta = meta.getColumnName(1);
|
||||
int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf(".");
|
||||
if (tableNameDelimiter > -1) {
|
||||
String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter);
|
||||
if (!StringUtils.isBlank(tableNameFromMeta)) {
|
||||
tableName = tableNameFromMeta;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (SQLException se) {
|
||||
// Not all drivers support getTableName, so just use the previously-set default
|
||||
}
|
||||
|
||||
if (convertNames) {
|
||||
tableName = normalizeNameForAvro(tableName);
|
||||
}
|
||||
final FieldAssembler<Schema> builder = SchemaBuilder.record(tableName).namespace("any.data").fields();
|
||||
|
||||
/**
|
||||
* Some missing Avro types - Decimal, Date types. May need some additional work.
|
||||
*/
|
||||
for (int i = 1; i <= nrOfColumns; i++) {
|
||||
String columnNameFromMeta = meta.getColumnName(i);
|
||||
// Hive returns table.column for column name. Grab the column name as the string after the last period
|
||||
int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
|
||||
String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1);
|
||||
switch (meta.getColumnType(i)) {
|
||||
case CHAR:
|
||||
case LONGNVARCHAR:
|
||||
case LONGVARCHAR:
|
||||
case NCHAR:
|
||||
case NVARCHAR:
|
||||
case VARCHAR:
|
||||
case ARRAY:
|
||||
case STRUCT:
|
||||
case JAVA_OBJECT:
|
||||
case OTHER:
|
||||
case SQLXML:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case BIT:
|
||||
case BOOLEAN:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case INTEGER:
|
||||
// Default to signed type unless otherwise noted. Some JDBC drivers don't implement isSigned()
|
||||
boolean signedType = true;
|
||||
try {
|
||||
signedType = meta.isSigned(i);
|
||||
} catch (SQLException se) {
|
||||
// Use signed types as default
|
||||
}
|
||||
if (signedType) {
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
|
||||
} else {
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
|
||||
}
|
||||
break;
|
||||
|
||||
case SMALLINT:
|
||||
case TINYINT:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case BIGINT:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
// java.sql.RowId is interface, is seems to be database
|
||||
// implementation specific, let's convert to String
|
||||
case ROWID:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case FLOAT:
|
||||
case REAL:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case DOUBLE:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
// Did not find direct suitable type, need to be clarified!!!!
|
||||
case DECIMAL:
|
||||
case NUMERIC:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
// Did not find direct suitable type, need to be clarified!!!!
|
||||
case DATE:
|
||||
case TIME:
|
||||
case TIMESTAMP:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
case BINARY:
|
||||
case VARBINARY:
|
||||
case LONGVARBINARY:
|
||||
case BLOB:
|
||||
case CLOB:
|
||||
builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault();
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type");
|
||||
}
|
||||
}
|
||||
|
||||
return builder.endRecord();
|
||||
}
|
||||
|
||||
public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException {
|
||||
return convertToCsvStream(rs, outStream, null, null, outputOptions);
|
||||
}
|
||||
|
||||
public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions)
|
||||
throws SQLException, IOException {
|
||||
|
||||
final ResultSetMetaData meta = rs.getMetaData();
|
||||
final int nrOfColumns = meta.getColumnCount();
|
||||
List<String> columnNames = new ArrayList<>(nrOfColumns);
|
||||
|
||||
if (outputOptions.isHeader()) {
|
||||
if (outputOptions.getAltHeader() == null) {
|
||||
for (int i = 1; i <= nrOfColumns; i++) {
|
||||
String columnNameFromMeta = meta.getColumnName(i);
|
||||
// Hive returns table.column for column name. Grab the column name as the string after the last period
|
||||
int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
|
||||
columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1));
|
||||
}
|
||||
} else {
|
||||
String[] altHeaderNames = outputOptions.getAltHeader().split(",");
|
||||
columnNames = Arrays.asList(altHeaderNames);
|
||||
}
|
||||
}
|
||||
|
||||
// Write column names as header row
|
||||
outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
|
||||
if (outputOptions.isHeader()) {
|
||||
outStream.write("\n".getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
// Iterate over the rows
|
||||
int maxRows = outputOptions.getMaxRowsPerFlowFile();
|
||||
long nrOfRows = 0;
|
||||
while (rs.next()) {
|
||||
if (callback != null) {
|
||||
callback.processRow(rs);
|
||||
}
|
||||
List<String> rowValues = new ArrayList<>(nrOfColumns);
|
||||
for (int i = 1; i <= nrOfColumns; i++) {
|
||||
final int javaSqlType = meta.getColumnType(i);
|
||||
final Object value = rs.getObject(i);
|
||||
|
||||
switch (javaSqlType) {
|
||||
case CHAR:
|
||||
case LONGNVARCHAR:
|
||||
case LONGVARCHAR:
|
||||
case NCHAR:
|
||||
case NVARCHAR:
|
||||
case VARCHAR:
|
||||
String valueString = rs.getString(i);
|
||||
if (valueString != null) {
|
||||
// Removed extra quotes as those are a part of the escapeCsv when required.
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (outputOptions.isQuote()) {
|
||||
sb.append("\"");
|
||||
if (outputOptions.isEscape()) {
|
||||
sb.append(StringEscapeUtils.escapeCsv(valueString));
|
||||
} else {
|
||||
sb.append(valueString);
|
||||
}
|
||||
sb.append("\"");
|
||||
rowValues.add(sb.toString());
|
||||
} else {
|
||||
if (outputOptions.isEscape()) {
|
||||
rowValues.add(StringEscapeUtils.escapeCsv(valueString));
|
||||
} else {
|
||||
rowValues.add(valueString);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rowValues.add("");
|
||||
}
|
||||
break;
|
||||
case ARRAY:
|
||||
case STRUCT:
|
||||
case JAVA_OBJECT:
|
||||
String complexValueString = rs.getString(i);
|
||||
if (complexValueString != null) {
|
||||
rowValues.add(StringEscapeUtils.escapeCsv(complexValueString));
|
||||
} else {
|
||||
rowValues.add("");
|
||||
}
|
||||
break;
|
||||
case SQLXML:
|
||||
if (value != null) {
|
||||
rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString()));
|
||||
} else {
|
||||
rowValues.add("");
|
||||
}
|
||||
default:
|
||||
if (value != null) {
|
||||
rowValues.add(value.toString());
|
||||
} else {
|
||||
rowValues.add("");
|
||||
}
|
||||
}
|
||||
}
|
||||
// Write row values
|
||||
outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
|
||||
outStream.write("\n".getBytes(StandardCharsets.UTF_8));
|
||||
nrOfRows++;
|
||||
|
||||
if (maxRows > 0 && nrOfRows == maxRows)
|
||||
break;
|
||||
}
|
||||
return nrOfRows;
|
||||
}
|
||||
|
||||
public static String normalizeNameForAvro(String inputName) {
|
||||
String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_");
|
||||
if (Character.isDigit(normalizedName.charAt(0))) {
|
||||
normalizedName = "_" + normalizedName;
|
||||
}
|
||||
return normalizedName;
|
||||
}
|
||||
|
||||
/**
|
||||
* An interface for callback methods which allows processing of a row during the convertToXYZStream() processing.
|
||||
* <b>IMPORTANT:</b> This method should only work on the row pointed at by the current ResultSet reference.
|
||||
* Advancing the cursor (e.g.) can cause rows to be skipped during Avro transformation.
|
||||
*/
|
||||
public interface ResultSetRowCallback {
|
||||
void processRow(ResultSet resultSet) throws IOException;
|
||||
}
|
||||
|
||||
public static Configuration getConfigurationFromFiles(final String configFiles) {
|
||||
final Configuration hiveConfig = new HiveConf();
|
||||
if (StringUtils.isNotBlank(configFiles)) {
|
||||
for (final String configFile : configFiles.split(",")) {
|
||||
hiveConfig.addResource(new Path(configFile.trim()));
|
||||
}
|
||||
}
|
||||
return hiveConfig;
|
||||
}
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.util.hive;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary)
|
||||
*/
|
||||
public class ValidationResources {
|
||||
|
||||
private final String configResources;
|
||||
private final Configuration configuration;
|
||||
|
||||
public ValidationResources(String configResources, Configuration configuration) {
|
||||
this.configResources = configResources;
|
||||
this.configuration = configuration;
|
||||
}
|
||||
|
||||
public String getConfigResources() {
|
||||
return configResources;
|
||||
}
|
||||
|
||||
public Configuration getConfiguration() {
|
||||
return configuration;
|
||||
}
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.dbcp.hive.Hive_1_1ConnectionPool
|
|
@ -1,17 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.processors.hive.SelectHive_1_1QL
|
||||
org.apache.nifi.processors.hive.PutHive_1_1QL
|
||||
org.apache.nifi.processors.hive.UpdateHive_1_1Table
|
|
@ -1,184 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.nifi.dbcp.hive;
|
||||
|
||||
import org.apache.commons.dbcp2.BasicDataSource;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.kerberos.KerberosCredentialsService;
|
||||
import org.apache.nifi.logging.ComponentLog;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.registry.VariableDescriptor;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.util.MockConfigurationContext;
|
||||
import org.apache.nifi.util.MockControllerServiceLookup;
|
||||
import org.apache.nifi.util.MockVariableRegistry;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.UndeclaredThrowableException;
|
||||
import java.security.PrivilegedExceptionAction;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.mockito.ArgumentMatchers.isA;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
public class Hive_1_1ConnectionPoolTest {
|
||||
private UserGroupInformation userGroupInformation;
|
||||
private Hive_1_1ConnectionPool hiveConnectionPool;
|
||||
private BasicDataSource basicDataSource;
|
||||
private ComponentLog componentLog;
|
||||
private File krb5conf = new File("src/test/resources/krb5.conf");
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws Exception {
|
||||
// have to initialize this system property before anything else
|
||||
System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath());
|
||||
System.setProperty("java.security.krb5.realm", "nifi.com");
|
||||
System.setProperty("java.security.krb5.kdc", "nifi.kdc");
|
||||
|
||||
userGroupInformation = mock(UserGroupInformation.class);
|
||||
basicDataSource = mock(BasicDataSource.class);
|
||||
componentLog = mock(ComponentLog.class);
|
||||
|
||||
when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> {
|
||||
try {
|
||||
return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run();
|
||||
} catch (IOException | Error | RuntimeException | InterruptedException e) {
|
||||
throw e;
|
||||
} catch (Throwable e) {
|
||||
throw new UndeclaredThrowableException(e);
|
||||
}
|
||||
});
|
||||
|
||||
initPool();
|
||||
}
|
||||
|
||||
private void initPool() throws Exception {
|
||||
hiveConnectionPool = new Hive_1_1ConnectionPool();
|
||||
|
||||
Field ugiField = Hive_1_1ConnectionPool.class.getDeclaredField("ugi");
|
||||
ugiField.setAccessible(true);
|
||||
ugiField.set(hiveConnectionPool, userGroupInformation);
|
||||
|
||||
Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource");
|
||||
dataSourceField.setAccessible(true);
|
||||
dataSourceField.set(hiveConnectionPool, basicDataSource);
|
||||
|
||||
Field componentLogField = AbstractControllerService.class.getDeclaredField("logger");
|
||||
componentLogField.setAccessible(true);
|
||||
componentLogField.set(hiveConnectionPool, componentLog);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetConnectionSqlException() throws SQLException {
|
||||
SQLException sqlException = new SQLException("bad sql");
|
||||
when(basicDataSource.getConnection()).thenThrow(sqlException);
|
||||
ProcessException e = assertThrows(ProcessException.class, () -> hiveConnectionPool.getConnection());
|
||||
assertEquals(sqlException, e.getCause());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExpressionLanguageSupport() throws Exception {
|
||||
final String URL = "jdbc:hive2://localhost:10000/default";
|
||||
final String USER = "user";
|
||||
final String PASS = "pass";
|
||||
final int MAX_CONN = 7;
|
||||
final String MAX_CONN_LIFETIME = "1 sec";
|
||||
final String MAX_WAIT = "10 sec"; // 10000 milliseconds
|
||||
final String CONF = "/path/to/hive-site.xml";
|
||||
hiveConnectionPool = new Hive_1_1ConnectionPool();
|
||||
|
||||
Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
|
||||
put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}");
|
||||
put(Hive_1_1ConnectionPool.DB_USER, "${username}");
|
||||
put(Hive_1_1ConnectionPool.DB_PASSWORD, "${password}");
|
||||
put(Hive_1_1ConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}");
|
||||
put(Hive_1_1ConnectionPool.MAX_CONN_LIFETIME, "${maxconnlifetime}");
|
||||
put(Hive_1_1ConnectionPool.MAX_WAIT_TIME, "${maxwait}");
|
||||
put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}");
|
||||
}};
|
||||
|
||||
MockVariableRegistry registry = new MockVariableRegistry();
|
||||
registry.setVariable(new VariableDescriptor("url"), URL);
|
||||
registry.setVariable(new VariableDescriptor("username"), USER);
|
||||
registry.setVariable(new VariableDescriptor("password"), PASS);
|
||||
registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN));
|
||||
registry.setVariable(new VariableDescriptor("maxconnlifetime"), MAX_CONN_LIFETIME);
|
||||
registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT);
|
||||
registry.setVariable(new VariableDescriptor("hiveconf"), CONF);
|
||||
|
||||
|
||||
MockConfigurationContext context = new MockConfigurationContext(props, null, registry);
|
||||
hiveConnectionPool.onConfigured(context);
|
||||
|
||||
Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource");
|
||||
dataSourceField.setAccessible(true);
|
||||
basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool);
|
||||
assertEquals(URL, basicDataSource.getUrl());
|
||||
assertEquals(USER, basicDataSource.getUsername());
|
||||
assertEquals(PASS, basicDataSource.getPassword());
|
||||
assertEquals(MAX_CONN, basicDataSource.getMaxTotal());
|
||||
assertEquals(1000L, basicDataSource.getMaxConnLifetimeMillis());
|
||||
assertEquals(10000L, basicDataSource.getMaxWaitMillis());
|
||||
assertEquals(URL, hiveConnectionPool.getConnectionURL());
|
||||
}
|
||||
|
||||
@EnabledIfSystemProperty(
|
||||
named = "nifi.test.unstable",
|
||||
matches = "true",
|
||||
disabledReason = "Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run")
|
||||
@Test
|
||||
public void testKerberosAuthException() {
|
||||
final String URL = "jdbc:hive2://localhost:10000/default";
|
||||
final String conf = "src/test/resources/hive-site-security.xml";
|
||||
final String ktab = "src/test/resources/fake.keytab";
|
||||
final String kprinc = "bad@PRINCIPAL.COM";
|
||||
final String kerberosCredentialsServiceId = UUID.randomUUID().toString();
|
||||
|
||||
Map<PropertyDescriptor, String> props = new HashMap<PropertyDescriptor, String>() {{
|
||||
put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}");
|
||||
put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}");
|
||||
put(Hive_1_1ConnectionPool.KERBEROS_CREDENTIALS_SERVICE, kerberosCredentialsServiceId);
|
||||
}};
|
||||
|
||||
MockVariableRegistry registry = new MockVariableRegistry();
|
||||
registry.setVariable(new VariableDescriptor("url"), URL);
|
||||
registry.setVariable(new VariableDescriptor("conf"), conf);
|
||||
|
||||
MockControllerServiceLookup mockControllerServiceLookup = new MockControllerServiceLookup() {};
|
||||
KerberosCredentialsService kerberosCredentialsService = mock(KerberosCredentialsService.class);
|
||||
when(kerberosCredentialsService.getKeytab()).thenReturn(ktab);
|
||||
when(kerberosCredentialsService.getPrincipal()).thenReturn(kprinc);
|
||||
mockControllerServiceLookup.addControllerService(kerberosCredentialsService, kerberosCredentialsServiceId);
|
||||
|
||||
MockConfigurationContext context = new MockConfigurationContext(props, mockControllerServiceLookup, registry);
|
||||
assertThrows(InitializationException.class, () -> hiveConnectionPool.onConfigured(context));
|
||||
}
|
||||
}
|
|
@ -1,292 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSessionFactory;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.util.MockProcessContext;
|
||||
import org.apache.nifi.util.MockProcessorInitializationContext;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHiveParser extends AbstractHive_1_1QLProcessor {
|
||||
|
||||
@BeforeEach
|
||||
public void initialize() {
|
||||
final MockProcessContext processContext = new MockProcessContext(this);
|
||||
final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext);
|
||||
initialize(initializationContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelect() {
|
||||
String query = "select a.empid, to_something(b.saraly) from " +
|
||||
"company.emp a inner join default.salary b where a.empid = b.empid";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(2, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||
assertTrue(tableNames.contains(new TableName("default", "salary", true)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelectPrepared() {
|
||||
String query = "select empid from company.emp a where a.firstName = ?";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName("company", "emp", true)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void parseLongSelect() {
|
||||
String query = "select\n" +
|
||||
"\n" +
|
||||
" i_item_id,\n" +
|
||||
"\n" +
|
||||
" i_item_desc,\n" +
|
||||
"\n" +
|
||||
" s_state,\n" +
|
||||
"\n" +
|
||||
" count(ss_quantity) as store_sales_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(ss_quantity) as store_sales_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" +
|
||||
"\n" +
|
||||
" count(sr_return_quantity) as store_returns_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(sr_return_quantity) as store_returns_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" +
|
||||
"\n" +
|
||||
" count(cs_quantity) as catalog_sales_quantitycount,\n" +
|
||||
"\n" +
|
||||
" avg(cs_quantity) as catalog_sales_quantityave,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" +
|
||||
"\n" +
|
||||
" stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" +
|
||||
"\n" +
|
||||
"from\n" +
|
||||
"\n" +
|
||||
" store_sales,\n" +
|
||||
"\n" +
|
||||
" store_returns,\n" +
|
||||
"\n" +
|
||||
" catalog_sales,\n" +
|
||||
"\n" +
|
||||
" date_dim d1,\n" +
|
||||
"\n" +
|
||||
" date_dim d2,\n" +
|
||||
"\n" +
|
||||
" date_dim d3,\n" +
|
||||
"\n" +
|
||||
" store,\n" +
|
||||
"\n" +
|
||||
" item\n" +
|
||||
"\n" +
|
||||
"where\n" +
|
||||
"\n" +
|
||||
" d1.d_quarter_name = '2000Q1'\n" +
|
||||
"\n" +
|
||||
" and d1.d_date_sk = ss_sold_date_sk\n" +
|
||||
"\n" +
|
||||
" and i_item_sk = ss_item_sk\n" +
|
||||
"\n" +
|
||||
" and s_store_sk = ss_store_sk\n" +
|
||||
"\n" +
|
||||
" and ss_customer_sk = sr_customer_sk\n" +
|
||||
"\n" +
|
||||
" and ss_item_sk = sr_item_sk\n" +
|
||||
"\n" +
|
||||
" and ss_ticket_number = sr_ticket_number\n" +
|
||||
"\n" +
|
||||
" and sr_returned_date_sk = d2.d_date_sk\n" +
|
||||
"\n" +
|
||||
" and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||
"\n" +
|
||||
" and sr_customer_sk = cs_bill_customer_sk\n" +
|
||||
"\n" +
|
||||
" and sr_item_sk = cs_item_sk\n" +
|
||||
"\n" +
|
||||
" and cs_sold_date_sk = d3.d_date_sk\n" +
|
||||
"\n" +
|
||||
" and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" +
|
||||
"\n" +
|
||||
"group by i_item_id , i_item_desc , s_state\n" +
|
||||
"\n" +
|
||||
"order by i_item_id , i_item_desc , s_state\n" +
|
||||
"\n" +
|
||||
"limit 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(6, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
for (TableName tableName : tableNames) {
|
||||
if (tableName.equals(new TableName(null, "store_sales", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "store_returns", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "catalog_sales", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "date_dim", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "store", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "item", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
}
|
||||
assertEquals(6, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSelectInsert() {
|
||||
String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'";
|
||||
|
||||
// The same database.tableName can appear two times for input and output.
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(2, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
tableNames.forEach(tableName -> {
|
||||
if (tableName.equals(new TableName("databaseA", "tableA", false))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName("databaseA", "tableA", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
});
|
||||
assertEquals(2, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseInsert() {
|
||||
String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(3, tableNames.size());
|
||||
AtomicInteger cnt = new AtomicInteger(0);
|
||||
tableNames.forEach(tableName -> {
|
||||
if (tableName.equals(new TableName("databaseB", "tableB1", false))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "tableA1", true))) {
|
||||
cnt.incrementAndGet();
|
||||
} else if (tableName.equals(new TableName(null, "tableA2", true))) {
|
||||
cnt.incrementAndGet();
|
||||
}
|
||||
});
|
||||
assertEquals(3, cnt.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseUpdate() {
|
||||
String query = "update table_a set y = 'updated' where x > 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseDelete() {
|
||||
String query = "delete from table_a where x > 100";
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseDDL() {
|
||||
String query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" +
|
||||
"EmployeeID INT,FirstName STRING, Title STRING,\n" +
|
||||
"State STRING, Laptop STRING)\n" +
|
||||
"COMMENT 'Employee Names'\n" +
|
||||
"STORED AS ORC";
|
||||
|
||||
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSetProperty() {
|
||||
String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseSetRole() {
|
||||
String query = "set role all";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseShowRoles() {
|
||||
String query = "show roles";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseMsck() {
|
||||
String query = "msck repair table table_a";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertTrue(tableNames.contains(new TableName(null, "table_a", false)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void parseAddJar() {
|
||||
String query = "ADD JAR hdfs:///tmp/my_jar.jar";
|
||||
final Set<TableName> tableNames = findTableNames(query);
|
||||
System.out.printf("tableNames=%s\n", tableNames);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,820 +0,0 @@
|
|||
package org.apache.nifi.processors.hive;/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.dbcp.DBCPService;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.util.pattern.RollbackOnFailure;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.DisabledOnOs;
|
||||
import org.junit.jupiter.api.condition.OS;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.sql.Types;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
@DisabledOnOs(OS.WINDOWS)
|
||||
public class TestPutHive_1_1QL {
|
||||
private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)";
|
||||
private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))";
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDirectStatements(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
|
||||
runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes());
|
||||
runner.run();
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("George", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadStatementRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
runner.run();
|
||||
|
||||
// The 1st one should be routed to success, others should stay in queue.
|
||||
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailAtBeginning(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailAtBeginningRollbackOnFailure(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes());
|
||||
runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes());
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(3, runner.getQueueSize().getObjectCount());
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadParameterType(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||
badAttributes.put("hiveql.args.1.value", "hello");
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadParameterValue(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertTrue(rs.next());
|
||||
assertTrue(rs.next());
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailInMiddleWithBadNumberFormat(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersonsAutoId);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final Map<String, String> goodAttributes = new HashMap<>();
|
||||
goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
goodAttributes.put("hiveql.args.1.value", "84");
|
||||
|
||||
final Map<String, String> badAttributes = new HashMap<>();
|
||||
badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
badAttributes.put("hiveql.args.1.value", "NOT_NUMBER");
|
||||
|
||||
final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes();
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, badAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.enqueue(data, goodAttributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3);
|
||||
runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertTrue(rs.next());
|
||||
assertTrue(rs.next());
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUsingSqlDataTypesWithNegativeValues(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)");
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", "-5");
|
||||
attributes.put("hiveql.args.1.value", "84");
|
||||
runner.enqueue("INSERT INTO PERSONS VALUES (1, 'Mark', ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS");
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStatementsWithPreparedParameters(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
|
||||
runner.clearTransferState();
|
||||
|
||||
attributes.clear();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.1.value", "George");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.2.value", "1");
|
||||
|
||||
runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("George", rs.getString(2));
|
||||
assertEquals(84, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMultipleStatementsWithinFlowFile(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// Both statements should be executed; the processor splits the script on semicolons
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS)
|
||||
.forEach(f -> f.assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS"));
|
||||
|
||||
// Now we can check that the values were inserted by the multi-statement script.
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1), "Record ID mismatch");
|
||||
assertEquals( "George", rs.getString(2), "Record NAME mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// Both statements should be executed; the escaped semicolon inside the literal is not treated as a statement delimiter
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
// Now we can check that the values were inserted by the multi-statement script.
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1), "Record ID mismatch");
|
||||
assertEquals( "George\\;", rs.getString(2), "Record NAME mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testWithNullParameter(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
|
||||
runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
assertEquals(1, rs.getInt(1));
|
||||
assertEquals("Mark", rs.getString(2));
|
||||
assertEquals(0, rs.getInt(3));
|
||||
assertFalse(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidStatement(@TempDir Path tempDir) throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final File dbDir = tempDir.resolve("db").toFile();
|
||||
final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
stmt.executeUpdate(createPersons);
|
||||
}
|
||||
}
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; ";
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// should fail because the second statement references an invalid table
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_FAILURE, 1);
|
||||
|
||||
try (final Connection conn = service.getConnection()) {
|
||||
try (final Statement stmt = conn.createStatement()) {
|
||||
final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
|
||||
assertTrue(rs.next());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final DBCPService service = new SQLExceptionService(null);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// The mocked connection throws a retryable SQLException, so the flow file should be routed to retry.
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final DBCPService service = new SQLExceptionService(null);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnknownFailure() throws InitializationException, ProcessException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final SQLExceptionService service = new SQLExceptionService(null);
|
||||
service.setErrorCode(2);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
runner.run();
|
||||
|
||||
// An unrecognized SQLException error code is treated as retryable, so the flow file should be routed to retry.
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException {
|
||||
final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
|
||||
final SQLExceptionService service = new SQLExceptionService(null);
|
||||
service.setErrorCode(0);
|
||||
runner.addControllerService("dbcp", service);
|
||||
runner.enableControllerService(service);
|
||||
|
||||
runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true");
|
||||
|
||||
final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
|
||||
"UPDATE PERSONS SET NAME='George' WHERE ID=?; ";
|
||||
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
|
||||
attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
|
||||
attributes.put("hiveql.args.2.value", "Mark");
|
||||
|
||||
attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.3.value", "84");
|
||||
|
||||
attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
|
||||
attributes.put("hiveql.args.4.value", "1");
|
||||
|
||||
runner.enqueue(sql.getBytes(), attributes);
|
||||
|
||||
AssertionError e = assertThrows(AssertionError.class, () -> runner.run());
|
||||
assertTrue(e.getCause() instanceof ProcessException);
|
||||
|
||||
assertEquals(1, runner.getQueueSize().getObjectCount());
|
||||
runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple implementation only for testing purposes
|
||||
*/
|
||||
private static class MockDBCPService extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||
private final String dbLocation;
|
||||
|
||||
MockDBCPService(final String dbLocation) {
|
||||
this.dbLocation = dbLocation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
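// Embedded Derby stands in for Hive here; the processor under test only needs a working JDBC Connection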
|
||||
Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
|
||||
return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true");
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return "jdbc:derby:" + dbLocation + ";create=true";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple implementation only for testing purposes
|
||||
*/
|
||||
private static class SQLExceptionService extends AbstractControllerService implements Hive_1_1DBCPService {
|
||||
private final Hive_1_1DBCPService service;
|
||||
private int allowedBeforeFailure = 0;
|
||||
private int successful = 0;
|
||||
private int errorCode = 30000; // Default to a retryable exception code
|
||||
|
||||
SQLExceptionService(final Hive_1_1DBCPService service) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIdentifier() {
|
||||
return "dbcp";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Connection getConnection() throws ProcessException {
|
||||
try {
|
||||
if (++successful > allowedBeforeFailure) {
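// Once the allowed number of successful connections is exhausted, return a mocked Connection
// whose prepareStatement always throws a SQLException with the configured error code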
|
||||
final Connection conn = Mockito.mock(Connection.class);
|
||||
Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode));
|
||||
return conn;
|
||||
} else {
|
||||
return service.getConnection();
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
throw new ProcessException("getConnection failed: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getConnectionURL() {
|
||||
return service != null ? service.getConnectionURL() : null;
|
||||
}
|
||||
|
||||
void setErrorCode(int errorCode) {
|
||||
this.errorCode = errorCode;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,661 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.hive;
|
||||
|
||||
import org.apache.avro.file.DataFileStream;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.io.DatumReader;
|
||||
import org.apache.nifi.controller.AbstractControllerService;
|
||||
import org.apache.nifi.dbcp.DBCPService;
|
||||
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
|
||||
import org.apache.nifi.flowfile.attributes.CoreAttributes;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.provenance.ProvenanceEventRecord;
|
||||
import org.apache.nifi.provenance.ProvenanceEventType;
|
||||
import org.apache.nifi.reporting.InitializationException;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.apache.nifi.util.hive.HiveJdbcCommon;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.sql.Types;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.apache.nifi.processors.hive.SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE;
|
||||
import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestSelectHive_1_1QL {
|
||||
|
||||
private static final Logger LOGGER;
|
||||
private final static String MAX_ROWS_KEY = "maxRows";
|
||||
private final int NUM_OF_ROWS = 100;
|
||||
|
||||
|
||||
static {
|
||||
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
|
||||
System.setProperty("org.slf4j.simpleLogger.showDateTime", "true");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHive_1_1QL", "debug");
|
||||
System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHive_1_1QL", "debug");
|
||||
LOGGER = LoggerFactory.getLogger(TestSelectHive_1_1QL.class);
|
||||
}
|
||||
|
||||
private final static String DB_LOCATION = "target/db";
|
||||
|
||||
private final static String QUERY_WITH_EL = "select "
|
||||
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||
+ " from persons PER"
|
||||
+ " where PER.ID > ${person.id}";
|
||||
|
||||
private final static String QUERY_WITHOUT_EL = "select "
|
||||
+ " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode"
|
||||
+ " from persons PER"
|
||||
+ " where PER.ID > 10";
|
||||
|
||||
|
||||
@BeforeAll
|
||||
public static void setupClass() {
|
||||
System.setProperty("derby.stream.error.file", "target/derby.log");
|
||||
}
|
||||
|
||||
private TestRunner runner;
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws InitializationException {
|
||||
final DBCPService dbcp = new DBCPServiceSimpleImpl();
|
||||
final Map<String, String> dbcpProperties = new HashMap<>();
|
||||
|
||||
runner = TestRunners.newTestRunner(SelectHive_1_1QL.class);
|
||||
runner.addControllerService("dbcp", dbcp, dbcpProperties);
|
||||
runner.enableControllerService(dbcp);
|
||||
runner.setProperty(SelectHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncomingConnectionWithNoFlowFile() {
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons");
|
||||
runner.run();
|
||||
runner.assertTransferCount(SelectHive_1_1QL.REL_SUCCESS, 0);
|
||||
runner.assertTransferCount(SelectHive_1_1QL.REL_FAILURE, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
runner.setIncomingConnection(false);
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro");
|
||||
|
||||
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||
assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType());
|
||||
assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITH_EL, true, "Avro");
|
||||
|
||||
final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
|
||||
assertEquals(4, provenanceEvents.size());
|
||||
|
||||
final ProvenanceEventRecord provenance0 = provenanceEvents.get(0);
|
||||
assertEquals(ProvenanceEventType.FORK, provenance0.getEventType());
|
||||
|
||||
final ProvenanceEventRecord provenance1 = provenanceEvents.get(1);
|
||||
assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType());
|
||||
assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri());
|
||||
|
||||
final ProvenanceEventRecord provenance2 = provenanceEvents.get(2);
|
||||
assertEquals(ProvenanceEventType.FORK, provenance2.getEventType());
|
||||
|
||||
final ProvenanceEventRecord provenance3 = provenanceEvents.get(3);
|
||||
assertEquals(ProvenanceEventType.DROP, provenance3.getEventType());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testWithNullIntColumn() throws SQLException {
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_NULL_INT");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do, probably means the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))");
|
||||
|
||||
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)");
|
||||
stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)");
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT");
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||
runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHive_1_1QL.RESULT_ROW_COUNT, "2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithSqlException() throws SQLException {
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_NO_ROWS");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do, probably means the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_NO_ROWS (id integer)");
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
// Try a valid SQL statement that will generate an error (val1 does not exist, e.g.)
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS");
|
||||
runner.run();
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPreQueriesNoIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
"select 'no exception' from persons; select exception from persons",
|
||||
null);
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPreQueriesWithIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||
"select 'no exception' from persons; select exception from persons",
|
||||
null);
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPostQueriesNoIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
null,
|
||||
"select 'no exception' from persons; select exception from persons");
|
||||
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerExceptionInPostQueriesWithIncomingFlows()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
doOnTrigger(QUERY_WITHOUT_EL, true, CSV,
|
||||
null,
|
||||
"select 'no exception' from persons; select exception from persons");
|
||||
|
||||
// with an incoming connection, a failing post-query should route the flow file to failure
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithBadSQL() throws SQLException {
|
||||
final String BAD_SQL = "create table TEST_NO_ROWS (id integer)";
|
||||
|
||||
// Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent)
|
||||
runner.setIncomingConnection(true);
|
||||
// Enqueue a DDL statement that is not a valid query for this processor, so it should be routed to failure
|
||||
runner.enqueue(BAD_SQL);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0);
|
||||
flowFile.assertContentEquals(BAD_SQL);
|
||||
flowFile.assertAttributeEquals("parentIds", null);
|
||||
runner.clearTransferState();
|
||||
|
||||
// Test with no incoming flow file (an empty flow file is transferred)
|
||||
runner.setIncomingConnection(false);
|
||||
// The same invalid statement supplied via the query property should also be routed to failure
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, BAD_SQL);
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1);
|
||||
flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0);
|
||||
flowFile.assertContentEquals("");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithCsv()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithAvro()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPreQueries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
"select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format.
|
||||
null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPostQueries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
null,
|
||||
// post-queries would normally be "set param=val" statements, but the Derby test driver doesn't support that syntax,
|
||||
// so any compilable query is used here instead
|
||||
" select '4' from persons; \nselect '5' from persons");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invokeOnTriggerWithValidPrePostQueries()
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV,
|
||||
// pre/post queries would normally be "set param=val" statements, but the Derby test driver doesn't support that syntax,
|
||||
// so any compilable queries are used here instead
|
||||
"select '1' from persons; select '2' from persons",
|
||||
" select '4' from persons; \nselect '5' from persons");
|
||||
}
|
||||
|
||||
|
||||
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null);
|
||||
}
|
||||
|
||||
public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||
String preQueries, String postQueries)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries);
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||
|
||||
final List<MockFlowFile> flowfiles = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS);
|
||||
MockFlowFile flowFile = flowfiles.get(0);
|
||||
final InputStream in = new ByteArrayInputStream(flowFile.toByteArray());
|
||||
long recordsFromStream = 0;
|
||||
if (AVRO.equals(outputFormat)) {
|
||||
assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||
final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
|
||||
try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
|
||||
GenericRecord record = null;
|
||||
while (dataFileReader.hasNext()) {
|
||||
// Reuse record object by passing it to next(). This saves us from
|
||||
// allocating and garbage collecting many objects for files with
|
||||
// many items.
|
||||
record = dataFileReader.next(record);
|
||||
recordsFromStream++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key()));
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
|
||||
String headerRow = br.readLine();
|
||||
// Derby capitalizes column names
|
||||
assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow);
|
||||
|
||||
// Validate rows
|
||||
String line;
|
||||
while ((line = br.readLine()) != null) {
|
||||
recordsFromStream++;
|
||||
String[] values = line.split(",");
|
||||
if (recordsFromStream < (NUM_OF_ROWS - 10)) {
|
||||
assertEquals(3, values.length);
|
||||
assertTrue(values[1].startsWith("\""));
|
||||
assertTrue(values[1].endsWith("\""));
|
||||
} else {
|
||||
assertEquals(2, values.length); // Middle value is null
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals(NUM_OF_ROWS - 10, recordsFromStream);
|
||||
assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHive_1_1QL.RESULT_ROW_COUNT)));
|
||||
flowFile.assertAttributeEquals(AbstractHive_1_1QLProcessor.ATTR_INPUT_TABLES, "persons");
|
||||
}
|
||||
|
||||
public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat,
|
||||
String preQueries, String postQueries)
|
||||
throws InitializationException, ClassNotFoundException, SQLException, IOException {
|
||||
|
||||
// remove previous test database, if any
|
||||
final File dbLocation = new File(DB_LOCATION);
|
||||
dbLocation.delete();
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
final Statement stmt = con.createStatement();
|
||||
try {
|
||||
stmt.execute("drop table persons");
|
||||
} catch (final SQLException sqle) {
|
||||
// Nothing to do here, the table didn't exist
|
||||
}
|
||||
|
||||
stmt.execute("create table persons (id integer, name varchar(100), code integer)");
|
||||
Random rng = new Random(53496);
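// Fixed seed keeps the generated CODE values deterministic across test runs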
|
||||
stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")");
|
||||
for (int i = 2; i < NUM_OF_ROWS; i++) {
|
||||
stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")");
|
||||
}
|
||||
stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)");
|
||||
|
||||
LOGGER.info("test data loaded");
|
||||
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, query);
|
||||
runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat);
|
||||
if (preQueries != null) {
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_PRE_QUERY, preQueries);
|
||||
}
|
||||
if (postQueries != null) {
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_POST_QUERY, postQueries);
|
||||
}
|
||||
|
||||
if (incomingFlowFile) {
|
||||
// incoming FlowFile content is not used, but attributes are used
|
||||
final Map<String, String> attributes = new HashMap<>();
|
||||
attributes.put("person.id", "10");
|
||||
runner.enqueue("Hello".getBytes(), attributes);
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(incomingFlowFile);
|
||||
runner.run();
|
||||
|
||||
return runner;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
InputStream in;
|
||||
MockFlowFile mff;
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(false);
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
|
||||
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||
runner.setVariable(MAX_ROWS_KEY, "9");
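// 100 rows at 9 rows per FlowFile should produce 12 FlowFiles: 11 full fragments plus a final fragment with 1 record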
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12);
|
||||
|
||||
//ensure all but the last file have 9 records each
|
||||
for (int ff = 0; ff < 11; ff++) {
|
||||
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff);
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
assertEquals(9, getNumberOfRecordsFromStream(in));
|
||||
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
}
|
||||
|
||||
//last file should have 1 record
|
||||
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11);
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
assertEquals(1, getNumberOfRecordsFromStream(in));
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO);
|
||||
runner.setVariable(MAX_ROWS_KEY, "9");
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
attributes.put("hiveql.args.1.value", "1");
|
||||
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes );
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1);
|
||||
MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0);
|
||||
// Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s)
|
||||
flowFile.assertAttributeEquals("hiveql.args.1.value", "1");
|
||||
flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER));
|
||||
runner.clearTransferState();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException {
|
||||
|
||||
// load test data to database
|
||||
final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
|
||||
Statement stmt = con.createStatement();
|
||||
InputStream in;
|
||||
MockFlowFile mff;
|
||||
|
||||
try {
|
||||
stmt.execute("drop table TEST_QUERY_DB_TABLE");
|
||||
} catch (final SQLException sqle) {
|
||||
// Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
|
||||
}
|
||||
|
||||
stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
|
||||
int rowCount = 0;
|
||||
//create larger row set
|
||||
for (int batch = 0; batch < 100; batch++) {
|
||||
stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
runner.setIncomingConnection(true);
|
||||
runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}");
|
||||
runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV);
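// Supply maxRows as a FlowFile attribute so the ${maxRows} expression in MAX_ROWS_PER_FLOW_FILE resolves to 9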
|
||||
|
||||
runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap<String, String>() {{
|
||||
put(MAX_ROWS_KEY, "9");
|
||||
}});
|
||||
|
||||
runner.run();
|
||||
runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12);
|
||||
|
||||
//ensure all but the last file have 9 records (10 lines = 9 records + header) each
|
||||
for (int ff = 0; ff < 11; ff++) {
|
||||
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff);
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
assertEquals(10, br.lines().count());
|
||||
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
}
|
||||
|
||||
//last file should have 1 record (2 lines = 1 record + header)
|
||||
mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11);
|
||||
in = new ByteArrayInputStream(mff.toByteArray());
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
assertEquals(2, br.lines().count());
|
||||
mff.assertAttributeExists("fragment.identifier");
|
||||
assertEquals(Integer.toString(11), mff.getAttribute("fragment.index"));
|
||||
assertEquals("12", mff.getAttribute("fragment.count"));
|
||||
runner.clearTransferState();
|
||||
}

    @Test
    public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException {

        // load test data to database
        final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
        Statement stmt = con.createStatement();
        InputStream in;
        MockFlowFile mff;

        try {
            stmt.execute("drop table TEST_QUERY_DB_TABLE");
        } catch (final SQLException sqle) {
            // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842]
        }

        stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)");
        int rowCount = 0;
        //create larger row set
        for (int batch = 0; batch < 100; batch++) {
            stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')");
            rowCount++;
        }

        runner.setIncomingConnection(false);
        runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE");
        runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "9");
        Integer maxFragments = 3;
        runner.setProperty(SelectHive_1_1QL.MAX_FRAGMENTS, maxFragments.toString());

        runner.run();
        runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, maxFragments);

        for (int i = 0; i < maxFragments; i++) {
            mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(i);
            in = new ByteArrayInputStream(mff.toByteArray());
            assertEquals(9, getNumberOfRecordsFromStream(in));

            mff.assertAttributeExists("fragment.identifier");
            assertEquals(Integer.toString(i), mff.getAttribute("fragment.index"));
            assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count"));
        }

        runner.clearTransferState();
    }

    private long getNumberOfRecordsFromStream(InputStream in) throws IOException {
        final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(in, datumReader)) {
            GenericRecord record = null;
            long recordsFromStream = 0;
            while (dataFileReader.hasNext()) {
                // Reuse record object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with
                // many items.
                record = dataFileReader.next(record);
                recordsFromStream += 1;
            }

            return recordsFromStream;
        }
    }

    /**
     * Simple implementation only for SelectHive_1_1QL processor testing.
     */
    private class DBCPServiceSimpleImpl extends AbstractControllerService implements Hive_1_1DBCPService {

        @Override
        public String getIdentifier() {
            return "dbcp";
        }

        @Override
        public Connection getConnection() throws ProcessException {
            try {
                Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
                return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true");
            } catch (final Exception e) {
                throw new ProcessException("getConnection failed: " + e);
            }
        }

        @Override
        public String getConnectionURL() {
            return "jdbc:derby:" + DB_LOCATION + ";create=true";
        }
    }
}
@@ -1,444 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hive;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.nifi.controller.AbstractControllerService;
import org.apache.nifi.dbcp.DBCPService;
import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.reporting.InitializationException;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MockRecordParser;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.stubbing.Answer;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

@DisabledOnOs(OS.WINDOWS)
public class TestUpdateHive_1_1Table {

    private static final String TEST_CONF_PATH = "src/test/resources/core-site.xml";
    private static final String TARGET_HIVE = "target/hive";

    private static final String[] SHOW_TABLES_COLUMN_NAMES = new String[]{"tab_name"};
    private static final String[][] SHOW_TABLES_RESULTSET = new String[][]{
            new String[]{"messages"},
            new String[]{"users"},
    };

    private static final String[] DESC_MESSAGES_TABLE_COLUMN_NAMES = new String[]{"id", "msg"};
    private static final String[][] DESC_MESSAGES_TABLE_RESULTSET = new String[][]{
            new String[]{"# col_name", "data_type", "comment"},
            new String[]{"id", "int", ""},
            new String[]{"msg", "string", ""},
            new String[]{"", null, null},
            new String[]{"# Partition Information", null, null},
            new String[]{"# col_name", "data_type", "comment"},
            new String[]{"continent", "string", ""},
            new String[]{"country", "string", ""},
            new String[]{"", null, null},
            new String[]{"# Detailed Table Information", null, null},
            new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages", null}
    };

    private static final String[] DESC_USERS_TABLE_COLUMN_NAMES = new String[]{"name", "favorite_number", "favorite_color", "scale"};
    private static final String[][] DESC_USERS_TABLE_RESULTSET = new String[][]{
            new String[]{"name", "string", ""},
            new String[]{"favorite_number", "int", ""},
            new String[]{"favorite_color", "string", ""},
            new String[]{"scale", "double", ""},
            new String[]{"", null, null},
            new String[]{"# Detailed Table Information", null, null},
            new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users", null}
    };
    private static final String[][] DESC_EXTERNAL_USERS_TABLE_RESULTSET = new String[][]{
            new String[]{"name", "string", ""},
            new String[]{"favorite_number", "int", ""},
            new String[]{"favorite_color", "string", ""},
            new String[]{"scale", "double", ""},
            new String[]{"", null, null},
            new String[]{"# Detailed Table Information", null, null},
            new String[]{"Location:", "hdfs://mycluster:8020/path/to/users", null}
    };

    private static final String[] DESC_NEW_TABLE_COLUMN_NAMES = DESC_USERS_TABLE_COLUMN_NAMES;
    private static final String[][] DESC_NEW_TABLE_RESULTSET = new String[][]{
            new String[]{"", null, null},
            new String[]{"name", "string", ""},
            new String[]{"favorite_number", "int", ""},
            new String[]{"favorite_color", "string", ""},
            new String[]{"scale", "double", ""},
            new String[]{"", null, null},
            new String[]{"# Detailed Table Information", null, null},
            new String[]{"Location:", "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable", null}
    };

    private TestRunner runner;
    private UpdateHive_1_1Table processor;

    @BeforeEach
    public void setUp() {

        Configuration testConf = new Configuration();
        testConf.addResource(new Path(TEST_CONF_PATH));

        // Delete any temp files from previous tests
        try {
            FileUtils.deleteDirectory(new File(TARGET_HIVE));
        } catch (IOException ioe) {
            // Do nothing, directory may not have existed
        }

        processor = new UpdateHive_1_1Table();
    }

    private void configure(final UpdateHive_1_1Table processor, final int numUsers) throws InitializationException {
        configure(processor, numUsers, false, -1);
    }

    private void configure(final UpdateHive_1_1Table processor, final int numUsers, boolean failOnCreateReader, int failAfter) throws InitializationException {
        configure(processor, numUsers, failOnCreateReader, failAfter, null);
    }

    private void configure(final UpdateHive_1_1Table processor, final int numUsers, final boolean failOnCreateReader, final int failAfter,
                           final BiFunction<Integer, MockRecordParser, Void> recordGenerator) throws InitializationException {
        runner = TestRunners.newTestRunner(processor);
        MockRecordParser readerFactory = new MockRecordParser() {
            @Override
            public RecordReader createRecordReader(Map<String, String> variables, InputStream in, long inputLength, ComponentLog logger) throws IOException, SchemaNotFoundException {
                if (failOnCreateReader) {
                    throw new SchemaNotFoundException("test");
                }
                return super.createRecordReader(variables, in, inputLength, logger);
            }
        };
        List<RecordField> fields = Arrays.asList(
                new RecordField("name", RecordFieldType.STRING.getDataType()),
                new RecordField("favorite_number", RecordFieldType.INT.getDataType()),
                new RecordField("favorite_color", RecordFieldType.STRING.getDataType()),
                new RecordField("scale", RecordFieldType.DOUBLE.getDataType())
        );
        final SimpleRecordSchema recordSchema = new SimpleRecordSchema(fields);
        for (final RecordField recordField : recordSchema.getFields()) {
            readerFactory.addSchemaField(recordField.getFieldName(), recordField.getDataType().getFieldType(), recordField.isNullable());
        }

        if (recordGenerator == null) {
            for (int i = 0; i < numUsers; i++) {
                readerFactory.addRecord("name" + i, i, "blue" + i, i * 10.0);
            }
        } else {
            recordGenerator.apply(numUsers, readerFactory);
        }

        readerFactory.failAfter(failAfter);

        runner.addControllerService("mock-reader-factory", readerFactory);
        runner.enableControllerService(readerFactory);

        runner.setProperty(UpdateHive_1_1Table.RECORD_READER, "mock-reader-factory");
    }

    @Test
    public void testSetup(@TempDir java.nio.file.Path tempDir) throws Exception {
        configure(processor, 0);
        runner.assertNotValid();
        final File dbDir = tempDir.resolve("db").toFile();
        final DBCPService service = new MockHiveConnectionPool(dbDir.getAbsolutePath());
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        runner.assertNotValid();
        runner.assertNotValid();
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "users");
        runner.assertValid();
        runner.run();
    }

    @Test
    public void testNoStatementsExecuted() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "users");
        final MockHiveConnectionPool service = new MockHiveConnectionPool("test");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "continent, country");
        HashMap<String,String> attrs = new HashMap<>();
        attrs.put("continent", "Asia");
        attrs.put("country", "China");
        runner.enqueue(new byte[0], attrs);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1);
        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "users");
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/users");
        assertTrue(service.getExecutedStatements().isEmpty());
    }

    @Test
    public void testCreateManagedTable() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}");
        runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS);
        runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET);
        final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        Map<String, String> attrs = new HashMap<>();
        attrs.put("db.name", "default");
        attrs.put("table.name", "_newTable");
        runner.enqueue(new byte[0], attrs);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1);
        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "_newTable");
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable");
        List<String> statements = service.getExecutedStatements();
        assertEquals(1, statements.size());
        assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET",
                statements.get(0));
    }

    @Test
    public void testCreateManagedTableWithPartition() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}");
        runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS);
        runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "age int");
        runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET);
        final MockHiveConnectionPool service = new MockHiveConnectionPool("_newTable");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        Map<String, String> attrs = new HashMap<>();
        attrs.put("db.name", "default");
        attrs.put("table.name", "_newTable");
        attrs.put("age", "23");
        runner.enqueue(new byte[0], attrs);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1);
        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "_newTable");
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/_newTable");
        List<String> statements = service.getExecutedStatements();
        assertEquals(1, statements.size());
        assertEquals("CREATE TABLE IF NOT EXISTS `_newTable` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) PARTITIONED BY (`age` int) STORED AS PARQUET",
                statements.get(0));
    }

    @Test
    public void testCreateExternalTable() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "${table.name}");
        runner.setProperty(UpdateHive_1_1Table.CREATE_TABLE, UpdateHive_1_1Table.CREATE_IF_NOT_EXISTS);
        runner.setProperty(UpdateHive_1_1Table.TABLE_MANAGEMENT_STRATEGY, UpdateHive_1_1Table.EXTERNAL_TABLE);
        runner.setProperty(UpdateHive_1_1Table.TABLE_STORAGE_FORMAT, UpdateHive_1_1Table.PARQUET);
        final MockHiveConnectionPool service = new MockHiveConnectionPool("ext_users");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        runner.assertNotValid(); // Needs location specified
        runner.setProperty(UpdateHive_1_1Table.EXTERNAL_TABLE_LOCATION, "/path/to/users");
        runner.assertValid();
        Map<String, String> attrs = new HashMap<>();
        attrs.put("db.name", "default");
        attrs.put("table.name", "ext_users");
        runner.enqueue(new byte[0], attrs);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1);
        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "ext_users");
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/path/to/users");
        List<String> statements = service.getExecutedStatements();
        assertEquals(1, statements.size());
        assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS `ext_users` (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE) STORED AS PARQUET "
                        + "LOCATION '/path/to/users'",
                statements.get(0));
    }

    @Test
    public void testAddColumnsAndPartition() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "messages");
        final MockHiveConnectionPool service = new MockHiveConnectionPool("test");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        runner.setProperty(UpdateHive_1_1Table.PARTITION_CLAUSE, "continent, country");
        HashMap<String,String> attrs = new HashMap<>();
        attrs.put("continent", "Asia");
        attrs.put("country", "China");
        runner.enqueue(new byte[0], attrs);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 1);
        final MockFlowFile flowFile = runner.getFlowFilesForRelationship(UpdateHive_1_1Table.REL_SUCCESS).get(0);
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_TABLE, "messages");
        flowFile.assertAttributeEquals(UpdateHive_1_1Table.ATTR_OUTPUT_PATH, "hdfs://mycluster:8020/warehouse/tablespace/managed/hive/messages/continent=Asia/country=China");
        List<String> statements = service.getExecutedStatements();
        assertEquals(2, statements.size());
        // All columns from users table/data should be added to the table, and a new partition should be added
        assertEquals("ALTER TABLE `messages` ADD COLUMNS (`name` STRING, `favorite_number` INT, `favorite_color` STRING, `scale` DOUBLE)",
                statements.get(0));
        assertEquals("ALTER TABLE `messages` ADD IF NOT EXISTS PARTITION (`continent`='Asia', `country`='China')",
                statements.get(1));
    }

    @Test
    public void testMissingPartitionValues() throws Exception {
        configure(processor, 1);
        runner.setProperty(UpdateHive_1_1Table.TABLE_NAME, "messages");
        final DBCPService service = new MockHiveConnectionPool("test");
        runner.addControllerService("dbcp", service);
        runner.enableControllerService(service);
        runner.setProperty(UpdateHive_1_1Table.HIVE_DBCP_SERVICE, "dbcp");
        runner.enqueue(new byte[0]);
        runner.run();

        runner.assertTransferCount(UpdateHive_1_1Table.REL_SUCCESS, 0);
        runner.assertTransferCount(UpdateHive_1_1Table.REL_FAILURE, 1);
    }

    /**
     * Simple implementation only for testing purposes
     */
    private static class MockHiveConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService {
        private final String dbLocation;

        private final List<String> executedStatements = new ArrayList<>();

        MockHiveConnectionPool(final String dbLocation) {
            this.dbLocation = dbLocation;
        }

        @Override
        public String getIdentifier() {
            return "dbcp";
        }

        @Override
        public Connection getConnection() throws ProcessException {
            try {
                Connection conn = mock(Connection.class);
                Statement s = mock(Statement.class);
                when(conn.createStatement()).thenReturn(s);
                when(s.executeQuery(anyString())).thenAnswer((Answer<ResultSet>) invocation -> {
                    final String query = (String) invocation.getArguments()[0];
                    if ("SHOW TABLES".equals(query)) {
                        return new MockResultSet(SHOW_TABLES_COLUMN_NAMES, SHOW_TABLES_RESULTSET).createResultSet();
                    } else if ("DESC FORMATTED `messages`".equals(query)) {
                        return new MockResultSet(DESC_MESSAGES_TABLE_COLUMN_NAMES, DESC_MESSAGES_TABLE_RESULTSET).createResultSet();
                    } else if ("DESC FORMATTED `users`".equals(query)) {
                        return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_USERS_TABLE_RESULTSET).createResultSet();
                    } else if ("DESC FORMATTED `ext_users`".equals(query)) {
                        return new MockResultSet(DESC_USERS_TABLE_COLUMN_NAMES, DESC_EXTERNAL_USERS_TABLE_RESULTSET).createResultSet();
                    } else if ("DESC FORMATTED `_newTable`".equals(query)) {
                        return new MockResultSet(DESC_NEW_TABLE_COLUMN_NAMES, DESC_NEW_TABLE_RESULTSET).createResultSet();
                    } else {
                        return new MockResultSet(new String[]{}, new String[][]{new String[]{}}).createResultSet();
                    }
                });
                when(s.execute(anyString())).thenAnswer((Answer<Boolean>) invocation -> {
                    executedStatements.add((String) invocation.getArguments()[0]);
                    return false;
                });
                return conn;
            } catch (final Exception e) {
                e.printStackTrace();
                throw new ProcessException("getConnection failed: " + e);
            }
        }

        @Override
        public String getConnectionURL() {
            return "jdbc:fake:" + dbLocation;
        }

        List<String> getExecutedStatements() {
            return executedStatements;
        }
    }

    private static class MockResultSet {
        String[] colNames;
        String[][] data;
        int currentRow;

        MockResultSet(String[] colNames, String[][] data) {
            this.colNames = colNames;
            this.data = data;
            currentRow = 0;
        }

        ResultSet createResultSet() throws SQLException {
            ResultSet rs = mock(ResultSet.class);
            when(rs.next()).thenAnswer((Answer<Boolean>) invocation -> (data != null) && (++currentRow <= data.length));
            when(rs.getString(anyInt())).thenAnswer((Answer<String>) invocation -> {
                final int index = (int) invocation.getArguments()[0];
                if (index < 1) {
                    throw new SQLException("Columns start with index 1");
                }
                if (currentRow > data.length) {
                    throw new SQLException("This result set is already closed");
                }
                return data[currentRow - 1][index - 1];
            });

            return rs;
        }
    }
}
@@ -1,38 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
{
  "namespace" : "org.apache.nifi",
  "name" : "outer_record",
  "type" : "record",
  "fields" : [ {
    "name" : "records",
    "type" : {
      "type" : "array",
      "items" : {
        "type" : "record",
        "name" : "inner_record",
        "fields" : [ {
          "name" : "name",
          "type" : "string"
        }, {
          "name" : "age",
          "type" : "int"
        } ]
      }
    }
  } ]
}
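For context on the nested outer_record/inner_record schema above (used by the removed Hive 1.1 tests), a minimal sketch of parsing it and building a matching record with Avro's generic API; this is illustrative only and not part of the commit, and the class name OuterRecordExample is hypothetical.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

import java.util.List;

public class OuterRecordExample {
    public static void main(String[] args) {
        // Same structure as the schema shown above, inlined as a JSON string.
        String json = "{\"namespace\":\"org.apache.nifi\",\"name\":\"outer_record\",\"type\":\"record\","
                + "\"fields\":[{\"name\":\"records\",\"type\":{\"type\":\"array\",\"items\":"
                + "{\"type\":\"record\",\"name\":\"inner_record\",\"fields\":["
                + "{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"age\",\"type\":\"int\"}]}}}]}";

        Schema outer = new Schema.Parser().parse(json);
        Schema arraySchema = outer.getField("records").schema();
        Schema inner = arraySchema.getElementType();

        // Build one inner_record and wrap it in the records array of an outer_record.
        GenericRecord person = new GenericRecordBuilder(inner).set("name", "Joe Smith").set("age", 23).build();
        GenericRecord record = new GenericRecordBuilder(outer)
                .set("records", new GenericData.Array<GenericRecord>(arraySchema, List.of(person)))
                .build();
        System.out.println(record);
    }
}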
@@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
    <property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
    </property>
</configuration>
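A secured core-site like the one above is consumed through the Hadoop Configuration API, the same pattern TestUpdateHive_1_1Table#setUp uses with addResource. The sketch below is illustrative only; the CoreSiteCheck class name and the resource path are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class CoreSiteCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip loading Hadoop's default resources
        conf.addResource(new Path("src/test/resources/core-site-security.xml")); // hypothetical path
        // Hadoop falls back to "simple" authentication when the property is absent.
        String auth = conf.get("hadoop.security.authentication", "simple");
        System.out.println("fs.default.name = " + conf.get("fs.default.name"));
        System.out.println("authentication  = " + auth); // "kerberos" for the file above
    }
}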
@@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
</configuration>
@@ -1,30 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://hive</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>KERBEROS</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
</configuration>
@@ -1,22 +0,0 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>file:///</value>
    </property>
</configuration>
@@ -1,10 +0,0 @@
[libdefaults]
  default_realm = EXAMPLE.COM
  dns_lookup_kdc = false
  dns_lookup_realm = false

[realms]
  EXAMPLE.COM = {
    kdc = kerberos.example.com
    admin_server = kerberos.example.com
  }
@@ -1,26 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
{"namespace": "example.avro",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number", "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]},
     {"name": "scale", "type": ["double", "null"]}
 ]
}
@@ -29,10 +29,6 @@
    <modules>
        <module>nifi-hive-services-api</module>
        <module>nifi-hive-services-api-nar</module>
        <module>nifi-hive-processors</module>
        <module>nifi-hive-nar</module>
        <module>nifi-hive_1_1-processors</module>
        <module>nifi-hive_1_1-nar</module>
        <module>nifi-hive3-processors</module>
        <module>nifi-hive3-nar</module>
        <module>nifi-hive-test-utils</module>
@@ -104,20 +100,10 @@
                <artifactId>ant</artifactId>
                <version>1.10.12</version>
            </dependency>
            <!-- Override Xerces 2.9.1 in Hive 1.1 and 1.2 -->
            <dependency>
                <groupId>xerces</groupId>
                <artifactId>xercesImpl</artifactId>
                <version>2.12.2</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <properties>
        <hive11.version>1.1.1</hive11.version>
        <hive11.hadoop.version>2.6.2</hive11.hadoop.version>
        <hive12.version>1.2.2</hive12.version>
        <hive12.hadoop.version>2.6.2</hive12.hadoop.version>
        <hive3.version>3.1.3</hive3.version>
        <hive.version>${hive3.version}</hive.version>
        <avatica.version>1.22.0</avatica.version>