From cdf3c69208ffbaa831b68b9d9eb4621c57c51582 Mon Sep 17 00:00:00 2001 From: Matthew Burgess Date: Tue, 22 Jan 2019 13:34:35 -0500 Subject: [PATCH] NIFI-5967: Add Hive 1.1 processors Removed extra Kerberos properties, added LICENSEs to all Hive NARs, removed unnecessary NOTICE entries --- nifi-assembly/pom.xml | 17 + .../src/main/resources/META-INF/LICENSE | 231 +++++ .../nifi/dbcp/hive/Hive_1_1DBCPService.java | 31 + .../src/main/resources/META-INF/LICENSE | 231 +++++ .../nifi-hive_1_1-nar/pom.xml | 49 + .../src/main/resources/META-INF/LICENSE | 231 +++++ .../src/main/resources/META-INF/NOTICE | 283 ++++++ .../nifi-hive_1_1-processors/pom.xml | 121 +++ .../dbcp/hive/Hive_1_1ConnectionPool.java | 369 ++++++++ .../hive/AbstractHive_1_1QLProcessor.java | 344 +++++++ .../nifi/processors/hive/PutHive_1_1QL.java | 297 +++++++ .../processors/hive/SelectHive_1_1QL.java | 552 ++++++++++++ .../hive/AuthenticationFailedException.java | 23 + .../nifi/util/hive/CsvOutputOptions.java | 63 ++ .../nifi/util/hive/HiveConfigurator.java | 116 +++ .../apache/nifi/util/hive/HiveJdbcCommon.java | 463 ++++++++++ .../org/apache/nifi/util/hive/HiveUtils.java | 53 ++ .../nifi/util/hive/ValidationResources.java | 41 + ...g.apache.nifi.controller.ControllerService | 15 + .../org.apache.nifi.processor.Processor | 16 + .../dbcp/hive/Hive_1_1ConnectionPoolTest.java | 176 ++++ .../nifi/processors/hive/TestHiveParser.java | 292 ++++++ .../processors/hive/TestPutHive_1_1QL.java | 841 ++++++++++++++++++ .../processors/hive/TestSelectHive_1_1QL.java | 658 ++++++++++++++ .../src/test/resources/array_of_records.avsc | 38 + .../src/test/resources/core-site-security.xml | 30 + .../src/test/resources/core-site.xml | 22 + .../src/test/resources/fake.keytab | 0 .../src/test/resources/hive-site-security.xml | 30 + .../src/test/resources/hive-site.xml | 22 + .../src/test/resources/krb5.conf | 10 + .../src/test/resources/user.avsc | 26 + nifi-nar-bundles/nifi-hive-bundle/pom.xml | 4 + 33 files changed, 5695 insertions(+) create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive-nar/src/main/resources/META-INF/LICENSE create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive-services-api/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1DBCPService.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive3-nar/src/main/resources/META-INF/LICENSE create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/pom.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java create mode 100644 
nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/fake.keytab create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf create mode 100644 nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml index 131c9fd588..50c6f58c76 100755 --- a/nifi-assembly/pom.xml +++ b/nifi-assembly/pom.xml @@ -781,6 +781,23 @@ language governing permissions and limitations under the License. 
--> + + include-hive1_1 + + + false + + + + org.apache.nifi + nifi-hive_1_1-nar + 1.9.0-SNAPSHOT + nar + + + include-hive3 + + 4.0.0 + + + org.apache.nifi + nifi-hive-bundle + 1.9.0-SNAPSHOT + + + nifi-hive_1_1-nar + 1.9.0-SNAPSHOT + nar + + true + true + + ${hive11.hadoop.version} + + + + + org.apache.nifi + nifi-hive-services-api-nar + 1.9.0-SNAPSHOT + nar + + + org.apache.nifi + nifi-hive_1_1-processors + 1.9.0-SNAPSHOT + + + + diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE new file mode 100644 index 0000000000..2eac11083a --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/LICENSE @@ -0,0 +1,231 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +APACHE NIFI SUBCOMPONENTS: + +The Apache NiFi project contains subcomponents with separate copyright +notices and license terms. Your use of the source code for the these +subcomponents is subject to the terms and conditions of the following +licenses. + The binary distribution of this product bundles 'Bouncy Castle JDK 1.5' + under an MIT style license. + + Copyright (c) 2000 - 2015 The Legion of the Bouncy Castle Inc. (http://www.bouncycastle.org) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000..b02c6f7d08 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-nar/src/main/resources/META-INF/NOTICE @@ -0,0 +1,283 @@ +nifi-hive_1_1-nar +Copyright 2014-2018 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ +This includes derived works from the Apache Storm (ASLv2 licensed) project (https://github.com/apache/storm): + Copyright 2015 The Apache Software Foundation + The derived work is adapted from + org/apache/storm/hive/common/HiveWriter.java + org/apache/storm/hive/common/HiveOptions.java + and can be found in the org.apache.nifi.util.hive package + +=========================================== +Apache Software License v2 +=========================================== + +The following binary components are provided under the Apache Software License v2 + + (ASLv2) Apache Ant + The following NOTICE information applies: + Apache Ant + Copyright 1999-2016 The Apache Software Foundation + + (ASLv2) Apache Commons Codec + The following NOTICE information applies: + Apache Commons Codec + Copyright 2002-2014 The Apache Software Foundation + + src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java + contains test data from http://aspell.net/test/orig/batch0.tab. + Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) + + =============================================================================== + + The content of package org.apache.commons.codec.language.bm has been translated + from the original php source code available at http://stevemorse.org/phoneticinfo.htm + with permission from the original authors. + Original source copyright: + Copyright (c) 2008 Alexander Beider & Stephen P. Morse. + + (ASLv2) Apache Commons DBCP + The following NOTICE information applies: + Apache Commons DBCP + Copyright 2001-2015 The Apache Software Foundation. + + (ASLv2) Apache HttpComponents + The following NOTICE information applies: + Apache HttpComponents Client + Copyright 1999-2016 The Apache Software Foundation + Apache HttpComponents Core - HttpCore + Copyright 2006-2009 The Apache Software Foundation + + (ASLv2) Apache Commons Logging + The following NOTICE information applies: + Apache Commons Logging + Copyright 2003-2014 The Apache Software Foundation + + (ASLv2) Apache Commons Pool + The following NOTICE information applies: + Apache Commons Pool + Copyright 1999-2009 The Apache Software Foundation. + + (ASLv2) Apache Commons IO + The following NOTICE information applies: + Apache Commons IO + Copyright 2002-2016 The Apache Software Foundation + + (ASLv2) Apache Hive + The following NOTICE information applies: + Apache Hive + Copyright 2008-2015 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes Jersey (https://jersey.java.net/) + Copyright (c) 2010-2014 Oracle and/or its affiliates. + + This project includes software copyrighted by Microsoft Corporation and + licensed under the Apache License, Version 2.0. + + This project includes software copyrighted by Dell SecureWorks and + licensed under the Apache License, Version 2.0. + + (ASLv2) Jackson JSON processor + The following NOTICE information applies: + # Jackson JSON processor + + Jackson is a high-performance, Free/Open Source JSON processing library. + It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has + been in development since 2007. + It is currently developed by a community of developers, as well as supported + commercially by FasterXML.com. + + ## Licensing + + Jackson core and extension components may licensed under different licenses. + To find the details that apply to this artifact see the accompanying LICENSE file. 
+ For more information, including possible other licensing options, contact + FasterXML.com (http://fasterxml.com). + + ## Credits + + A list of contributors may be found from CREDITS file, which is included + in some artifacts (usually source distributions); but is always available + from the source code management (SCM) system project uses. + + (ASLv2) BoneCP + The following NOTICE information applies: + BoneCP + Copyright 2010 Wallace Wadge + + (ASLv2) Apache Hadoop + The following NOTICE information applies: + The binary distribution of this product bundles binaries of + org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the + following notices: + * Copyright 2011 Dain Sundstrom + * Copyright 2011 FuseSource Corp. http://fusesource.com + + The binary distribution of this product bundles binaries of + org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), + which has the following notices: + * This product includes software developed by FuseSource Corp. + http://fusesource.com + * This product includes software developed at + Progress Software Corporation and/or its subsidiaries or affiliates. + * This product includes software developed by IBM Corporation and others. + + (ASLv2) Apache Commons Lang + The following NOTICE information applies: + Apache Commons Lang + Copyright 2001-2015 The Apache Software Foundation + + (ASLv2) Apache Curator + The following NOTICE information applies: + Apache Curator + Copyright 2013-2014 The Apache Software Foundation + + (ASLv2) Apache Derby + The following NOTICE information applies: + Apache Derby + Copyright 2004-2014 Apache, Apache DB, Apache Derby, Apache Torque, Apache JDO, Apache DDLUtils, + the Derby hat logo, the Apache JDO logo, and the Apache feather logo are trademarks of The Apache Software Foundation. + + (ASLv2) Apache DS + The following NOTICE information applies: + ApacheDS + Copyright 2003-2015 The Apache Software Foundation + + (ASLv2) Apache Geronimo + The following NOTICE information applies: + Apache Geronimo + Copyright 2003-2008 The Apache Software Foundation + + (ASLv2) HTrace Core + The following NOTICE information applies: + In addition, this product includes software dependencies. See + the accompanying LICENSE.txt for a listing of dependencies + that are NOT Apache licensed (with pointers to their licensing) + + Apache HTrace includes an Apache Thrift connector to Zipkin. Zipkin + is a distributed tracing system that is Apache 2.0 Licensed. + Copyright 2012 Twitter, Inc. + + (ASLv2) Jettison + The following NOTICE information applies: + Copyright 2006 Envoi Solutions LLC + + (ASLv2) Jetty + The following NOTICE information applies: + Jetty Web Container + Copyright 1995-2017 Mort Bay Consulting Pty Ltd. + + (ASLv2) Apache log4j + The following NOTICE information applies: + Apache log4j + Copyright 2007 The Apache Software Foundation + + (ASLv2) Parquet MR + The following NOTICE information applies: + Parquet MR + Copyright 2012 Twitter, Inc. + + This project includes code from https://github.com/lemire/JavaFastPFOR + parquet-column/src/main/java/parquet/column/values/bitpacking/LemireBitPacking.java + Apache License Version 2.0 http://www.apache.org/licenses/. + (c) Daniel Lemire, http://lemire.me/en/ + + (ASLv2) Apache Thrift + The following NOTICE information applies: + Apache Thrift + Copyright 2006-2010 The Apache Software Foundation. 
+ + (ASLv2) Apache Twill + The following NOTICE information applies: + Apache Twill + Copyright 2013-2016 The Apache Software Foundation + + (ASLv2) Dropwizard Metrics + The following NOTICE information applies: + Metrics + Copyright 2010-2013 Coda Hale and Yammer, Inc. + + This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, + LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + + (ASLv2) Joda Time + The following NOTICE information applies: + This product includes software developed by + Joda.org (http://www.joda.org/). + + (ASLv2) The Netty Project + The following NOTICE information applies: + The Netty Project + Copyright 2011 The Netty Project + + (ASLv2) Apache Tomcat + The following NOTICE information applies: + Apache Tomcat + Copyright 2007 The Apache Software Foundation + + Java Management Extensions (JMX) support is provided by + the MX4J package, which is open source software. The + original software and related information is available + at http://mx4j.sourceforge.net. + + Java compilation software for JSP pages is provided by Eclipse, + which is open source software. The orginal software and + related infomation is available at + http://www.eclipse.org. + + (ASLv2) Apache ZooKeeper + The following NOTICE information applies: + Apache ZooKeeper + Copyright 2009-2012 The Apache Software Foundation + + (ASLv2) Google GSON + The following NOTICE information applies: + Copyright 2008 Google Inc. + + (ASLv2) JPam + The following NOTICE information applies: + Copyright 2003-2006 Greg Luck + + ************************ + Common Development and Distribution License 1.1 + ************************ + + The following binary components are provided under the Common Development and Distribution License 1.1. See project link for details. + + (CDDL 1.1) (GPL2 w/ CPE) jersey-client (com.sun.jersey:jersey-client:jar:1.9 - https://jersey.java.net) + (CDDL 1.1) (GPL2 w/ CPE) jersey-core (com.sun.jersey:jersey-core:jar:1.9 - https://jersey.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-json (com.sun.jersey:jersey-json:jar:1.9 - https://jersey.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-server (com.sun.jersey:jersey-server:jar:1.9 - https://jersey.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) jersey-guice (com.sun.jersey.contribs:jersey-guice:jar:1.9 - https://jersey.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) Java Architecture For XML Binding (javax.xml.bind:jaxb-api:jar:2.2.2 - https://jaxb.dev.java.net/) + (CDDL 1.1) (GPL2 w/ CPE) JavaMail API (compat) (javax.mail:mail:jar:1.4.7 - http://kenai.com/projects/javamail/mail) + + + ************************ + Common Development and Distribution License 1.0 + ************************ + + The following binary components are provided under the Common Development and Distribution License 1.0. See project link for details. 
+ + (CDDL 1.0) JavaServlet(TM) Specification (javax.servlet:servlet-api:jar:2.5 - no url available) + (CDDL 1.0) (GPL3) Streaming API For XML (javax.xml.stream:stax-api:jar:1.0-2 - no url provided) + (CDDL 1.0) JavaBeans Activation Framework (JAF) (javax.activation:activation:jar:1.1 - http://java.sun.com/products/javabeans/jaf/index.jsp) + (CDDL 1.0) JavaServer Pages(TM) API (javax.servlet.jsp:jsp-api:jar:2.1 - http://jsp.java.net) + + ***************** + Public Domain + ***************** + + The following binary components are provided to the 'Public Domain'. See project link for details. + + (Public Domain) AOP Alliance 1.0 (http://aopalliance.sourceforge.net/) diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml new file mode 100644 index 0000000000..0f614f3199 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/pom.xml @@ -0,0 +1,121 @@ + + + + 4.0.0 + + + org.apache.nifi + nifi-hive-bundle + 1.9.0-SNAPSHOT + + + nifi-hive_1_1-processors + jar + + + + ${hive11.hadoop.version} + + + + + org.apache.nifi + nifi-api + 1.9.0-SNAPSHOT + + + org.apache.nifi + nifi-processor-utils + 1.9.0-SNAPSHOT + + + org.apache.nifi + nifi-dbcp-service-api + 1.9.0-SNAPSHOT + provided + + + org.apache.nifi + nifi-hive-services-api + 1.9.0-SNAPSHOT + provided + + + org.apache.nifi + nifi-kerberos-credentials-service-api + provided + + + org.apache.hive + hive-jdbc + ${hive11.version} + + + org.json + json + + + + + org.apache.hive.hcatalog + hive-hcatalog-streaming + ${hive11.version} + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hive.hcatalog + hive-hcatalog-core + ${hive11.version} + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + com.google.code.findbugs + jsr305 + + + + + org.apache.nifi + nifi-hadoop-utils + 1.9.0-SNAPSHOT + + + com.github.stephenc.findbugs + findbugs-annotations + 1.3.9-1 + + + org.apache.commons + commons-text + 1.4 + + + org.apache.nifi + nifi-mock + 1.9.0-SNAPSHOT + test + + + diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java new file mode 100644 index 0000000000..76ff47ae0a --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPool.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.dbcp.hive; + + +import org.apache.commons.dbcp.BasicDataSource; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hive.jdbc.HiveDriver; +import org.apache.nifi.annotation.behavior.RequiresInstanceClassLoading; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnDisabled; +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; +import org.apache.nifi.hadoop.SecurityUtil; +import org.apache.nifi.kerberos.KerberosCredentialsService; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.util.hive.AuthenticationFailedException; +import org.apache.nifi.util.hive.HiveConfigurator; +import org.apache.nifi.util.hive.HiveUtils; +import org.apache.nifi.util.hive.ValidationResources; + +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.nifi.controller.ControllerServiceInitializationContext; +import org.apache.nifi.expression.ExpressionLanguageScope; + +/** + * Implementation for Database Connection Pooling Service used for Apache Hive 1.1 + * connections. Apache DBCP is used for connection pooling functionality. + */ +@RequiresInstanceClassLoading +@Tags({"hive", "dbcp", "jdbc", "database", "connection", "pooling", "store"}) +@CapabilityDescription("Provides Database Connection Pooling Service for Apache Hive 1.1.x. Connections can be asked from pool and returned after usage.") +public class Hive_1_1ConnectionPool extends AbstractControllerService implements Hive_1_1DBCPService { + private static final String ALLOW_EXPLICIT_KEYTAB = "NIFI_ALLOW_EXPLICIT_KEYTAB"; + + public static final PropertyDescriptor DATABASE_URL = new PropertyDescriptor.Builder() + .name("hive-db-connect-url") + .displayName("Database Connection URL") + .description("A database connection URL used to connect to a database. May contain database system name, host, port, database name and some parameters." + + " The exact syntax of a database connection URL is specified by the Hive documentation. 
For example, the server principal is often included " + + "as a connection parameter when connecting to a secure Hive server.") + .defaultValue(null) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .required(true) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor HIVE_CONFIGURATION_RESOURCES = new PropertyDescriptor.Builder() + .name("hive-config-resources") + .displayName("Hive Configuration Resources") + .description("A file or comma separated list of files which contains the Hive configuration (hive-site.xml, e.g.). Without this, Hadoop " + + "will search the classpath for a 'hive-site.xml' file or will revert to a default configuration. Note that to enable authentication " + + "with Kerberos e.g., the appropriate properties must be set in the configuration files. Please see the Hive documentation for more details.") + .required(false) + .addValidator(HiveUtils.createMultipleFilesExistValidator()) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor DB_USER = new PropertyDescriptor.Builder() + .name("hive-db-user") + .displayName("Database User") + .description("Database user name") + .defaultValue(null) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor DB_PASSWORD = new PropertyDescriptor.Builder() + .name("hive-db-password") + .displayName("Password") + .description("The password for the database user") + .defaultValue(null) + .required(false) + .sensitive(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor MAX_WAIT_TIME = new PropertyDescriptor.Builder() + .name("hive-max-wait-time") + .displayName("Max Wait Time") + .description("The maximum amount of time that the pool will wait (when there are no available connections) " + + " for a connection to be returned before failing, or -1 to wait indefinitely. ") + .defaultValue("500 millis") + .required(true) + .addValidator(StandardValidators.TIME_PERIOD_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor MAX_TOTAL_CONNECTIONS = new PropertyDescriptor.Builder() + .name("hive-max-total-connections") + .displayName("Max Total Connections") + .description("The maximum number of active connections that can be allocated from this pool at the same time, " + + "or negative for no limit.") + .defaultValue("8") + .required(true) + .addValidator(StandardValidators.INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + public static final PropertyDescriptor VALIDATION_QUERY = new PropertyDescriptor.Builder() + .name("Validation-query") + .displayName("Validation query") + .description("Validation query used to validate connections before returning them. " + + "When a borrowed connection is invalid, it gets dropped and a new valid connection will be returned. 
" + + "NOTE: Using validation may have a performance penalty.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.VARIABLE_REGISTRY) + .build(); + + static final PropertyDescriptor KERBEROS_CREDENTIALS_SERVICE = new PropertyDescriptor.Builder() + .name("kerberos-credentials-service") + .displayName("Kerberos Credentials Service") + .description("Specifies the Kerberos Credentials Controller Service that should be used for authenticating with Kerberos") + .identifiesControllerService(KerberosCredentialsService.class) + .required(false) + .build(); + + + private List properties; + + private String connectionUrl = "unknown"; + + // Holder of cached Configuration information so validation does not reload the same config over and over + private final AtomicReference validationResourceHolder = new AtomicReference<>(); + + private volatile BasicDataSource dataSource; + + private volatile HiveConfigurator hiveConfigurator = new HiveConfigurator(); + private volatile UserGroupInformation ugi; + + @Override + protected void init(final ControllerServiceInitializationContext context) { + List props = new ArrayList<>(); + props.add(DATABASE_URL); + props.add(HIVE_CONFIGURATION_RESOURCES); + props.add(DB_USER); + props.add(DB_PASSWORD); + props.add(MAX_WAIT_TIME); + props.add(MAX_TOTAL_CONNECTIONS); + props.add(VALIDATION_QUERY); + props.add(KERBEROS_CREDENTIALS_SERVICE); + + properties = props; + } + + @Override + protected List getSupportedPropertyDescriptors() { + return properties; + } + + @Override + protected Collection customValidate(ValidationContext validationContext) { + boolean confFileProvided = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).isSet(); + + final List problems = new ArrayList<>(); + + if (confFileProvided) { + final KerberosCredentialsService credentialsService = validationContext.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); + + final String resolvedPrincipal; + final String resolvedKeytab; + if (credentialsService == null) { + resolvedPrincipal = null; + resolvedKeytab = null; + } else { + resolvedPrincipal = credentialsService.getPrincipal(); + resolvedKeytab = credentialsService.getKeytab(); + } + + final String configFiles = validationContext.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); + problems.addAll(hiveConfigurator.validate(configFiles, resolvedPrincipal, resolvedKeytab, validationResourceHolder, getLogger())); + } + + return problems; + } + + /** + * Configures connection pool by creating an instance of the + * {@link BasicDataSource} based on configuration provided with + * {@link ConfigurationContext}. + *

+ * This operation makes no guarantees that the actual connection could be + * made since the underlying system may still go off-line during normal + * operation of the connection pool. + *

+ * As of Apache NiFi 1.5.0, due to changes made to + * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this class invoking + * {@link HiveConfigurator#authenticate(Configuration, String, String)} + * to authenticate a principal with Kerberos, Hive controller services no longer use a separate thread to + * relogin, and instead call {@link UserGroupInformation#checkTGTAndReloginFromKeytab()} from + * {@link Hive_1_1ConnectionPool#getConnection()}. The relogin request is performed in a synchronized block to prevent + * threads from requesting concurrent relogins. For more information, please read the documentation for + * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. + *

+ * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by + * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive + * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions + * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same + * {@link UserGroupInformation} instance. One of these threads could leave the + * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state + * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed + * authentication attempts that would leave the Hive controller service in an unrecoverable state. + * + * @see SecurityUtil#loginKerberos(Configuration, String, String) + * @see HiveConfigurator#authenticate(Configuration, String, String) + * @see HiveConfigurator#authenticate(Configuration, String, String, long) + * @param context the configuration context + * @throws InitializationException if unable to create a database connection + */ + @OnEnabled + public void onConfigured(final ConfigurationContext context) throws InitializationException { + + ComponentLog log = getLogger(); + + final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).evaluateAttributeExpressions().getValue(); + final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles); + final String validationQuery = context.getProperty(VALIDATION_QUERY).evaluateAttributeExpressions().getValue(); + + // add any dynamic properties to the Hive configuration + for (final Map.Entry entry : context.getProperties().entrySet()) { + final PropertyDescriptor descriptor = entry.getKey(); + if (descriptor.isDynamic()) { + hiveConfig.set(descriptor.getName(), context.getProperty(descriptor).evaluateAttributeExpressions().getValue()); + } + } + + final String drv = HiveDriver.class.getName(); + if (SecurityUtil.isSecurityEnabled(hiveConfig)) { + final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class); + + final String resolvedPrincipal; + final String resolvedKeytab; + if (credentialsService == null) { + resolvedPrincipal = null; + resolvedKeytab = null; + } else { + resolvedPrincipal = credentialsService.getPrincipal(); + resolvedKeytab = credentialsService.getKeytab(); + } + + log.info("Hive Security Enabled, logging in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab}); + + try { + ugi = hiveConfigurator.authenticate(hiveConfig, resolvedPrincipal, resolvedKeytab); + } catch (AuthenticationFailedException ae) { + log.error(ae.getMessage(), ae); + throw new InitializationException(ae); + } + + getLogger().info("Successfully logged in as principal {} with keytab {}", new Object[] {resolvedPrincipal, resolvedKeytab}); + } + + final String user = context.getProperty(DB_USER).evaluateAttributeExpressions().getValue(); + final String passw = context.getProperty(DB_PASSWORD).evaluateAttributeExpressions().getValue(); + final Long maxWaitMillis = context.getProperty(MAX_WAIT_TIME).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS); + final Integer maxTotal = context.getProperty(MAX_TOTAL_CONNECTIONS).evaluateAttributeExpressions().asInteger(); + + dataSource = new BasicDataSource(); + dataSource.setDriverClassName(drv); + + connectionUrl = 
context.getProperty(DATABASE_URL).evaluateAttributeExpressions().getValue(); + + dataSource.setMaxWait(maxWaitMillis); + dataSource.setMaxActive(maxTotal); + + if (validationQuery != null && !validationQuery.isEmpty()) { + dataSource.setValidationQuery(validationQuery); + dataSource.setTestOnBorrow(true); + } + + dataSource.setUrl(connectionUrl); + dataSource.setUsername(user); + dataSource.setPassword(passw); + } + + /** + * Shutdown pool, close all open connections. + */ + @OnDisabled + public void shutdown() { + try { + if(dataSource != null) { + dataSource.close(); + } + } catch (final SQLException e) { + throw new ProcessException(e); + } + } + + @Override + public Connection getConnection() throws ProcessException { + try { + if (ugi != null) { + synchronized(this) { + /* + * Make sure that only one thread can request that the UGI relogin at a time. This + * explicit relogin attempt is necessary due to the Hive client/thrift not implicitly handling + * the acquisition of a new TGT after the current one has expired. + * https://issues.apache.org/jira/browse/NIFI-5134 + */ + ugi.checkTGTAndReloginFromKeytab(); + } + try { + return ugi.doAs((PrivilegedExceptionAction) () -> dataSource.getConnection()); + } catch (UndeclaredThrowableException e) { + Throwable cause = e.getCause(); + if (cause instanceof SQLException) { + throw (SQLException) cause; + } else { + throw e; + } + } + } else { + getLogger().info("Simple Authentication"); + return dataSource.getConnection(); + } + } catch (SQLException | IOException | InterruptedException e) { + getLogger().error("Error getting Hive connection", e); + throw new ProcessException(e); + } + } + + @Override + public String toString() { + return "HiveConnectionPool[id=" + getIdentifier() + "]"; + } + + @Override + public String getConnectionURL() { + return connectionUrl; + } + +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java new file mode 100644 index 0000000000..14b207c572 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/AbstractHive_1_1QLProcessor.java @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.hive; + +import org.antlr.runtime.tree.CommonTree; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.ParseDriver; +import org.apache.hadoop.hive.ql.parse.ParseException; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.AbstractSessionFactoryProcessor; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.stream.io.StreamUtils; + +import java.io.IOException; +import java.io.InputStream; +import java.math.BigDecimal; +import java.nio.charset.Charset; +import java.sql.Date; +import java.sql.PreparedStatement; +import java.sql.SQLDataException; +import java.sql.SQLException; +import java.sql.Time; +import java.sql.Timestamp; +import java.sql.Types; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * An abstract base class for HiveQL processors to share common data, methods, etc. + */ +public abstract class AbstractHive_1_1QLProcessor extends AbstractSessionFactoryProcessor { + + protected static final Pattern HIVEQL_TYPE_ATTRIBUTE_PATTERN = Pattern.compile("hiveql\\.args\\.(\\d+)\\.type"); + protected static final Pattern NUMBER_PATTERN = Pattern.compile("-?\\d+"); + static String ATTR_INPUT_TABLES = "query.input.tables"; + static String ATTR_OUTPUT_TABLES = "query.output.tables"; + + + public static final PropertyDescriptor HIVE_DBCP_SERVICE = new PropertyDescriptor.Builder() + .name("Hive Database Connection Pooling Service") + .description("The Hive Controller Service that is used to obtain connection(s) to the Hive database") + .required(true) + .identifiesControllerService(Hive_1_1DBCPService.class) + .build(); + + public static final PropertyDescriptor CHARSET = new PropertyDescriptor.Builder() + .name("hive-charset") + .displayName("Character Set") + .description("Specifies the character set of the record data.") + .required(true) + .defaultValue("UTF-8") + .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) + .build(); + + /** + * Determines the HiveQL statement that should be executed for the given FlowFile + * + * @param session the session that can be used to access the given FlowFile + * @param flowFile the FlowFile whose HiveQL statement should be executed + * @return the HiveQL that is associated with the given FlowFile + */ + protected String getHiveQL(final ProcessSession session, final FlowFile flowFile, final Charset charset) { + // Read the HiveQL from the FlowFile's content + final byte[] buffer = new byte[(int) flowFile.getSize()]; + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream in) throws IOException { + StreamUtils.fillBuffer(in, buffer); + } + }); + + // Create the PreparedStatement to use for this FlowFile. + return new String(buffer, charset); + } + + private class ParameterHolder { + String attributeName; + int jdbcType; + String value; + } + + /** + * Sets all of the appropriate parameters on the given PreparedStatement, based on the given FlowFile attributes. 
+ * + * @param stmt the statement to set the parameters on + * @param attributes the attributes from which to derive parameter indices, values, and types + * @throws SQLException if the PreparedStatement throws a SQLException when the appropriate setter is called + */ + protected int setParameters(int base, final PreparedStatement stmt, int paramCount, final Map attributes) throws SQLException { + + Map parmMap = new TreeMap(); + + for (final Map.Entry entry : attributes.entrySet()) { + final String key = entry.getKey(); + final Matcher matcher = HIVEQL_TYPE_ATTRIBUTE_PATTERN.matcher(key); + if (matcher.matches()) { + final int parameterIndex = Integer.parseInt(matcher.group(1)); + if (parameterIndex >= base && parameterIndex < base + paramCount) { + final boolean isNumeric = NUMBER_PATTERN.matcher(entry.getValue()).matches(); + if (!isNumeric) { + throw new SQLDataException("Value of the " + key + " attribute is '" + entry.getValue() + "', which is not a valid JDBC numeral jdbcType"); + } + + final String valueAttrName = "hiveql.args." + parameterIndex + ".value"; + + ParameterHolder ph = new ParameterHolder(); + int realIndexLoc = parameterIndex - base +1; + + ph.jdbcType = Integer.parseInt(entry.getValue()); + ph.value = attributes.get(valueAttrName); + ph.attributeName = valueAttrName; + + parmMap.put(realIndexLoc, ph); + + } + } + } + + + // Now that's we've retrieved the correct number of parameters and it's sorted, let's set them. + for (final Map.Entry entry : parmMap.entrySet()) { + final Integer index = entry.getKey(); + final ParameterHolder ph = entry.getValue(); + + try { + setParameter(stmt, ph.attributeName, index, ph.value, ph.jdbcType); + } catch (final NumberFormatException nfe) { + throw new SQLDataException("The value of the " + ph.attributeName + " is '" + ph.value + "', which cannot be converted into the necessary data jdbcType", nfe); + } + } + return base + paramCount; + } + + /** + * Determines how to map the given value to the appropriate JDBC data jdbcType and sets the parameter on the + * provided PreparedStatement + * + * @param stmt the PreparedStatement to set the parameter on + * @param attrName the name of the attribute that the parameter is coming from - for logging purposes + * @param parameterIndex the index of the HiveQL parameter to set + * @param parameterValue the value of the HiveQL parameter to set + * @param jdbcType the JDBC Type of the HiveQL parameter to set + * @throws SQLException if the PreparedStatement throws a SQLException when calling the appropriate setter + */ + protected void setParameter(final PreparedStatement stmt, final String attrName, final int parameterIndex, final String parameterValue, final int jdbcType) throws SQLException { + if (parameterValue == null) { + stmt.setNull(parameterIndex, jdbcType); + } else { + try { + switch (jdbcType) { + case Types.BIT: + case Types.BOOLEAN: + stmt.setBoolean(parameterIndex, Boolean.parseBoolean(parameterValue)); + break; + case Types.TINYINT: + stmt.setByte(parameterIndex, Byte.parseByte(parameterValue)); + break; + case Types.SMALLINT: + stmt.setShort(parameterIndex, Short.parseShort(parameterValue)); + break; + case Types.INTEGER: + stmt.setInt(parameterIndex, Integer.parseInt(parameterValue)); + break; + case Types.BIGINT: + stmt.setLong(parameterIndex, Long.parseLong(parameterValue)); + break; + case Types.REAL: + stmt.setFloat(parameterIndex, Float.parseFloat(parameterValue)); + break; + case Types.FLOAT: + case Types.DOUBLE: + stmt.setDouble(parameterIndex, 
Double.parseDouble(parameterValue)); + break; + case Types.DECIMAL: + case Types.NUMERIC: + stmt.setBigDecimal(parameterIndex, new BigDecimal(parameterValue)); + break; + case Types.DATE: + stmt.setDate(parameterIndex, new Date(Long.parseLong(parameterValue))); + break; + case Types.TIME: + stmt.setTime(parameterIndex, new Time(Long.parseLong(parameterValue))); + break; + case Types.TIMESTAMP: + stmt.setTimestamp(parameterIndex, new Timestamp(Long.parseLong(parameterValue))); + break; + case Types.CHAR: + case Types.VARCHAR: + case Types.LONGNVARCHAR: + case Types.LONGVARCHAR: + stmt.setString(parameterIndex, parameterValue); + break; + default: + stmt.setObject(parameterIndex, parameterValue, jdbcType); + break; + } + } catch (SQLException e) { + // Log which attribute/parameter had an error, then rethrow to be handled at the top level + getLogger().error("Error setting parameter {} to value from {} ({})", new Object[]{parameterIndex, attrName, parameterValue}, e); + throw e; + } + } + } + + protected static class TableName { + private final String database; + private final String table; + private final boolean input; + + TableName(String database, String table, boolean input) { + this.database = database; + this.table = table; + this.input = input; + } + + public String getDatabase() { + return database; + } + + public String getTable() { + return table; + } + + public boolean isInput() { + return input; + } + + @Override + public String toString() { + return database == null || database.isEmpty() ? table : database + '.' + table; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + TableName tableName = (TableName) o; + + if (input != tableName.input) return false; + if (database != null ? !database.equals(tableName.database) : tableName.database != null) return false; + return table.equals(tableName.table); + } + + @Override + public int hashCode() { + int result = database != null ? database.hashCode() : 0; + result = 31 * result + table.hashCode(); + result = 31 * result + (input ? 1 : 0); + return result; + } + } + + protected Set findTableNames(final String query) { + final ASTNode node; + try { + node = new ParseDriver().parse(normalize(query)); + } catch (ParseException e) { + // If failed to parse the query, just log a message, but continue. + getLogger().debug("Failed to parse query: {} due to {}", new Object[]{query, e}, e); + return Collections.emptySet(); + } + + final HashSet tableNames = new HashSet<>(); + findTableNames(node, tableNames); + return tableNames; + } + + /** + * Normalize query. + * Hive resolves prepared statement parameters before executing a query, + * see {@link org.apache.hive.jdbc.HivePreparedStatement#updateSql(String, HashMap)} for detail. + * HiveParser does not expect '?' to be in a query string, and throws an Exception if there is one. + * In this normalize method, '?' is replaced to 'x' to avoid that. 
+ */ + private String normalize(String query) { + return query.replace('?', 'x'); + } + + private void findTableNames(final Object obj, final Set tableNames) { + if (!(obj instanceof CommonTree)) { + return; + } + final CommonTree tree = (CommonTree) obj; + final int childCount = tree.getChildCount(); + if ("TOK_TABNAME".equals(tree.getText())) { + final TableName tableName; + final boolean isInput = "TOK_TABREF".equals(tree.getParent().getText()); + switch (childCount) { + case 1 : + tableName = new TableName(null, tree.getChild(0).getText(), isInput); + break; + case 2: + tableName = new TableName(tree.getChild(0).getText(), tree.getChild(1).getText(), isInput); + break; + default: + throw new IllegalStateException("TOK_TABNAME does not have expected children, childCount=" + childCount); + } + // If parent is TOK_TABREF, then it is an input table. + tableNames.add(tableName); + return; + } + for (int i = 0; i < childCount; i++) { + findTableNames(tree.getChild(i), tableNames); + } + } + + protected Map toQueryTableAttributes(Set tableNames) { + final Map attributes = new HashMap<>(); + for (TableName tableName : tableNames) { + final String attributeName = tableName.isInput() ? ATTR_INPUT_TABLES : ATTR_OUTPUT_TABLES; + if (attributes.containsKey(attributeName)) { + attributes.put(attributeName, attributes.get(attributeName) + "," + tableName); + } else { + attributes.put(attributeName, tableName.toString()); + } + } + return attributes; + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java new file mode 100644 index 0000000000..d571789429 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/PutHive_1_1QL.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.hive; + +import org.apache.commons.lang3.StringUtils; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.nifi.annotation.behavior.ReadsAttribute; +import org.apache.nifi.annotation.behavior.ReadsAttributes; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessSessionFactory; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processor.util.pattern.ErrorTypes; +import org.apache.nifi.processor.util.pattern.ExceptionHandler; +import org.apache.nifi.processor.util.pattern.ExceptionHandler.OnError; +import org.apache.nifi.processor.util.pattern.PartialFunctions.FetchFlowFiles; +import org.apache.nifi.processor.util.pattern.PartialFunctions.InitConnection; +import org.apache.nifi.processor.util.pattern.Put; +import org.apache.nifi.processor.util.pattern.RollbackOnFailure; +import org.apache.nifi.processor.util.pattern.RoutingResult; + +import java.nio.charset.Charset; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.SQLNonTransientException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; + +@SeeAlso(SelectHive_1_1QL.class) +@InputRequirement(Requirement.INPUT_REQUIRED) +@Tags({"sql", "hive", "put", "database", "update", "insert"}) +@CapabilityDescription("Executes a HiveQL DDL/DML command (UPDATE, INSERT, e.g.). The content of an incoming FlowFile is expected to be the HiveQL command " + + "to execute. The HiveQL command may use the ? to escape parameters. In this case, the parameters to use must exist as FlowFile attributes " + + "with the naming convention hiveql.args.N.type and hiveql.args.N.value, where N is a positive integer. The hiveql.args.N.type is expected to be " + + "a number indicating the JDBC Type. The content of the FlowFile is expected to be in UTF-8 format.") +@ReadsAttributes({ + @ReadsAttribute(attribute = "hiveql.args.N.type", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The type of each Parameter is specified as an integer " + + "that represents the JDBC Type of the parameter."), + @ReadsAttribute(attribute = "hiveql.args.N.value", description = "Incoming FlowFiles are expected to be parametrized HiveQL statements. The value of the Parameters are specified as " + + "hiveql.args.1.value, hiveql.args.2.value, hiveql.args.3.value, and so on. 
The type of the hiveql.args.1.value Parameter is specified by the hiveql.args.1.type attribute.") +}) +@WritesAttributes({ + @WritesAttribute(attribute = "query.input.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " + + "and contains input table names (if any) in comma delimited 'databaseName.tableName' format."), + @WritesAttribute(attribute = "query.output.tables", description = "This attribute is written on the flow files routed to the 'success' relationships, " + + "and contains the target table names in 'databaseName.tableName' format.") +}) +public class PutHive_1_1QL extends AbstractHive_1_1QLProcessor { + + public static final PropertyDescriptor BATCH_SIZE = new PropertyDescriptor.Builder() + .name("hive-batch-size") + .displayName("Batch Size") + .description("The preferred number of FlowFiles to put to the database in a single transaction") + .required(true) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .defaultValue("100") + .build(); + + public static final PropertyDescriptor STATEMENT_DELIMITER = new PropertyDescriptor.Builder() + .name("statement-delimiter") + .displayName("Statement Delimiter") + .description("Statement Delimiter used to separate SQL statements in a multiple statement script") + .required(true) + .defaultValue(";") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .build(); + + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("A FlowFile is routed to this relationship after the database is successfully updated") + .build(); + public static final Relationship REL_RETRY = new Relationship.Builder() + .name("retry") + .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed") + .build(); + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail, " + + "such as an invalid query or an integrity constraint violation") + .build(); + + + private final static List propertyDescriptors; + private final static Set relationships; + + /* + * Will ensure that the list of property descriptors is built only once. 
+ * Will also create a Set of relationships + */ + static { + List _propertyDescriptors = new ArrayList<>(); + _propertyDescriptors.add(HIVE_DBCP_SERVICE); + _propertyDescriptors.add(BATCH_SIZE); + _propertyDescriptors.add(CHARSET); + _propertyDescriptors.add(STATEMENT_DELIMITER); + _propertyDescriptors.add(RollbackOnFailure.ROLLBACK_ON_FAILURE); + propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); + + Set _relationships = new HashSet<>(); + _relationships.add(REL_SUCCESS); + _relationships.add(REL_FAILURE); + _relationships.add(REL_RETRY); + relationships = Collections.unmodifiableSet(_relationships); + } + + private Put process; + private ExceptionHandler exceptionHandler; + + @OnScheduled + public void constructProcess() { + exceptionHandler = new ExceptionHandler<>(); + exceptionHandler.mapException(e -> { + if (e instanceof SQLNonTransientException) { + return ErrorTypes.InvalidInput; + } else if (e instanceof SQLException) { + // Use the SQLException's vendor code for guidance -- see Hive's ErrorMsg class for details on error codes + int errorCode = ((SQLException) e).getErrorCode(); + getLogger().debug("Error occurred during Hive operation, Hive returned error code {}", new Object[]{errorCode}); + if (errorCode >= 10000 && errorCode < 20000) { + return ErrorTypes.InvalidInput; + } else if (errorCode >= 20000 && errorCode < 30000) { + return ErrorTypes.InvalidInput; + } else if (errorCode >= 30000 && errorCode < 40000) { + return ErrorTypes.TemporalInputFailure; + } else if (errorCode >= 40000 && errorCode < 50000) { + // These are unknown errors (to include some parse errors), but rather than generating an UnknownFailure which causes + // a ProcessException, we'll route to failure via an InvalidInput error type. + return ErrorTypes.InvalidInput; + } else { + // Default unknown errors to TemporalFailure (as they were implemented originally), so they can be routed to failure + // or rolled back depending on the user's setting of Rollback On Failure. + return ErrorTypes.TemporalFailure; + } + } else { + return ErrorTypes.UnknownFailure; + } + }); + exceptionHandler.adjustError(RollbackOnFailure.createAdjustError(getLogger())); + + process = new Put<>(); + process.setLogger(getLogger()); + process.initConnection(initConnection); + process.fetchFlowFiles(fetchFlowFiles); + process.putFlowFile(putFlowFile); + process.adjustRoute(RollbackOnFailure.createAdjustRoute(REL_FAILURE, REL_RETRY)); + } + + @Override + protected List getSupportedPropertyDescriptors() { + return propertyDescriptors; + } + + @Override + public Set getRelationships() { + return relationships; + } + + private class FunctionContext extends RollbackOnFailure { + final Charset charset; + final String statementDelimiter; + final long startNanos = System.nanoTime(); + + String connectionUrl; + + + private FunctionContext(boolean rollbackOnFailure, Charset charset, String statementDelimiter) { + super(rollbackOnFailure, false); + this.charset = charset; + this.statementDelimiter = statementDelimiter; + } + } + + private InitConnection initConnection = (context, session, fc, ff) -> { + final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class); + final Connection connection = dbcpService.getConnection(ff == null ? 
Collections.emptyMap() : ff.getAttributes()); + fc.connectionUrl = dbcpService.getConnectionURL(); + return connection; + }; + + private FetchFlowFiles fetchFlowFiles = (context, session, functionContext, result) -> { + final int batchSize = context.getProperty(BATCH_SIZE).asInteger(); + return session.get(batchSize); + }; + + private Put.PutFlowFile putFlowFile = (context, session, fc, conn, flowFile, result) -> { + final String script = getHiveQL(session, flowFile, fc.charset); + String regex = "(? tableNames = new HashSet<>(); + exceptionHandler.execute(fc, flowFile, input -> { + int loc = 1; + for (String hiveQLStr: hiveQLs) { + getLogger().debug("HiveQL: {}", new Object[]{hiveQLStr}); + + final String hiveQL = hiveQLStr.trim(); + if (!StringUtils.isEmpty(hiveQL)) { + final PreparedStatement stmt = conn.prepareStatement(hiveQL); + + // Get ParameterMetadata + // Hive JDBC Doesn't support this yet: + // ParameterMetaData pmd = stmt.getParameterMetaData(); + // int paramCount = pmd.getParameterCount(); + int paramCount = StringUtils.countMatches(hiveQL, "?"); + + if (paramCount > 0) { + loc = setParameters(loc, stmt, paramCount, flowFile.getAttributes()); + } + + // Parse hiveQL and extract input/output tables + try { + tableNames.addAll(findTableNames(hiveQL)); + } catch (Exception e) { + // If failed to parse the query, just log a warning message, but continue. + getLogger().warn("Failed to parse hiveQL: {} due to {}", new Object[]{hiveQL, e}, e); + } + + // Execute the statement + stmt.execute(); + fc.proceed(); + } + } + + // Emit a Provenance SEND event + final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - fc.startNanos); + + final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, toQueryTableAttributes(tableNames)); + session.getProvenanceReporter().send(updatedFlowFile, fc.connectionUrl, transmissionMillis, true); + result.routeTo(flowFile, REL_SUCCESS); + + }, onFlowFileError(context, session, result)); + + }; + + private OnError onFlowFileError(final ProcessContext context, final ProcessSession session, final RoutingResult result) { + OnError onFlowFileError = ExceptionHandler.createOnError(context, session, result, REL_FAILURE, REL_RETRY); + onFlowFileError = onFlowFileError.andThen((c, i, r, e) -> { + switch (r.destination()) { + case Failure: + getLogger().error("Failed to update Hive for {} due to {}; routing to failure", new Object[] {i, e}, e); + break; + case Retry: + getLogger().error("Failed to update Hive for {} due to {}; it is possible that retrying the operation will succeed, so routing to retry", + new Object[] {i, e}, e); + break; + case Self: + getLogger().error("Failed to update Hive for {} due to {};", new Object[] {i, e}, e); + break; + } + }); + return RollbackOnFailure.createOnError(onFlowFileError); + } + + @Override + public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { + final Boolean rollbackOnFailure = context.getProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE).asBoolean(); + final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); + final String statementDelimiter = context.getProperty(STATEMENT_DELIMITER).getValue(); + final FunctionContext functionContext = new FunctionContext(rollbackOnFailure, charset, statementDelimiter); + RollbackOnFailure.onTrigger(context, sessionFactory, functionContext, getLogger(), session -> process.onTrigger(context, session, functionContext)); + } +} \ No newline at end of file diff --git 
a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java new file mode 100644 index 0000000000..d16cfc3c41 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/processors/hive/SelectHive_1_1QL.java @@ -0,0 +1,552 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.hive; + +import java.nio.charset.Charset; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.Statement; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.nifi.annotation.behavior.EventDriven; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessSessionFactory; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.processor.util.pattern.PartialFunctions; +import org.apache.nifi.util.StopWatch; +import org.apache.nifi.util.hive.CsvOutputOptions; +import org.apache.nifi.util.hive.HiveJdbcCommon; + +import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; +import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; +import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; +import static 
org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; +import static org.apache.nifi.util.hive.HiveJdbcCommon.NORMALIZE_NAMES_FOR_AVRO; + +@EventDriven +@InputRequirement(Requirement.INPUT_ALLOWED) +@Tags({"hive", "sql", "select", "jdbc", "query", "database"}) +@CapabilityDescription("Execute provided HiveQL SELECT query against a Hive database connection. Query result will be converted to Avro or CSV format." + + " Streaming is used so arbitrarily large result sets are supported. This processor can be scheduled to run on " + + "a timer, or cron expression, using the standard scheduling methods, or it can be triggered by an incoming FlowFile. " + + "If it is triggered by an incoming FlowFile, then attributes of that FlowFile will be available when evaluating the " + + "select query. FlowFile attribute 'selecthiveql.row.count' indicates how many rows were selected.") +@WritesAttributes({ + @WritesAttribute(attribute = "mime.type", description = "Sets the MIME type for the outgoing flowfile to application/avro-binary for Avro or text/csv for CSV."), + @WritesAttribute(attribute = "filename", description = "Adds .avro or .csv to the filename attribute depending on which output format is selected."), + @WritesAttribute(attribute = "selecthiveql.row.count", description = "Indicates how many rows were selected/returned by the query."), + @WritesAttribute(attribute = "fragment.identifier", description = "If 'Max Rows Per Flow File' is set then all FlowFiles from the same query result set " + + "will have the same value for the fragment.identifier attribute. This can then be used to correlate the results."), + @WritesAttribute(attribute = "fragment.count", description = "If 'Max Rows Per Flow File' is set then this is the total number of " + + "FlowFiles produced by a single ResultSet. This can be used in conjunction with the " + + "fragment.identifier attribute in order to know how many FlowFiles belonged to the same incoming ResultSet."), + @WritesAttribute(attribute = "fragment.index", description = "If 'Max Rows Per Flow File' is set then the position of this FlowFile in the list of " + + "outgoing FlowFiles that were all derived from the same result set FlowFile. This can be " + + "used in conjunction with the fragment.identifier attribute to know which FlowFiles originated from the same query result set and in what order " + + "FlowFiles were produced"), + @WritesAttribute(attribute = "query.input.tables", description = "Contains input table names in comma delimited 'databaseName.tableName' format.") +}) +public class SelectHive_1_1QL extends AbstractHive_1_1QLProcessor { + + public static final String RESULT_ROW_COUNT = "selecthiveql.row.count"; + + // Relationships + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("Successfully created FlowFile from HiveQL query result set.") + .build(); + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("HiveQL query execution failed. Incoming FlowFile will be penalized and routed to this relationship.") + .build(); + + + public static final PropertyDescriptor HIVEQL_PRE_QUERY = new PropertyDescriptor.Builder() + .name("hive-pre-query") + .displayName("HiveQL Pre-Query") + .description("A semicolon-delimited list of queries executed before the main SQL query is executed. " + + "Example: 'set tez.queue.name=queue1; set hive.exec.orc.split.strategy=ETL; set hive.exec.reducers.bytes.per.reducer=1073741824'. 
" + + "Note, the results/outputs of these queries will be suppressed if successfully executed.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor HIVEQL_SELECT_QUERY = new PropertyDescriptor.Builder() + .name("hive-query") + .displayName("HiveQL Select Query") + .description("HiveQL SELECT query to execute. If this is not set, the query is assumed to be in the content of an incoming FlowFile.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor HIVEQL_POST_QUERY = new PropertyDescriptor.Builder() + .name("hive-post-query") + .displayName("HiveQL Post-Query") + .description("A semicolon-delimited list of queries executed after the main SQL query is executed. " + + "Note, the results/outputs of these queries will be suppressed if successfully executed.") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor FETCH_SIZE = new PropertyDescriptor.Builder() + .name("hive-fetch-size") + .displayName("Fetch Size") + .description("The number of result rows to be fetched from the result set at a time. This is a hint to the driver and may not be " + + "honored and/or exact. If the value specified is zero, then the hint is ignored.") + .defaultValue("0") + .required(true) + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor MAX_ROWS_PER_FLOW_FILE = new PropertyDescriptor.Builder() + .name("hive-max-rows") + .displayName("Max Rows Per Flow File") + .description("The maximum number of result rows that will be included in a single FlowFile. " + + "This will allow you to break up very large result sets into multiple FlowFiles. If the value specified is zero, then all rows are returned in a single FlowFile.") + .defaultValue("0") + .required(true) + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor MAX_FRAGMENTS = new PropertyDescriptor.Builder() + .name("hive-max-frags") + .displayName("Maximum Number of Fragments") + .description("The maximum number of fragments. If the value specified is zero, then all fragments are returned. 
" + + "This prevents OutOfMemoryError when this processor ingests huge table.") + .defaultValue("0") + .required(true) + .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor HIVEQL_CSV_HEADER = new PropertyDescriptor.Builder() + .name("csv-header") + .displayName("CSV Header") + .description("Include Header in Output") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .addValidator(StandardValidators.BOOLEAN_VALIDATOR) + .build(); + + public static final PropertyDescriptor HIVEQL_CSV_ALT_HEADER = new PropertyDescriptor.Builder() + .name("csv-alt-header") + .displayName("Alternate CSV Header") + .description("Comma separated list of header fields") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor HIVEQL_CSV_DELIMITER = new PropertyDescriptor.Builder() + .name("csv-delimiter") + .displayName("CSV Delimiter") + .description("CSV Delimiter used to separate fields") + .required(true) + .defaultValue(",") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor HIVEQL_CSV_QUOTE = new PropertyDescriptor.Builder() + .name("csv-quote") + .displayName("CSV Quote") + .description("Whether to force quoting of CSV fields. Note that this might conflict with the setting for CSV Escape.") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .addValidator(StandardValidators.BOOLEAN_VALIDATOR) + .build(); + public static final PropertyDescriptor HIVEQL_CSV_ESCAPE = new PropertyDescriptor.Builder() + .name("csv-escape") + .displayName("CSV Escape") + .description("Whether to escape CSV strings in output. Note that this might conflict with the setting for CSV Quote.") + .required(true) + .allowableValues("true", "false") + .defaultValue("true") + .addValidator(StandardValidators.BOOLEAN_VALIDATOR) + .build(); + + public static final PropertyDescriptor HIVEQL_OUTPUT_FORMAT = new PropertyDescriptor.Builder() + .name("hive-output-format") + .displayName("Output Format") + .description("How to represent the records coming from Hive (Avro, CSV, e.g.)") + .required(true) + .allowableValues(AVRO, CSV) + .defaultValue(AVRO) + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .build(); + + private final static List propertyDescriptors; + private final static Set relationships; + + /* + * Will ensure that the list of property descriptors is built only once. 
+ * Will also create a Set of relationships + */ + static { + List _propertyDescriptors = new ArrayList<>(); + _propertyDescriptors.add(HIVE_DBCP_SERVICE); + _propertyDescriptors.add(HIVEQL_PRE_QUERY); + _propertyDescriptors.add(HIVEQL_SELECT_QUERY); + _propertyDescriptors.add(HIVEQL_POST_QUERY); + _propertyDescriptors.add(FETCH_SIZE); + _propertyDescriptors.add(MAX_ROWS_PER_FLOW_FILE); + _propertyDescriptors.add(MAX_FRAGMENTS); + _propertyDescriptors.add(HIVEQL_OUTPUT_FORMAT); + _propertyDescriptors.add(NORMALIZE_NAMES_FOR_AVRO); + _propertyDescriptors.add(HIVEQL_CSV_HEADER); + _propertyDescriptors.add(HIVEQL_CSV_ALT_HEADER); + _propertyDescriptors.add(HIVEQL_CSV_DELIMITER); + _propertyDescriptors.add(HIVEQL_CSV_QUOTE); + _propertyDescriptors.add(HIVEQL_CSV_ESCAPE); + _propertyDescriptors.add(CHARSET); + propertyDescriptors = Collections.unmodifiableList(_propertyDescriptors); + + Set _relationships = new HashSet<>(); + _relationships.add(REL_SUCCESS); + _relationships.add(REL_FAILURE); + relationships = Collections.unmodifiableSet(_relationships); + } + + @Override + protected List getSupportedPropertyDescriptors() { + return propertyDescriptors; + } + + @Override + public Set getRelationships() { + return relationships; + } + + @OnScheduled + public void setup(ProcessContext context) { + // If the query is not set, then an incoming flow file is needed. Otherwise fail the initialization + if (!context.getProperty(HIVEQL_SELECT_QUERY).isSet() && !context.hasIncomingConnection()) { + final String errorString = "Either the Select Query must be specified or there must be an incoming connection " + + "providing flowfile(s) containing a SQL select query"; + getLogger().error(errorString); + throw new ProcessException(errorString); + } + } + + @Override + public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { + PartialFunctions.onTrigger(context, sessionFactory, getLogger(), session -> onTrigger(context, session)); + } + + private void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + FlowFile fileToProcess = (context.hasIncomingConnection() ? session.get() : null); + FlowFile flowfile = null; + + // If we have no FlowFile, and all incoming connections are self-loops then we can continue on. + // However, if we have no FlowFile and we have connections coming from other Processors, then + // we know that we should run only if we have a FlowFile. 
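+ // For example, if the only incoming connection is this processor's own 'failure' relationship looped back on itself, the processor can run without a FlowFile being available.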
+ if (context.hasIncomingConnection()) { + if (fileToProcess == null && context.hasNonLoopConnection()) { + return; + } + } + + final ComponentLog logger = getLogger(); + final Hive_1_1DBCPService dbcpService = context.getProperty(HIVE_DBCP_SERVICE).asControllerService(Hive_1_1DBCPService.class); + final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue()); + + List preQueries = getQueries(context.getProperty(HIVEQL_PRE_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); + List postQueries = getQueries(context.getProperty(HIVEQL_POST_QUERY).evaluateAttributeExpressions(fileToProcess).getValue()); + + final boolean flowbased = !(context.getProperty(HIVEQL_SELECT_QUERY).isSet()); + + // Source the SQL + String hqlStatement; + + if (context.getProperty(HIVEQL_SELECT_QUERY).isSet()) { + hqlStatement = context.getProperty(HIVEQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue(); + } else { + // If the query is not set, then an incoming flow file is required, and expected to contain a valid SQL select query. + // If there is no incoming connection, onTrigger will not be called as the processor will fail when scheduled. + final StringBuilder queryContents = new StringBuilder(); + session.read(fileToProcess, in -> queryContents.append(IOUtils.toString(in, charset))); + hqlStatement = queryContents.toString(); + } + + + final Integer fetchSize = context.getProperty(FETCH_SIZE).evaluateAttributeExpressions(fileToProcess).asInteger(); + final Integer maxRowsPerFlowFile = context.getProperty(MAX_ROWS_PER_FLOW_FILE).evaluateAttributeExpressions(fileToProcess).asInteger(); + final Integer maxFragments = context.getProperty(MAX_FRAGMENTS).isSet() + ? context.getProperty(MAX_FRAGMENTS).evaluateAttributeExpressions(fileToProcess).asInteger() + : 0; + final String outputFormat = context.getProperty(HIVEQL_OUTPUT_FORMAT).getValue(); + final boolean convertNamesForAvro = context.getProperty(NORMALIZE_NAMES_FOR_AVRO).asBoolean(); + final StopWatch stopWatch = new StopWatch(true); + final boolean header = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); + final String altHeader = context.getProperty(HIVEQL_CSV_ALT_HEADER).evaluateAttributeExpressions(fileToProcess).getValue(); + final String delimiter = context.getProperty(HIVEQL_CSV_DELIMITER).evaluateAttributeExpressions(fileToProcess).getValue(); + final boolean quote = context.getProperty(HIVEQL_CSV_QUOTE).asBoolean(); + final boolean escape = context.getProperty(HIVEQL_CSV_HEADER).asBoolean(); + final String fragmentIdentifier = UUID.randomUUID().toString(); + + try (final Connection con = dbcpService.getConnection(fileToProcess == null ? Collections.emptyMap() : fileToProcess.getAttributes()); + final Statement st = (flowbased ? con.prepareStatement(hqlStatement) : con.createStatement()) + ) { + Pair failure = executeConfigStatements(con, preQueries); + if (failure != null) { + // In case of failure, assigning config query to "hqlStatement" to follow current error handling + hqlStatement = failure.getLeft(); + flowfile = (fileToProcess == null) ? 
session.create() : fileToProcess; + fileToProcess = null; + throw failure.getRight(); + } + if (fetchSize != null && fetchSize > 0) { + try { + st.setFetchSize(fetchSize); + } catch (SQLException se) { + // Not all drivers support this, just log the error (at debug level) and move on + logger.debug("Cannot set fetch size to {} due to {}", new Object[]{fetchSize, se.getLocalizedMessage()}, se); + } + } + + final List resultSetFlowFiles = new ArrayList<>(); + try { + logger.debug("Executing query {}", new Object[]{hqlStatement}); + if (flowbased) { + // Hive JDBC Doesn't Support this yet: + // ParameterMetaData pmd = ((PreparedStatement)st).getParameterMetaData(); + // int paramCount = pmd.getParameterCount(); + + // Alternate way to determine number of params in SQL. + int paramCount = StringUtils.countMatches(hqlStatement, "?"); + + if (paramCount > 0) { + setParameters(1, (PreparedStatement) st, paramCount, fileToProcess.getAttributes()); + } + } + + final ResultSet resultSet; + + try { + resultSet = (flowbased ? ((PreparedStatement) st).executeQuery() : st.executeQuery(hqlStatement)); + } catch (SQLException se) { + // If an error occurs during the query, a flowfile is expected to be routed to failure, so ensure one here + flowfile = (fileToProcess == null) ? session.create() : fileToProcess; + fileToProcess = null; + throw se; + } + + int fragmentIndex = 0; + String baseFilename = (fileToProcess != null) ? fileToProcess.getAttribute(CoreAttributes.FILENAME.key()) : null; + while (true) { + final AtomicLong nrOfRows = new AtomicLong(0L); + flowfile = (fileToProcess == null) ? session.create() : session.create(fileToProcess); + if (baseFilename == null) { + baseFilename = flowfile.getAttribute(CoreAttributes.FILENAME.key()); + } + try { + flowfile = session.write(flowfile, out -> { + try { + if (AVRO.equals(outputFormat)) { + nrOfRows.set(HiveJdbcCommon.convertToAvroStream(resultSet, out, maxRowsPerFlowFile, convertNamesForAvro)); + } else if (CSV.equals(outputFormat)) { + CsvOutputOptions options = new CsvOutputOptions(header, altHeader, delimiter, quote, escape, maxRowsPerFlowFile); + nrOfRows.set(HiveJdbcCommon.convertToCsvStream(resultSet, out, options)); + } else { + nrOfRows.set(0L); + throw new ProcessException("Unsupported output format: " + outputFormat); + } + } catch (final SQLException | RuntimeException e) { + throw new ProcessException("Error during database query or conversion of records.", e); + } + }); + } catch (ProcessException e) { + // Add flowfile to results before rethrowing so it will be removed from session in outer catch + resultSetFlowFiles.add(flowfile); + throw e; + } + + if (nrOfRows.get() > 0 || resultSetFlowFiles.isEmpty()) { + final Map attributes = new HashMap<>(); + // Set attribute for how many rows were selected + attributes.put(RESULT_ROW_COUNT, String.valueOf(nrOfRows.get())); + + try { + // Set input/output table names by parsing the query + attributes.putAll(toQueryTableAttributes(findTableNames(hqlStatement))); + } catch (Exception e) { + // If failed to parse the query, just log a warning message, but continue. + getLogger().warn("Failed to parse query: {} due to {}", new Object[]{hqlStatement, e}, e); + } + + // Set MIME type on output document and add extension to filename + if (AVRO.equals(outputFormat)) { + attributes.put(CoreAttributes.MIME_TYPE.key(), MIME_TYPE_AVRO_BINARY); + attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." 
+ fragmentIndex + ".avro"); + } else if (CSV.equals(outputFormat)) { + attributes.put(CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE); + attributes.put(CoreAttributes.FILENAME.key(), baseFilename + "." + fragmentIndex + ".csv"); + } + + if (maxRowsPerFlowFile > 0) { + attributes.put("fragment.identifier", fragmentIdentifier); + attributes.put("fragment.index", String.valueOf(fragmentIndex)); + } + + flowfile = session.putAllAttributes(flowfile, attributes); + + logger.info("{} contains {} " + outputFormat + " records; transferring to 'success'", + new Object[]{flowfile, nrOfRows.get()}); + + if (context.hasIncomingConnection()) { + // If the flow file came from an incoming connection, issue a Fetch provenance event + session.getProvenanceReporter().fetch(flowfile, dbcpService.getConnectionURL(), + "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS)); + } else { + // If we created a flow file from rows received from Hive, issue a Receive provenance event + session.getProvenanceReporter().receive(flowfile, dbcpService.getConnectionURL(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); + } + resultSetFlowFiles.add(flowfile); + } else { + // If there were no rows returned (and the first flow file has been sent, we're done processing, so remove the flowfile and carry on + session.remove(flowfile); + if (resultSetFlowFiles != null && resultSetFlowFiles.size()>0) { + flowfile = resultSetFlowFiles.get(resultSetFlowFiles.size()-1); + } + break; + } + + fragmentIndex++; + if (maxFragments > 0 && fragmentIndex >= maxFragments) { + break; + } + } + + for (int i = 0; i < resultSetFlowFiles.size(); i++) { + // Set count on all FlowFiles + if (maxRowsPerFlowFile > 0) { + resultSetFlowFiles.set(i, + session.putAttribute(resultSetFlowFiles.get(i), "fragment.count", Integer.toString(fragmentIndex))); + } + } + + } catch (final SQLException e) { + throw e; + } + + failure = executeConfigStatements(con, postQueries); + if (failure != null) { + hqlStatement = failure.getLeft(); + if (resultSetFlowFiles != null) { + resultSetFlowFiles.forEach(ff -> session.remove(ff)); + } + flowfile = (fileToProcess == null) ? session.create() : fileToProcess; + fileToProcess = null; + throw failure.getRight(); + } + + session.transfer(resultSetFlowFiles, REL_SUCCESS); + if (fileToProcess != null) { + session.remove(fileToProcess); + } + } catch (final ProcessException | SQLException e) { + logger.error("Issue processing SQL {} due to {}.", new Object[]{hqlStatement, e}); + if (flowfile == null) { + // This can happen if any exceptions occur while setting up the connection, statement, etc. + logger.error("Unable to execute HiveQL select query {} due to {}. No FlowFile to route to failure", + new Object[]{hqlStatement, e}); + context.yield(); + } else { + if (context.hasIncomingConnection()) { + logger.error("Unable to execute HiveQL select query {} for {} due to {}; routing to failure", + new Object[]{hqlStatement, flowfile, e}); + flowfile = session.penalize(flowfile); + } else { + logger.error("Unable to execute HiveQL select query {} due to {}; routing to failure", + new Object[]{hqlStatement, e}); + context.yield(); + } + session.transfer(flowfile, REL_FAILURE); + } + } + } + + /* + * Executes given queries using pre-defined connection. + * Returns null on success, or a query string if failed. 
+ */ + protected Pair executeConfigStatements(final Connection con, final List configQueries){ + if (configQueries == null || configQueries.isEmpty()) { + return null; + } + + for (String confSQL : configQueries) { + try(final Statement st = con.createStatement()){ + st.execute(confSQL); + } catch (SQLException e) { + return Pair.of(confSQL, e); + } + } + return null; + } + + protected List getQueries(final String value) { + if (value == null || value.length() == 0 || value.trim().length() == 0) { + return null; + } + final List queries = new LinkedList<>(); + for (String query : value.split(";")) { + if (query.trim().length() > 0) { + queries.add(query.trim()); + } + } + return queries; + } +} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java new file mode 100644 index 0000000000..70cc6c13c4 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/AuthenticationFailedException.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.util.hive; + +public class AuthenticationFailedException extends Exception { + public AuthenticationFailedException(String reason, Exception cause) { + super(reason, cause); + } +} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java new file mode 100644 index 0000000000..36889129f3 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/CsvOutputOptions.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.util.hive; + +public class CsvOutputOptions { + + private boolean header = true; + private String altHeader = null; + private String delimiter = ","; + private boolean quote = false; + private boolean escape = true; + + private int maxRowsPerFlowFile = 0; + + public boolean isHeader() { + return header; + } + + public String getAltHeader() { + return altHeader; + } + + + public String getDelimiter() { + return delimiter; + } + + + public boolean isQuote() { + return quote; + } + + public boolean isEscape() { + return escape; + } + + public int getMaxRowsPerFlowFile() { + return maxRowsPerFlowFile; + } + + public CsvOutputOptions(boolean header, String altHeader, String delimiter, boolean quote, boolean escape, int maxRowsPerFlowFile) { + this.header = header; + this.altHeader = altHeader; + this.delimiter = delimiter; + this.quote = quote; + this.escape = escape; + this.maxRowsPerFlowFile = maxRowsPerFlowFile; + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java new file mode 100644 index 0000000000..a987ff89c0 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveConfigurator.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.util.hive; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.hadoop.KerberosProperties; +import org.apache.nifi.hadoop.SecurityUtil; +import org.apache.nifi.logging.ComponentLog; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +public class HiveConfigurator { + + public Collection validate(String configFiles, String principal, String keyTab, AtomicReference validationResourceHolder, ComponentLog log) { + + final List problems = new ArrayList<>(); + ValidationResources resources = validationResourceHolder.get(); + + // if no resources in the holder, or if the holder has different resources loaded, + // then load the Configuration and set the new resources in the holder + if (resources == null || !configFiles.equals(resources.getConfigResources())) { + log.debug("Reloading validation resources"); + resources = new ValidationResources(configFiles, getConfigurationFromFiles(configFiles)); + validationResourceHolder.set(resources); + } + + final Configuration hiveConfig = resources.getConfiguration(); + + problems.addAll(KerberosProperties.validatePrincipalAndKeytab(this.getClass().getSimpleName(), hiveConfig, principal, keyTab, log)); + + return problems; + } + + public HiveConf getConfigurationFromFiles(final String configFiles) { + final HiveConf hiveConfig = new HiveConf(); + if (StringUtils.isNotBlank(configFiles)) { + for (final String configFile : configFiles.split(",")) { + hiveConfig.addResource(new Path(configFile.trim())); + } + } + return hiveConfig; + } + + public void preload(Configuration configuration) { + try { + FileSystem.get(configuration).close(); + UserGroupInformation.setConfiguration(configuration); + } catch (IOException ioe) { + // Suppress exception as future uses of this configuration will fail + } + } + + /** + * As of Apache NiFi 1.5.0, due to changes made to + * {@link SecurityUtil#loginKerberos(Configuration, String, String)}, which is used by this + * class to authenticate a principal with Kerberos, Hive controller services no longer + * attempt relogins explicitly. For more information, please read the documentation for + * {@link SecurityUtil#loginKerberos(Configuration, String, String)}. + *
<p/>
+ * In previous versions of NiFi, a {@link org.apache.nifi.hadoop.KerberosTicketRenewer} was started by + * {@link HiveConfigurator#authenticate(Configuration, String, String, long)} when the Hive + * controller service was enabled. The use of a separate thread to explicitly relogin could cause race conditions + * with the implicit relogin attempts made by hadoop/Hive code on a thread that references the same + * {@link UserGroupInformation} instance. One of these threads could leave the + * {@link javax.security.auth.Subject} in {@link UserGroupInformation} to be cleared or in an unexpected state + * while the other thread is attempting to use the {@link javax.security.auth.Subject}, resulting in failed + * authentication attempts that would leave the Hive controller service in an unrecoverable state. + * + * @see SecurityUtil#loginKerberos(Configuration, String, String) + */ + public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab) throws AuthenticationFailedException { + UserGroupInformation ugi; + try { + ugi = SecurityUtil.loginKerberos(hiveConfig, principal, keyTab); + } catch (IOException ioe) { + throw new AuthenticationFailedException("Kerberos Authentication for Hive failed", ioe); + } + return ugi; + } + + /** + * As of Apache NiFi 1.5.0, this method has been deprecated and is now a wrapper + * method which invokes {@link HiveConfigurator#authenticate(Configuration, String, String)}. It will no longer start a + * {@link org.apache.nifi.hadoop.KerberosTicketRenewer} to perform explicit relogins. + * + * @see HiveConfigurator#authenticate(Configuration, String, String) + */ + @Deprecated + public UserGroupInformation authenticate(final Configuration hiveConfig, String principal, String keyTab, long ticketRenewalPeriod) throws AuthenticationFailedException { + return authenticate(hiveConfig, principal, keyTab); + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java new file mode 100644 index 0000000000..9f07b186ff --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveJdbcCommon.java @@ -0,0 +1,463 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.util.hive; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.SchemaBuilder.FieldAssembler; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.apache.commons.text.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.nifi.components.PropertyDescriptor; + +import java.io.IOException; +import java.io.OutputStream; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.SQLXML; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static java.sql.Types.ARRAY; +import static java.sql.Types.BIGINT; +import static java.sql.Types.BINARY; +import static java.sql.Types.BIT; +import static java.sql.Types.BLOB; +import static java.sql.Types.BOOLEAN; +import static java.sql.Types.CHAR; +import static java.sql.Types.CLOB; +import static java.sql.Types.DATE; +import static java.sql.Types.DECIMAL; +import static java.sql.Types.DOUBLE; +import static java.sql.Types.FLOAT; +import static java.sql.Types.INTEGER; +import static java.sql.Types.JAVA_OBJECT; +import static java.sql.Types.LONGNVARCHAR; +import static java.sql.Types.LONGVARBINARY; +import static java.sql.Types.LONGVARCHAR; +import static java.sql.Types.NCHAR; +import static java.sql.Types.NUMERIC; +import static java.sql.Types.NVARCHAR; +import static java.sql.Types.OTHER; +import static java.sql.Types.REAL; +import static java.sql.Types.ROWID; +import static java.sql.Types.SMALLINT; +import static java.sql.Types.SQLXML; +import static java.sql.Types.STRUCT; +import static java.sql.Types.TIME; +import static java.sql.Types.TIMESTAMP; +import static java.sql.Types.TINYINT; +import static java.sql.Types.VARBINARY; +import static java.sql.Types.VARCHAR; + +/** + * JDBC / HiveQL common functions. + */ +public class HiveJdbcCommon { + + public static final String AVRO = "Avro"; + public static final String CSV = "CSV"; + + public static final String MIME_TYPE_AVRO_BINARY = "application/avro-binary"; + public static final String CSV_MIME_TYPE = "text/csv"; + + + public static final PropertyDescriptor NORMALIZE_NAMES_FOR_AVRO = new PropertyDescriptor.Builder() + .name("hive-normalize-avro") + .displayName("Normalize Table/Column Names") + .description("Whether to change non-Avro-compatible characters in column names to Avro-compatible characters. 
For example, colons and periods " + + "will be changed to underscores in order to build a valid Avro record.") + .allowableValues("true", "false") + .defaultValue("false") + .required(true) + .build(); + + public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, final int maxRows, boolean convertNames) throws SQLException, IOException { + return convertToAvroStream(rs, outStream, null, maxRows, convertNames, null); + } + + + public static long convertToAvroStream(final ResultSet rs, final OutputStream outStream, String recordName, final int maxRows, boolean convertNames, ResultSetRowCallback callback) + throws SQLException, IOException { + final Schema schema = createSchema(rs, recordName, convertNames); + final GenericRecord rec = new GenericData.Record(schema); + + final DatumWriter datumWriter = new GenericDatumWriter<>(schema); + try (final DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { + dataFileWriter.create(schema, outStream); + + final ResultSetMetaData meta = rs.getMetaData(); + final int nrOfColumns = meta.getColumnCount(); + long nrOfRows = 0; + while (rs.next()) { + if (callback != null) { + callback.processRow(rs); + } + for (int i = 1; i <= nrOfColumns; i++) { + final int javaSqlType = meta.getColumnType(i); + Object value = rs.getObject(i); + + if (value == null) { + rec.put(i - 1, null); + + } else if (javaSqlType == BINARY || javaSqlType == VARBINARY || javaSqlType == LONGVARBINARY || javaSqlType == BLOB || javaSqlType == CLOB) { + // bytes requires little bit different handling + ByteBuffer bb = null; + if (value instanceof byte[]) { + bb = ByteBuffer.wrap((byte[]) value); + } else if (value instanceof ByteBuffer) { + bb = (ByteBuffer) value; + } + if (bb != null) { + rec.put(i - 1, bb); + } else { + throw new IOException("Could not process binary object of type " + value.getClass().getName()); + } + + } else if (value instanceof Byte) { + // tinyint(1) type is returned by JDBC driver as java.sql.Types.TINYINT + // But value is returned by JDBC as java.lang.Byte + // (at least H2 JDBC works this way) + // direct put to avro record results: + // org.apache.avro.AvroRuntimeException: Unknown datum type java.lang.Byte + rec.put(i - 1, ((Byte) value).intValue()); + + } else if (value instanceof BigDecimal || value instanceof BigInteger) { + // Avro can't handle BigDecimal and BigInteger as numbers - it will throw an AvroRuntimeException such as: "Unknown datum type: java.math.BigDecimal: 38" + rec.put(i - 1, value.toString()); + + } else if (value instanceof Number) { + // Need to call the right getXYZ() method (instead of the getObject() method above), since Doubles are sometimes returned + // when the JDBC type is 6 (Float) for example. + if (javaSqlType == FLOAT) { + value = rs.getFloat(i); + } else if (javaSqlType == DOUBLE) { + value = rs.getDouble(i); + } else if (javaSqlType == INTEGER || javaSqlType == TINYINT || javaSqlType == SMALLINT) { + value = rs.getInt(i); + } + + rec.put(i - 1, value); + + } else if (value instanceof Boolean) { + rec.put(i - 1, value); + } else if (value instanceof java.sql.SQLXML) { + rec.put(i - 1, ((java.sql.SQLXML) value).getString()); + } else { + // The different types that we support are numbers (int, long, double, float), + // as well as boolean values and Strings. Since Avro doesn't provide + // timestamp types, we want to convert those to Strings. So we will cast anything other + // than numbers or booleans to strings by using the toString() method. 
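+ // For example, a java.sql.Timestamp value is written in its toString() form, such as "2019-01-22 13:34:35.0".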
+                        rec.put(i - 1, value.toString());
+                    }
+                }
+                dataFileWriter.append(rec);
+                nrOfRows += 1;
+
+                if (maxRows > 0 && nrOfRows == maxRows)
+                    break;
+            }
+
+            return nrOfRows;
+        }
+    }
+
+    public static Schema createSchema(final ResultSet rs, boolean convertNames) throws SQLException {
+        return createSchema(rs, null, convertNames);
+    }
+
+    /**
+     * Creates an Avro schema from a result set. If the table/record name is known a priori and provided, it is used as a
+     * fallback for the record name when the name cannot be retrieved from the result set; otherwise a default value is used.
+     *
+     * @param rs The result set to convert to Avro
+     * @param recordName The a priori record name to use if it cannot be determined from the result set.
+     * @param convertNames Whether to convert column/table names to be legal Avro names
+     * @return A Schema object representing the result set converted to an Avro record
+     * @throws SQLException if any error occurs during conversion
+     */
+    public static Schema createSchema(final ResultSet rs, String recordName, boolean convertNames) throws SQLException {
+        final ResultSetMetaData meta = rs.getMetaData();
+        final int nrOfColumns = meta.getColumnCount();
+        String tableName = StringUtils.isEmpty(recordName) ? "NiFi_SelectHiveQL_Record" : recordName;
+        try {
+            if (nrOfColumns > 0) {
+                // Hive JDBC doesn't support getTableName, instead it returns table.column for column name. Grab the table name from the first column
+                String firstColumnNameFromMeta = meta.getColumnName(1);
+                int tableNameDelimiter = firstColumnNameFromMeta.lastIndexOf(".");
+                if (tableNameDelimiter > -1) {
+                    String tableNameFromMeta = firstColumnNameFromMeta.substring(0, tableNameDelimiter);
+                    if (!StringUtils.isBlank(tableNameFromMeta)) {
+                        tableName = tableNameFromMeta;
+                    }
+                }
+            }
+        } catch (SQLException se) {
+            // Not all drivers support getTableName, so just use the previously-set default
+        }
+
+        if (convertNames) {
+            tableName = normalizeNameForAvro(tableName);
+        }
+        final FieldAssembler builder = SchemaBuilder.record(tableName).namespace("any.data").fields();
+
+        /*
+         * Some missing Avro types - Decimal, Date types. May need some additional work.
+         */
+        for (int i = 1; i <= nrOfColumns; i++) {
+            String columnNameFromMeta = meta.getColumnName(i);
+            // Hive returns table.column for column name. Grab the column name as the string after the last period
+            int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
+            String columnName = columnNameFromMeta.substring(columnNameDelimiter + 1);
+            switch (meta.getColumnType(i)) {
+                case CHAR:
+                case LONGNVARCHAR:
+                case LONGVARCHAR:
+                case NCHAR:
+                case NVARCHAR:
+                case VARCHAR:
+                case ARRAY:
+                case STRUCT:
+                case JAVA_OBJECT:
+                case OTHER:
+                case SQLXML:
+                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault();
+                    break;
+
+                case BIT:
+                case BOOLEAN:
+                    builder.name(columnName).type().unionOf().nullBuilder().endNull().and().booleanType().endUnion().noDefault();
+                    break;
+
+                case INTEGER:
+                    // Default to signed type unless otherwise noted.
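(An unsigned INTEGER column is mapped to an Avro long below, since its values may exceed Integer.MAX_VALUE.)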
Some JDBC drivers don't implement isSigned() + boolean signedType = true; + try { + signedType = meta.isSigned(i); + } catch (SQLException se) { + // Use signed types as default + } + if (signedType) { + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); + } else { + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); + } + break; + + case SMALLINT: + case TINYINT: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().intType().endUnion().noDefault(); + break; + + case BIGINT: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().longType().endUnion().noDefault(); + break; + + // java.sql.RowId is interface, is seems to be database + // implementation specific, let's convert to String + case ROWID: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); + break; + + case FLOAT: + case REAL: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().floatType().endUnion().noDefault(); + break; + + case DOUBLE: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().doubleType().endUnion().noDefault(); + break; + + // Did not find direct suitable type, need to be clarified!!!! + case DECIMAL: + case NUMERIC: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); + break; + + // Did not find direct suitable type, need to be clarified!!!! + case DATE: + case TIME: + case TIMESTAMP: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().stringType().endUnion().noDefault(); + break; + + case BINARY: + case VARBINARY: + case LONGVARBINARY: + case BLOB: + case CLOB: + builder.name(columnName).type().unionOf().nullBuilder().endNull().and().bytesType().endUnion().noDefault(); + break; + + + default: + throw new IllegalArgumentException("createSchema: Unknown SQL type " + meta.getColumnType(i) + " cannot be converted to Avro type"); + } + } + + return builder.endRecord(); + } + + public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, CsvOutputOptions outputOptions) throws SQLException, IOException { + return convertToCsvStream(rs, outStream, null, null, outputOptions); + } + + public static long convertToCsvStream(final ResultSet rs, final OutputStream outStream, String recordName, ResultSetRowCallback callback, CsvOutputOptions outputOptions) + throws SQLException, IOException { + + final ResultSetMetaData meta = rs.getMetaData(); + final int nrOfColumns = meta.getColumnCount(); + List columnNames = new ArrayList<>(nrOfColumns); + + if (outputOptions.isHeader()) { + if (outputOptions.getAltHeader() == null) { + for (int i = 1; i <= nrOfColumns; i++) { + String columnNameFromMeta = meta.getColumnName(i); + // Hive returns table.column for column name. 
Grab the column name as the string after the last period
+                    int columnNameDelimiter = columnNameFromMeta.lastIndexOf(".");
+                    columnNames.add(columnNameFromMeta.substring(columnNameDelimiter + 1));
+                }
+            } else {
+                String[] altHeaderNames = outputOptions.getAltHeader().split(",");
+                columnNames = Arrays.asList(altHeaderNames);
+            }
+        }
+
+        // Write column names as header row
+        outStream.write(StringUtils.join(columnNames, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
+        if (outputOptions.isHeader()) {
+            outStream.write("\n".getBytes(StandardCharsets.UTF_8));
+        }
+
+        // Iterate over the rows
+        int maxRows = outputOptions.getMaxRowsPerFlowFile();
+        long nrOfRows = 0;
+        while (rs.next()) {
+            if (callback != null) {
+                callback.processRow(rs);
+            }
+            List<String> rowValues = new ArrayList<>(nrOfColumns);
+            for (int i = 1; i <= nrOfColumns; i++) {
+                final int javaSqlType = meta.getColumnType(i);
+                final Object value = rs.getObject(i);
+
+                switch (javaSqlType) {
+                    case CHAR:
+                    case LONGNVARCHAR:
+                    case LONGVARCHAR:
+                    case NCHAR:
+                    case NVARCHAR:
+                    case VARCHAR:
+                        String valueString = rs.getString(i);
+                        if (valueString != null) {
+                            // Extra quotes are not added by default, as escapeCsv already quotes values when required.
+                            StringBuilder sb = new StringBuilder();
+                            if (outputOptions.isQuote()) {
+                                sb.append("\"");
+                                if (outputOptions.isEscape()) {
+                                    sb.append(StringEscapeUtils.escapeCsv(valueString));
+                                } else {
+                                    sb.append(valueString);
+                                }
+                                sb.append("\"");
+                                rowValues.add(sb.toString());
+                            } else {
+                                if (outputOptions.isEscape()) {
+                                    rowValues.add(StringEscapeUtils.escapeCsv(valueString));
+                                } else {
+                                    rowValues.add(valueString);
+                                }
+                            }
+                        } else {
+                            rowValues.add("");
+                        }
+                        break;
+                    case ARRAY:
+                    case STRUCT:
+                    case JAVA_OBJECT:
+                        String complexValueString = rs.getString(i);
+                        if (complexValueString != null) {
+                            rowValues.add(StringEscapeUtils.escapeCsv(complexValueString));
+                        } else {
+                            rowValues.add("");
+                        }
+                        break;
+                    case SQLXML:
+                        if (value != null) {
+                            rowValues.add(StringEscapeUtils.escapeCsv(((java.sql.SQLXML) value).getString()));
+                        } else {
+                            rowValues.add("");
+                        }
+                        break;
+                    default:
+                        if (value != null) {
+                            rowValues.add(value.toString());
+                        } else {
+                            rowValues.add("");
+                        }
+                }
+            }
+            // Write row values
+            outStream.write(StringUtils.join(rowValues, outputOptions.getDelimiter()).getBytes(StandardCharsets.UTF_8));
+            outStream.write("\n".getBytes(StandardCharsets.UTF_8));
+            nrOfRows++;
+
+            if (maxRows > 0 && nrOfRows == maxRows)
+                break;
+        }
+        return nrOfRows;
+    }
+
+    public static String normalizeNameForAvro(String inputName) {
+        String normalizedName = inputName.replaceAll("[^A-Za-z0-9_]", "_");
+        if (Character.isDigit(normalizedName.charAt(0))) {
+            normalizedName = "_" + normalizedName;
+        }
+        return normalizedName;
+    }
+
+    /**
+     * An interface for callback methods which allows processing of a row during the convertToXYZStream() processing.
+     * IMPORTANT: This method should only work on the row pointed at by the current ResultSet reference.
+     * Advancing the cursor (e.g. by calling ResultSet.next()) can cause rows to be skipped during Avro transformation.
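+     * <p>For illustration only, a minimal callback that simply counts the rows it is handed could look like:</p>
+     * <pre>{@code
+     * final long[] rowsSeen = {0};
+     * ResultSetRowCallback countingCallback = resultSet -> rowsSeen[0]++; // counts rows without touching the cursor
+     * }</pre>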
+ */ + public interface ResultSetRowCallback { + void processRow(ResultSet resultSet) throws IOException; + } + + public static Configuration getConfigurationFromFiles(final String configFiles) { + final Configuration hiveConfig = new HiveConf(); + if (StringUtils.isNotBlank(configFiles)) { + for (final String configFile : configFiles.split(",")) { + hiveConfig.addResource(new Path(configFile.trim())); + } + } + return hiveConfig; + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java new file mode 100644 index 0000000000..48ad60a12c --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/HiveUtils.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.util.hive; + +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.components.Validator; + +import java.io.File; + +public class HiveUtils { + + /** + * Validates that one or more files exist, as specified in a single property. 
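+ * <p>The property value is expected to be a comma-separated list of paths, e.g.
+ * "/etc/hive/conf/hive-site.xml, /etc/hadoop/conf/core-site.xml", which is validated as two separate files.
+ * For illustration only, a hypothetical property using this validator might be declared as:</p>
+ * <pre>{@code
+ * PropertyDescriptor configResources = new PropertyDescriptor.Builder()
+ *         .name("hive-config-resources") // hypothetical name, for illustration
+ *         .addValidator(HiveUtils.createMultipleFilesExistValidator())
+ *         .required(false)
+ *         .build();
+ * }</pre>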
+ */ + public static Validator createMultipleFilesExistValidator() { + return (subject, input, context) -> { + if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(input)) { + return new ValidationResult.Builder().subject(subject).input(input).explanation("Expression Language Present").valid(true).build(); + } + final String[] files = input.split("\\s*,\\s*"); + for (String filename : files) { + try { + final File file = new File(filename.trim()); + final boolean valid = file.exists() && file.isFile(); + if (!valid) { + final String message = "File " + file + " does not exist or is not a file"; + return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build(); + } + } catch (SecurityException e) { + final String message = "Unable to access " + filename + " due to " + e.getMessage(); + return new ValidationResult.Builder().subject(subject).input(input).valid(false).explanation(message).build(); + } + } + return new ValidationResult.Builder().subject(subject).input(input).valid(true).build(); + }; + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java new file mode 100644 index 0000000000..1014efb3c2 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/java/org/apache/nifi/util/hive/ValidationResources.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.util.hive; + +import org.apache.hadoop.conf.Configuration; + +/** + * A helper class for maintaining loaded configurations (to avoid reloading on use unless necessary) + */ +public class ValidationResources { + + private final String configResources; + private final Configuration configuration; + + public ValidationResources(String configResources, Configuration configuration) { + this.configResources = configResources; + this.configuration = configuration; + } + + public String getConfigResources() { + return configResources; + } + + public Configuration getConfiguration() { + return configuration; + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService new file mode 100644 index 0000000000..63e4951e43 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +org.apache.nifi.dbcp.hive.Hive_1_1ConnectionPool \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor new file mode 100644 index 0000000000..305fce9fc7 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
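+#
+# The entries below are standard java.util.ServiceLoader manifest entries; NiFi uses them to discover the processors provided by this NAR.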
+org.apache.nifi.processors.hive.SelectHive_1_1QL +org.apache.nifi.processors.hive.PutHive_1_1QL diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java new file mode 100644 index 0000000000..8bafaa5c6b --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/dbcp/hive/Hive_1_1ConnectionPoolTest.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.dbcp.hive; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.isA; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.dbcp.BasicDataSource; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.hadoop.KerberosProperties; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.registry.VariableDescriptor; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.util.MockConfigurationContext; +import org.apache.nifi.util.MockVariableRegistry; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +public class Hive_1_1ConnectionPoolTest { + private UserGroupInformation userGroupInformation; + private Hive_1_1ConnectionPool hiveConnectionPool; + private BasicDataSource basicDataSource; + private ComponentLog componentLog; + private File krb5conf = new File("src/test/resources/krb5.conf"); + + @Before + public void setup() throws Exception { + // have to initialize this system property before anything else + System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath()); + System.setProperty("java.security.krb5.realm", "nifi.com"); + System.setProperty("java.security.krb5.kdc", "nifi.kdc"); + + userGroupInformation = mock(UserGroupInformation.class); + basicDataSource = mock(BasicDataSource.class); + componentLog = mock(ComponentLog.class); + + when(userGroupInformation.doAs(isA(PrivilegedExceptionAction.class))).thenAnswer(invocation -> { + try { + return ((PrivilegedExceptionAction) invocation.getArguments()[0]).run(); + } catch (IOException | Error | 
RuntimeException | InterruptedException e) { + throw e; + } catch (Throwable e) { + throw new UndeclaredThrowableException(e); + } + }); + + initPool(); + } + + private void initPool() throws Exception { + hiveConnectionPool = new Hive_1_1ConnectionPool(); + + Field ugiField = Hive_1_1ConnectionPool.class.getDeclaredField("ugi"); + ugiField.setAccessible(true); + ugiField.set(hiveConnectionPool, userGroupInformation); + + Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource"); + dataSourceField.setAccessible(true); + dataSourceField.set(hiveConnectionPool, basicDataSource); + + Field componentLogField = AbstractControllerService.class.getDeclaredField("logger"); + componentLogField.setAccessible(true); + componentLogField.set(hiveConnectionPool, componentLog); + } + + @Test(expected = ProcessException.class) + public void testGetConnectionSqlException() throws SQLException { + SQLException sqlException = new SQLException("bad sql"); + when(basicDataSource.getConnection()).thenThrow(sqlException); + try { + hiveConnectionPool.getConnection(); + } catch (ProcessException e) { + assertEquals(sqlException, e.getCause()); + throw e; + } + } + + @Test + public void testExpressionLanguageSupport() throws Exception { + final String URL = "jdbc:hive2://localhost:10000/default"; + final String USER = "user"; + final String PASS = "pass"; + final int MAX_CONN = 7; + final String MAX_WAIT = "10 sec"; // 10000 milliseconds + final String CONF = "/path/to/hive-site.xml"; + hiveConnectionPool = new Hive_1_1ConnectionPool(); + + Map props = new HashMap() {{ + put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}"); + put(Hive_1_1ConnectionPool.DB_USER, "${username}"); + put(Hive_1_1ConnectionPool.DB_PASSWORD, "${password}"); + put(Hive_1_1ConnectionPool.MAX_TOTAL_CONNECTIONS, "${maxconn}"); + put(Hive_1_1ConnectionPool.MAX_WAIT_TIME, "${maxwait}"); + put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${hiveconf}"); + }}; + + MockVariableRegistry registry = new MockVariableRegistry(); + registry.setVariable(new VariableDescriptor("url"), URL); + registry.setVariable(new VariableDescriptor("username"), USER); + registry.setVariable(new VariableDescriptor("password"), PASS); + registry.setVariable(new VariableDescriptor("maxconn"), Integer.toString(MAX_CONN)); + registry.setVariable(new VariableDescriptor("maxwait"), MAX_WAIT); + registry.setVariable(new VariableDescriptor("hiveconf"), CONF); + + + MockConfigurationContext context = new MockConfigurationContext(props, null, registry); + hiveConnectionPool.onConfigured(context); + + Field dataSourceField = Hive_1_1ConnectionPool.class.getDeclaredField("dataSource"); + dataSourceField.setAccessible(true); + basicDataSource = (BasicDataSource) dataSourceField.get(hiveConnectionPool); + assertEquals(URL, basicDataSource.getUrl()); + assertEquals(USER, basicDataSource.getUsername()); + assertEquals(PASS, basicDataSource.getPassword()); + assertEquals(MAX_CONN, basicDataSource.getMaxActive()); + assertEquals(10000L, basicDataSource.getMaxWait()); + assertEquals(URL, hiveConnectionPool.getConnectionURL()); + } + + @Ignore("Kerberos does not seem to be properly handled in Travis build, but, locally, this test should successfully run") + @Test(expected = InitializationException.class) + public void testKerberosAuthException() throws Exception { + final String URL = "jdbc:hive2://localhost:10000/default"; + final String conf = "src/test/resources/hive-site-security.xml"; + final String ktab = "src/test/resources/fake.keytab"; + final 
String kprinc = "bad@PRINCIPAL.COM"; + + KerberosProperties kerbProperties = new KerberosProperties(krb5conf); + + Map props = new HashMap() {{ + put(Hive_1_1ConnectionPool.DATABASE_URL, "${url}"); + put(Hive_1_1ConnectionPool.HIVE_CONFIGURATION_RESOURCES, "${conf}"); + put(kerbProperties.getKerberosKeytab(), "${ktab}"); + put(kerbProperties.getKerberosPrincipal(), "${kprinc}"); + }}; + + MockVariableRegistry registry = new MockVariableRegistry(); + registry.setVariable(new VariableDescriptor("url"), URL); + registry.setVariable(new VariableDescriptor("conf"), conf); + registry.setVariable(new VariableDescriptor("ktab"), ktab); + registry.setVariable(new VariableDescriptor("kprinc"), kprinc); + + MockConfigurationContext context = new MockConfigurationContext(props, null, registry); + hiveConnectionPool.onConfigured(context); + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java new file mode 100644 index 0000000000..4f5dcf4a56 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestHiveParser.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.hive; + +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSessionFactory; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.util.MockProcessContext; +import org.apache.nifi.util.MockProcessorInitializationContext; +import org.junit.Before; +import org.junit.Test; + +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestHiveParser extends AbstractHive_1_1QLProcessor { + + @Before + public void initialize() { + final MockProcessContext processContext = new MockProcessContext(this); + final ProcessorInitializationContext initializationContext = new MockProcessorInitializationContext(this, processContext); + initialize(initializationContext); + } + + @Override + public void onTrigger(ProcessContext context, ProcessSessionFactory sessionFactory) throws ProcessException { + + } + + @Test + public void parseSelect() { + String query = "select a.empid, to_something(b.saraly) from " + + "company.emp a inner join default.salary b where a.empid = b.empid"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(2, tableNames.size()); + assertTrue(tableNames.contains(new TableName("company", "emp", true))); + assertTrue(tableNames.contains(new TableName("default", "salary", true))); + } + + @Test + public void parseSelectPrepared() { + String query = "select empid from company.emp a where a.firstName = ?"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(1, tableNames.size()); + assertTrue(tableNames.contains(new TableName("company", "emp", true))); + } + + + @Test + public void parseLongSelect() { + String query = "select\n" + + "\n" + + " i_item_id,\n" + + "\n" + + " i_item_desc,\n" + + "\n" + + " s_state,\n" + + "\n" + + " count(ss_quantity) as store_sales_quantitycount,\n" + + "\n" + + " avg(ss_quantity) as store_sales_quantityave,\n" + + "\n" + + " stddev_samp(ss_quantity) as store_sales_quantitystdev,\n" + + "\n" + + " stddev_samp(ss_quantity) / avg(ss_quantity) as store_sales_quantitycov,\n" + + "\n" + + " count(sr_return_quantity) as store_returns_quantitycount,\n" + + "\n" + + " avg(sr_return_quantity) as store_returns_quantityave,\n" + + "\n" + + " stddev_samp(sr_return_quantity) as store_returns_quantitystdev,\n" + + "\n" + + " stddev_samp(sr_return_quantity) / avg(sr_return_quantity) as store_returns_quantitycov,\n" + + "\n" + + " count(cs_quantity) as catalog_sales_quantitycount,\n" + + "\n" + + " avg(cs_quantity) as catalog_sales_quantityave,\n" + + "\n" + + " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitystdev,\n" + + "\n" + + " stddev_samp(cs_quantity) / avg(cs_quantity) as catalog_sales_quantitycov\n" + + "\n" + + "from\n" + + "\n" + + " store_sales,\n" + + "\n" + + " store_returns,\n" + + "\n" + + " catalog_sales,\n" + + "\n" + + " date_dim d1,\n" + + "\n" + + " date_dim d2,\n" + + "\n" + + " date_dim d3,\n" + + "\n" + + " store,\n" + + "\n" + + " item\n" + + "\n" + + "where\n" + + "\n" + + " d1.d_quarter_name = '2000Q1'\n" + + "\n" + + " and d1.d_date_sk = ss_sold_date_sk\n" + + "\n" + + " and i_item_sk = ss_item_sk\n" + + "\n" + + " and s_store_sk = ss_store_sk\n" + + "\n" + + " and ss_customer_sk = sr_customer_sk\n" + + "\n" + 
+ " and ss_item_sk = sr_item_sk\n" + + "\n" + + " and ss_ticket_number = sr_ticket_number\n" + + "\n" + + " and sr_returned_date_sk = d2.d_date_sk\n" + + "\n" + + " and d2.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + + "\n" + + " and sr_customer_sk = cs_bill_customer_sk\n" + + "\n" + + " and sr_item_sk = cs_item_sk\n" + + "\n" + + " and cs_sold_date_sk = d3.d_date_sk\n" + + "\n" + + " and d3.d_quarter_name in ('2000Q1' , '2000Q2', '2000Q3')\n" + + "\n" + + "group by i_item_id , i_item_desc , s_state\n" + + "\n" + + "order by i_item_id , i_item_desc , s_state\n" + + "\n" + + "limit 100"; + + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(6, tableNames.size()); + AtomicInteger cnt = new AtomicInteger(0); + for (TableName tableName : tableNames) { + if (tableName.equals(new TableName(null, "store_sales", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "store_returns", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "catalog_sales", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "date_dim", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "store", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "item", true))) { + cnt.incrementAndGet(); + } + } + assertEquals(6, cnt.get()); + } + + @Test + public void parseSelectInsert() { + String query = "insert into databaseA.tableA select key, max(value) from databaseA.tableA where category = 'x'"; + + // The same database.tableName can appear two times for input and output. + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(2, tableNames.size()); + AtomicInteger cnt = new AtomicInteger(0); + tableNames.forEach(tableName -> { + if (tableName.equals(new TableName("databaseA", "tableA", false))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName("databaseA", "tableA", true))) { + cnt.incrementAndGet(); + } + }); + assertEquals(2, cnt.get()); + } + + @Test + public void parseInsert() { + String query = "insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id"; + + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(3, tableNames.size()); + AtomicInteger cnt = new AtomicInteger(0); + tableNames.forEach(tableName -> { + if (tableName.equals(new TableName("databaseB", "tableB1", false))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "tableA1", true))) { + cnt.incrementAndGet(); + } else if (tableName.equals(new TableName(null, "tableA2", true))) { + cnt.incrementAndGet(); + } + }); + assertEquals(3, cnt.get()); + } + + @Test + public void parseUpdate() { + String query = "update table_a set y = 'updated' where x > 100"; + + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(1, tableNames.size()); + assertTrue(tableNames.contains(new TableName(null, "table_a", false))); + } + + @Test + public void parseDelete() { + String query = "delete from table_a where x > 100"; + + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(1, tableNames.size()); + assertTrue(tableNames.contains(new TableName(null, "table_a", false))); + } + + @Test + public void parseDDL() { + String 
query = "CREATE TABLE IF NOT EXISTS EMPLOYEES(\n" + + "EmployeeID INT,FirstName STRING, Title STRING,\n" + + "State STRING, Laptop STRING)\n" + + "COMMENT 'Employee Names'\n" + + "STORED AS ORC"; + + + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(1, tableNames.size()); + assertTrue(tableNames.contains(new TableName(null, "EMPLOYEES", false))); + } + + @Test + public void parseSetProperty() { + String query = " set 'hive.exec.dynamic.partition.mode'=nonstrict"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(0, tableNames.size()); + } + + @Test + public void parseSetRole() { + String query = "set role all"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(0, tableNames.size()); + } + + @Test + public void parseShowRoles() { + String query = "show roles"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(0, tableNames.size()); + } + + @Test + public void parseMsck() { + String query = "msck repair table table_a"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(1, tableNames.size()); + assertTrue(tableNames.contains(new TableName(null, "table_a", false))); + } + + @Test + public void parseAddJar() { + String query = "ADD JAR hdfs:///tmp/my_jar.jar"; + final Set tableNames = findTableNames(query); + System.out.printf("tableNames=%s\n", tableNames); + assertEquals(0, tableNames.size()); + } + +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java new file mode 100644 index 0000000000..13c763a159 --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestPutHive_1_1QL.java @@ -0,0 +1,841 @@ +package org.apache.nifi.processors.hive;/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.dbcp.DBCPService; +import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.pattern.RollbackOnFailure; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; + +import java.io.File; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class TestPutHive_1_1QL { + private static final String createPersons = "CREATE TABLE PERSONS (id integer primary key, name varchar(100), code integer)"; + private static final String createPersonsAutoId = "CREATE TABLE PERSONS (id INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY (START WITH 1), name VARCHAR(100), code INTEGER check(code <= 100))"; + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + @BeforeClass + public static void setup() { + System.setProperty("derby.stream.error.file", "target/derby.log"); + } + + @Test + public void testDirectStatements() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersons); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (1, 'Mark', 84)".getBytes()); + runner.run(); + + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertFalse(rs.next()); + } + } + + runner.enqueue("UPDATE PERSONS SET NAME='George' WHERE ID=1".getBytes()); + runner.run(); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("George", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertFalse(rs.next()); + } + } + } + + @Test + public void testFailInMiddleWithBadStatementRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, 
"db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', 84)".getBytes()); + runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); + runner.run(); + + // The 1st one should be routed to success, others should stay in queue. + assertEquals(3, runner.getQueueSize().getObjectCount()); + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0); + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 1); + } + + @Test + public void testFailAtBeginning() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); + runner.run(); + + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 2); + } + + @Test + public void testFailAtBeginningRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.enqueue("INSERT INTO PERSONS".getBytes()); // intentionally wrong syntax + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Tom', 3)".getBytes()); + runner.enqueue("INSERT INTO PERSONS (NAME, CODE) VALUES ('Harry', 44)".getBytes()); + try { + runner.run(); + fail("ProcessException should be thrown"); + } catch (AssertionError e) { + assertTrue(e.getCause() instanceof ProcessException); + } + + assertEquals(3, runner.getQueueSize().getObjectCount()); + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 0); + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 0); + } + + @Test + public 
void testFailInMiddleWithBadParameterType() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final Map goodAttributes = new HashMap<>(); + goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + goodAttributes.put("hiveql.args.1.value", "84"); + + final Map badAttributes = new HashMap<>(); + badAttributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); + badAttributes.put("hiveql.args.1.value", "hello"); + + final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, badAttributes); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, goodAttributes); + runner.run(); + + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); + } + + + @Test + public void testFailInMiddleWithBadParameterValue() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final Map goodAttributes = new HashMap<>(); + goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + goodAttributes.put("hiveql.args.1.value", "84"); + + final Map badAttributes = new HashMap<>(); + badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + badAttributes.put("hiveql.args.1.value", "101"); // Constraint violation, up to 100 + + final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, badAttributes); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, goodAttributes); + runner.run(); + + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertTrue(rs.next()); + assertTrue(rs.next()); + assertFalse(rs.next()); + } + } + } + + @Test + public void testFailInMiddleWithBadNumberFormat() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = 
folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersonsAutoId); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final Map goodAttributes = new HashMap<>(); + goodAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + goodAttributes.put("hiveql.args.1.value", "84"); + + final Map badAttributes = new HashMap<>(); + badAttributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + badAttributes.put("hiveql.args.1.value", "NOT_NUMBER"); + + final byte[] data = "INSERT INTO PERSONS (NAME, CODE) VALUES ('Mark', ?)".getBytes(); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, badAttributes); + runner.enqueue(data, goodAttributes); + runner.enqueue(data, goodAttributes); + runner.run(); + + runner.assertTransferCount(PutHive_1_1QL.REL_SUCCESS, 3); + runner.assertTransferCount(PutHive_1_1QL.REL_FAILURE, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertTrue(rs.next()); + assertTrue(rs.next()); + assertFalse(rs.next()); + } + } + } + + + @Test + public void testUsingSqlDataTypesWithNegativeValues() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate("CREATE TABLE PERSONS (id integer primary key, name varchar(100), code bigint)"); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", "-5"); + attributes.put("hiveql.args.1.value", "84"); + runner.enqueue("INSERT INTO PERSONS VALUES (1, 'Mark', ?)".getBytes(), attributes); + runner.run(); + + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); + runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS"); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertFalse(rs.next()); + } + } + } + + @Test + public void testStatementsWithPreparedParameters() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new 
MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersons); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); + runner.run(); + + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertFalse(rs.next()); + } + } + + runner.clearTransferState(); + + attributes.clear(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.1.value", "George"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.2.value", "1"); + + runner.enqueue("UPDATE PERSONS SET NAME=? WHERE ID=?".getBytes(), attributes); + runner.run(); + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("George", rs.getString(2)); + assertEquals(84, rs.getInt(3)); + assertFalse(rs.next()); + } + } + } + + + @Test + public void testMultipleStatementsWithinFlowFile() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersons); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + runner.run(); + + // should 
succeed; multiple statements within a single flow file are split on semicolons and executed individually
+        runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
+        runner.getFlowFilesForRelationship(PutHive_1_1QL.REL_SUCCESS)
+                .forEach(f -> f.assertAttributeEquals(PutHive_1_1QL.ATTR_OUTPUT_TABLES, "PERSONS"));
+
+        // Now we can check that the values were inserted by the multi-statement script.
+        try (final Connection conn = service.getConnection()) {
+            try (final Statement stmt = conn.createStatement()) {
+                final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS");
+                assertTrue(rs.next());
+                assertEquals("Record ID mismatch", 1, rs.getInt(1));
+                assertEquals("Record NAME mismatch", "George", rs.getString(2));
+            }
+        }
+    }
+
+    @Test
+    public void testMultipleStatementsWithinFlowFilePlusEmbeddedDelimiter() throws InitializationException, ProcessException, SQLException, IOException {
+        final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class);
+        final File tempDir = folder.getRoot();
+        final File dbDir = new File(tempDir, "db");
+        final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath());
+        runner.addControllerService("dbcp", service);
+        runner.enableControllerService(service);
+
+        try (final Connection conn = service.getConnection()) {
+            try (final Statement stmt = conn.createStatement()) {
+                stmt.executeUpdate(createPersons);
+            }
+        }
+
+        runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp");
+
+        final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " +
+                "UPDATE PERSONS SET NAME='George\\;' WHERE ID=?; ";
+        final Map<String, String> attributes = new HashMap<>();
+        attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER));
+        attributes.put("hiveql.args.1.value", "1");
+
+        attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR));
+        attributes.put("hiveql.args.2.value", "Mark");
+
+        attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER));
+        attributes.put("hiveql.args.3.value", "84");
+
+        attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER));
+        attributes.put("hiveql.args.4.value", "1");
+
+        runner.enqueue(sql.getBytes(), attributes);
+        runner.run();
+
+        // should succeed; the escaped semicolon ("\\;") is not treated as a statement delimiter
+        runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1);
+
+        // Now we can check that the values were inserted by the multi-statement script.
+ try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals("Record ID mismatch", 1, rs.getInt(1)); + assertEquals("Record NAME mismatch", "George\\;", rs.getString(2)); + } + } + } + + + @Test + public void testWithNullParameter() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersons); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + + runner.enqueue("INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?)".getBytes(), attributes); + runner.run(); + + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_SUCCESS, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + assertEquals(1, rs.getInt(1)); + assertEquals("Mark", rs.getString(2)); + assertEquals(0, rs.getInt(3)); + assertFalse(rs.next()); + } + } + } + + @Test + public void testInvalidStatement() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final File tempDir = folder.getRoot(); + final File dbDir = new File(tempDir, "db"); + final DBCPService service = new MockDBCPService(dbDir.getAbsolutePath()); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = conn.createStatement()) { + stmt.executeUpdate(createPersons); + } + } + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE SOME_RANDOM_TABLE NAME='George' WHERE ID=?; "; + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + runner.run(); + + // should fail because of the table is invalid + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_FAILURE, 1); + + try (final Connection conn = service.getConnection()) { + try (final Statement stmt = 
conn.createStatement()) { + final ResultSet rs = stmt.executeQuery("SELECT * FROM PERSONS"); + assertTrue(rs.next()); + } + } + } + + + @Test + public void testRetryableFailure() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final DBCPService service = new SQLExceptionService(null); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; + + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + runner.run(); + + // should fail because there isn't a valid connection and tables don't exist. + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1); + } + + @Test + public void testRetryableFailureRollbackOnFailure() throws InitializationException, ProcessException, SQLException, IOException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final DBCPService service = new SQLExceptionService(null); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; + + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + try { + runner.run(); + fail("Should throw ProcessException"); + } catch (AssertionError e) { + assertTrue(e.getCause() instanceof ProcessException); + } + + assertEquals(1, runner.getQueueSize().getObjectCount()); + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0); + } + + @Test + public void testUnknownFailure() throws InitializationException, ProcessException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final SQLExceptionService service = new SQLExceptionService(null); + service.setErrorCode(2); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; + + final Map attributes = new HashMap<>(); + 
attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + runner.run(); + + // should fail because there isn't a valid connection and tables don't exist. + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 1); + } + + @Test + public void testUnknownFailureRollbackOnFailure() throws InitializationException, ProcessException { + final TestRunner runner = TestRunners.newTestRunner(PutHive_1_1QL.class); + final SQLExceptionService service = new SQLExceptionService(null); + service.setErrorCode(0); + runner.addControllerService("dbcp", service); + runner.enableControllerService(service); + + runner.setProperty(PutHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + runner.setProperty(RollbackOnFailure.ROLLBACK_ON_FAILURE, "true"); + + final String sql = "INSERT INTO PERSONS (ID, NAME, CODE) VALUES (?, ?, ?); " + + "UPDATE PERSONS SET NAME='George' WHERE ID=?; "; + + final Map attributes = new HashMap<>(); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.1.value", "1"); + + attributes.put("hiveql.args.2.type", String.valueOf(Types.VARCHAR)); + attributes.put("hiveql.args.2.value", "Mark"); + + attributes.put("hiveql.args.3.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.3.value", "84"); + + attributes.put("hiveql.args.4.type", String.valueOf(Types.INTEGER)); + attributes.put("hiveql.args.4.value", "1"); + + runner.enqueue(sql.getBytes(), attributes); + try { + runner.run(); + fail("Should throw ProcessException"); + } catch (AssertionError e) { + assertTrue(e.getCause() instanceof ProcessException); + } + + assertEquals(1, runner.getQueueSize().getObjectCount()); + runner.assertAllFlowFilesTransferred(PutHive_1_1QL.REL_RETRY, 0); + } + + /** + * Simple implementation only for testing purposes + */ + private static class MockDBCPService extends AbstractControllerService implements Hive_1_1DBCPService { + private final String dbLocation; + + MockDBCPService(final String dbLocation) { + this.dbLocation = dbLocation; + } + + @Override + public String getIdentifier() { + return "dbcp"; + } + + @Override + public Connection getConnection() throws ProcessException { + try { + Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); + return DriverManager.getConnection("jdbc:derby:" + dbLocation + ";create=true"); + } catch (final Exception e) { + e.printStackTrace(); + throw new ProcessException("getConnection failed: " + e); + } + } + + @Override + public String getConnectionURL() { + return "jdbc:derby:" + dbLocation + ";create=true"; + } + } + + /** + * Simple implementation only for testing purposes + */ + private static class SQLExceptionService extends AbstractControllerService implements Hive_1_1DBCPService { + private final Hive_1_1DBCPService service; + private int allowedBeforeFailure = 0; + private int successful = 0; + private int errorCode = 30000; // Default to a retryable exception code + + SQLExceptionService(final Hive_1_1DBCPService service) { + this.service = service; + } + + @Override + public String getIdentifier() { + return "dbcp"; + } + + @Override + 
public Connection getConnection() throws ProcessException { + try { + if (++successful > allowedBeforeFailure) { + final Connection conn = Mockito.mock(Connection.class); + Mockito.when(conn.prepareStatement(Mockito.any(String.class))).thenThrow(new SQLException("Unit Test Generated SQLException", "42000", errorCode)); + return conn; + } else { + return service.getConnection(); + } + } catch (final Exception e) { + e.printStackTrace(); + throw new ProcessException("getConnection failed: " + e); + } + } + + @Override + public String getConnectionURL() { + return service != null ? service.getConnectionURL() : null; + } + + void setErrorCode(int errorCode) { + this.errorCode = errorCode; + } + } +} diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java new file mode 100644 index 0000000000..eba231baaa --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/java/org/apache/nifi/processors/hive/TestSelectHive_1_1QL.java @@ -0,0 +1,658 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.hive; + +import org.apache.avro.file.DataFileStream; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumReader; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.dbcp.DBCPService; +import org.apache.nifi.dbcp.hive.Hive_1_1DBCPService; +import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.provenance.ProvenanceEventRecord; +import org.apache.nifi.provenance.ProvenanceEventType; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.apache.nifi.util.hive.HiveJdbcCommon; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import static org.apache.nifi.processors.hive.SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT; +import static org.apache.nifi.util.hive.HiveJdbcCommon.AVRO; +import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV; +import static org.apache.nifi.util.hive.HiveJdbcCommon.CSV_MIME_TYPE; +import static org.apache.nifi.util.hive.HiveJdbcCommon.MIME_TYPE_AVRO_BINARY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestSelectHive_1_1QL { + + private static final Logger LOGGER; + private final static String MAX_ROWS_KEY = "maxRows"; + private final int NUM_OF_ROWS = 100; + + + static { + System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info"); + System.setProperty("org.slf4j.simpleLogger.showDateTime", "true"); + System.setProperty("org.slf4j.simpleLogger.log.nifi.io.nio", "debug"); + System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.SelectHive_1_1QL", "debug"); + System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hive.TestSelectHive_1_1QL", "debug"); + LOGGER = LoggerFactory.getLogger(TestSelectHive_1_1QL.class); + } + + private final static String DB_LOCATION = "target/db"; + + private final static String QUERY_WITH_EL = "select " + + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" + + " from persons PER" + + " where PER.ID > ${person.id}"; + + private final static String QUERY_WITHOUT_EL = "select " + + " PER.ID as PersonId, PER.NAME as PersonName, PER.CODE as PersonCode" + + " from persons PER" + + " where PER.ID > 10"; + + + @BeforeClass + public static void setupClass() { + System.setProperty("derby.stream.error.file", "target/derby.log"); + } + + private TestRunner runner; + + @Before + public void setup() throws InitializationException { + final DBCPService dbcp = new DBCPServiceSimpleImpl(); + final Map dbcpProperties = new HashMap<>(); + + runner = TestRunners.newTestRunner(SelectHive_1_1QL.class); + runner.addControllerService("dbcp", dbcp, dbcpProperties); + runner.enableControllerService(dbcp); + runner.setProperty(SelectHive_1_1QL.HIVE_DBCP_SERVICE, "dbcp"); + 
} + + @Test + public void testIncomingConnectionWithNoFlowFile() throws InitializationException { + runner.setIncomingConnection(true); + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM persons"); + runner.run(); + runner.assertTransferCount(SelectHive_1_1QL.REL_SUCCESS, 0); + runner.assertTransferCount(SelectHive_1_1QL.REL_FAILURE, 0); + } + + @Test + public void testNoIncomingConnection() throws ClassNotFoundException, SQLException, InitializationException, IOException { + runner.setIncomingConnection(false); + invokeOnTrigger(QUERY_WITHOUT_EL, false, "Avro"); + + final List provenanceEvents = runner.getProvenanceEvents(); + final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); + assertEquals(ProvenanceEventType.RECEIVE, provenance0.getEventType()); + assertEquals("jdbc:derby:target/db;create=true", provenance0.getTransitUri()); + } + + @Test + public void testNoTimeLimit() throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITH_EL, true, "Avro"); + + final List provenanceEvents = runner.getProvenanceEvents(); + assertEquals(3, provenanceEvents.size()); + + final ProvenanceEventRecord provenance0 = provenanceEvents.get(0); + assertEquals(ProvenanceEventType.FORK, provenance0.getEventType()); + + final ProvenanceEventRecord provenance1 = provenanceEvents.get(1); + assertEquals(ProvenanceEventType.FETCH, provenance1.getEventType()); + assertEquals("jdbc:derby:target/db;create=true", provenance1.getTransitUri()); + + final ProvenanceEventRecord provenance2 = provenanceEvents.get(2); + assertEquals(ProvenanceEventType.FORK, provenance2.getEventType()); + } + + + @Test + public void testWithNullIntColumn() throws SQLException { + // remove previous test database, if any + final File dbLocation = new File(DB_LOCATION); + dbLocation.delete(); + + // load test data to database + final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + + try { + stmt.execute("drop table TEST_NULL_INT"); + } catch (final SQLException sqle) { + // Nothing to do, probably means the table didn't exist + } + + stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, constraint my_pk primary key (id))"); + + stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (0, NULL, 1)"); + stmt.execute("insert into TEST_NULL_INT (id, val1, val2) VALUES (1, 1, 1)"); + + runner.setIncomingConnection(false); + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_NULL_INT"); + runner.run(); + + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); + runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0).assertAttributeEquals(SelectHive_1_1QL.RESULT_ROW_COUNT, "2"); + } + + @Test + public void testWithSqlException() throws SQLException { + // remove previous test database, if any + final File dbLocation = new File(DB_LOCATION); + dbLocation.delete(); + + // load test data to database + final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + + try { + stmt.execute("drop table TEST_NO_ROWS"); + } catch (final SQLException sqle) { + // Nothing to do, probably means the table didn't exist + } + + stmt.execute("create table TEST_NO_ROWS (id integer)"); + + runner.setIncomingConnection(false); + // Try a valid SQL statement that will generate an error (val1 does not exist, 
e.g.) + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT val1 FROM TEST_NO_ROWS"); + runner.run(); + + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + } + + @Test + public void invokeOnTriggerExceptionInPreQieriesNoIncomingFlows() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + doOnTrigger(QUERY_WITHOUT_EL, false, CSV, + "select 'no exception' from persons; select exception from persons", + null); + + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + } + + @Test + public void invokeOnTriggerExceptionInPreQieriesWithIncomingFlows() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + doOnTrigger(QUERY_WITHOUT_EL, true, CSV, + "select 'no exception' from persons; select exception from persons", + null); + + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + } + + @Test + public void invokeOnTriggerExceptionInPostQieriesNoIncomingFlows() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + doOnTrigger(QUERY_WITHOUT_EL, false, CSV, + null, + "select 'no exception' from persons; select exception from persons"); + + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + } + + @Test + public void invokeOnTriggerExceptionInPostQieriesWithIncomingFlows() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + doOnTrigger(QUERY_WITHOUT_EL, true, CSV, + null, + "select 'no exception' from persons; select exception from persons"); + + // with incoming connections, it should be rolled back + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + } + + @Test + public void testWithBadSQL() throws SQLException { + final String BAD_SQL = "create table TEST_NO_ROWS (id integer)"; + + // Test with incoming flow file (it should be routed to failure intact, i.e. same content and no parent) + runner.setIncomingConnection(true); + // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) + runner.enqueue(BAD_SQL); + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0); + flowFile.assertContentEquals(BAD_SQL); + flowFile.assertAttributeEquals("parentIds", null); + runner.clearTransferState(); + + // Test with no incoming flow file (an empty flow file is transferred) + runner.setIncomingConnection(false); + // Try a valid SQL statement that will generate an error (val1 does not exist, e.g.) 
+ runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, BAD_SQL); + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_FAILURE, 1); + flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_FAILURE).get(0); + flowFile.assertContentEquals(""); + } + + @Test + public void invokeOnTriggerWithCsv() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV); + } + + @Test + public void invokeOnTriggerWithAvro() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITHOUT_EL, false, AVRO); + } + + @Test + public void invokeOnTriggerWithValidPreQieries() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, + "select '1' from persons; select '2' from persons", //should not be 'select'. But Derby driver doesn't support "set param=val" format. + null); + } + + @Test + public void invokeOnTriggerWithValidPostQieries() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, + null, + //should not be 'select'. But Derby driver doesn't support "set param=val" format, + //so just providing any "compilable" query. + " select '4' from persons; \nselect '5' from persons"); + } + + @Test + public void invokeOnTriggerWithValidPrePostQieries() + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(QUERY_WITHOUT_EL, false, CSV, + //should not be 'select'. But Derby driver doesn't support "set param=val" format, + //so just providing any "compilable" query. + "select '1' from persons; select '2' from persons", + " select '4' from persons; \nselect '5' from persons"); + } + + + public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat) + throws InitializationException, ClassNotFoundException, SQLException, IOException { + invokeOnTrigger(query, incomingFlowFile, outputFormat, null, null); + } + + public void invokeOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, + String preQueries, String postQueries) + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + TestRunner runner = doOnTrigger(query, incomingFlowFile, outputFormat, preQueries, postQueries); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); + + final List flowfiles = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS); + MockFlowFile flowFile = flowfiles.get(0); + final InputStream in = new ByteArrayInputStream(flowFile.toByteArray()); + long recordsFromStream = 0; + if (AVRO.equals(outputFormat)) { + assertEquals(MIME_TYPE_AVRO_BINARY, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); + final DatumReader datumReader = new GenericDatumReader<>(); + try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { + GenericRecord record = null; + while (dataFileReader.hasNext()) { + // Reuse record object by passing it to next(). This saves us from + // allocating and garbage collecting many objects for files with + // many items. 
+ record = dataFileReader.next(record); + recordsFromStream++; + } + } + } else { + assertEquals(CSV_MIME_TYPE, flowFile.getAttribute(CoreAttributes.MIME_TYPE.key())); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + + String headerRow = br.readLine(); + // Derby capitalizes column names + assertEquals("PERSONID,PERSONNAME,PERSONCODE", headerRow); + + // Validate rows + String line; + while ((line = br.readLine()) != null) { + recordsFromStream++; + String[] values = line.split(","); + if (recordsFromStream < (NUM_OF_ROWS - 10)) { + assertEquals(3, values.length); + assertTrue(values[1].startsWith("\"")); + assertTrue(values[1].endsWith("\"")); + } else { + assertEquals(2, values.length); // Middle value is null + } + } + } + assertEquals(NUM_OF_ROWS - 10, recordsFromStream); + assertEquals(recordsFromStream, Integer.parseInt(flowFile.getAttribute(SelectHive_1_1QL.RESULT_ROW_COUNT))); + flowFile.assertAttributeEquals(AbstractHive_1_1QLProcessor.ATTR_INPUT_TABLES, "persons"); + } + + public TestRunner doOnTrigger(final String query, final boolean incomingFlowFile, String outputFormat, + String preQueries, String postQueries) + throws InitializationException, ClassNotFoundException, SQLException, IOException { + + // remove previous test database, if any + final File dbLocation = new File(DB_LOCATION); + dbLocation.delete(); + + // load test data to database + final Connection con = ((Hive_1_1DBCPService) runner.getControllerService("dbcp")).getConnection(); + final Statement stmt = con.createStatement(); + try { + stmt.execute("drop table persons"); + } catch (final SQLException sqle) { + // Nothing to do here, the table didn't exist + } + + stmt.execute("create table persons (id integer, name varchar(100), code integer)"); + Random rng = new Random(53496); + stmt.executeUpdate("insert into persons values (1, 'Joe Smith', " + rng.nextInt(469947) + ")"); + for (int i = 2; i < NUM_OF_ROWS; i++) { + stmt.executeUpdate("insert into persons values (" + i + ", 'Someone Else', " + rng.nextInt(469947) + ")"); + } + stmt.executeUpdate("insert into persons values (" + NUM_OF_ROWS + ", 'Last Person', NULL)"); + + LOGGER.info("test data loaded"); + + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, query); + runner.setProperty(HIVEQL_OUTPUT_FORMAT, outputFormat); + if (preQueries != null) { + runner.setProperty(SelectHive_1_1QL.HIVEQL_PRE_QUERY, preQueries); + } + if (postQueries != null) { + runner.setProperty(SelectHive_1_1QL.HIVEQL_POST_QUERY, postQueries); + } + + if (incomingFlowFile) { + // incoming FlowFile content is not used, but attributes are used + final Map attributes = new HashMap<>(); + attributes.put("person.id", "10"); + runner.enqueue("Hello".getBytes(), attributes); + } + + runner.setIncomingConnection(incomingFlowFile); + runner.run(); + + return runner; + } + + @Test + public void testMaxRowsPerFlowFileAvro() throws ClassNotFoundException, SQLException, InitializationException, IOException { + + // load test data to database + final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + InputStream in; + MockFlowFile mff; + + try { + stmt.execute("drop table TEST_QUERY_DB_TABLE"); + } catch (final SQLException sqle) { + // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] + } + + stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint 
default 0)"); + int rowCount = 0; + //create larger row set + for (int batch = 0; batch < 100; batch++) { + stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); + rowCount++; + } + + runner.setIncomingConnection(false); + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * FROM TEST_QUERY_DB_TABLE"); + runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); + runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); + runner.setVariable(MAX_ROWS_KEY, "9"); + + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12); + + //ensure all but the last file have 9 records each + for (int ff = 0; ff < 11; ff++) { + mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff); + in = new ByteArrayInputStream(mff.toByteArray()); + assertEquals(9, getNumberOfRecordsFromStream(in)); + + mff.assertAttributeExists("fragment.identifier"); + assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); + assertEquals("12", mff.getAttribute("fragment.count")); + } + + //last file should have 1 record + mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11); + in = new ByteArrayInputStream(mff.toByteArray()); + assertEquals(1, getNumberOfRecordsFromStream(in)); + mff.assertAttributeExists("fragment.identifier"); + assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); + assertEquals("12", mff.getAttribute("fragment.count")); + runner.clearTransferState(); + } + + @Test + public void testParametrizedQuery() throws ClassNotFoundException, SQLException, InitializationException, IOException { + // load test data to database + final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + + try { + stmt.execute("drop table TEST_QUERY_DB_TABLE"); + } catch (final SQLException sqle) { + // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] + } + + stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); + int rowCount = 0; + //create larger row set + for (int batch = 0; batch < 100; batch++) { + stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); + rowCount++; + } + + runner.setIncomingConnection(true); + runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); + runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.AVRO); + runner.setVariable(MAX_ROWS_KEY, "9"); + + Map attributes = new HashMap(); + attributes.put("hiveql.args.1.value", "1"); + attributes.put("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE WHERE id = ?", attributes ); + + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 1); + MockFlowFile flowFile = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(0); + // Assert the attributes from the incoming flow file are preserved in the outgoing flow file(s) + flowFile.assertAttributeEquals("hiveql.args.1.value", "1"); + flowFile.assertAttributeEquals("hiveql.args.1.type", String.valueOf(Types.INTEGER)); + runner.clearTransferState(); + } + + @Test + public void 
testMaxRowsPerFlowFileCSV() throws ClassNotFoundException, SQLException, InitializationException, IOException { + + // load test data to database + final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + InputStream in; + MockFlowFile mff; + + try { + stmt.execute("drop table TEST_QUERY_DB_TABLE"); + } catch (final SQLException sqle) { + // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] + } + + stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); + int rowCount = 0; + //create larger row set + for (int batch = 0; batch < 100; batch++) { + stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); + rowCount++; + } + + runner.setIncomingConnection(true); + runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "${" + MAX_ROWS_KEY + "}"); + runner.setProperty(SelectHive_1_1QL.HIVEQL_OUTPUT_FORMAT, HiveJdbcCommon.CSV); + + runner.enqueue("SELECT * FROM TEST_QUERY_DB_TABLE", new HashMap() {{ + put(MAX_ROWS_KEY, "9"); + }}); + + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, 12); + + //ensure all but the last file have 9 records (10 lines = 9 records + header) each + for (int ff = 0; ff < 11; ff++) { + mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(ff); + in = new ByteArrayInputStream(mff.toByteArray()); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + assertEquals(10, br.lines().count()); + + mff.assertAttributeExists("fragment.identifier"); + assertEquals(Integer.toString(ff), mff.getAttribute("fragment.index")); + assertEquals("12", mff.getAttribute("fragment.count")); + } + + //last file should have 1 record (2 lines = 1 record + header) + mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(11); + in = new ByteArrayInputStream(mff.toByteArray()); + BufferedReader br = new BufferedReader(new InputStreamReader(in)); + assertEquals(2, br.lines().count()); + mff.assertAttributeExists("fragment.identifier"); + assertEquals(Integer.toString(11), mff.getAttribute("fragment.index")); + assertEquals("12", mff.getAttribute("fragment.count")); + runner.clearTransferState(); + } + + @Test + public void testMaxRowsPerFlowFileWithMaxFragments() throws ClassNotFoundException, SQLException, InitializationException, IOException { + + // load test data to database + final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection(); + Statement stmt = con.createStatement(); + InputStream in; + MockFlowFile mff; + + try { + stmt.execute("drop table TEST_QUERY_DB_TABLE"); + } catch (final SQLException sqle) { + // Ignore this error, probably a "table does not exist" since Derby doesn't yet support DROP IF EXISTS [DERBY-4842] + } + + stmt.execute("create table TEST_QUERY_DB_TABLE (id integer not null, name varchar(100), scale float, created_on timestamp, bignum bigint default 0)"); + int rowCount = 0; + //create larger row set + for (int batch = 0; batch < 100; batch++) { + stmt.execute("insert into TEST_QUERY_DB_TABLE (id, name, scale, created_on) VALUES (" + rowCount + ", 'Joe Smith', 1.0, '1962-09-23 03:23:34.234')"); + rowCount++; + } + + runner.setIncomingConnection(false); + runner.setProperty(SelectHive_1_1QL.HIVEQL_SELECT_QUERY, "SELECT * 
FROM TEST_QUERY_DB_TABLE"); + runner.setProperty(SelectHive_1_1QL.MAX_ROWS_PER_FLOW_FILE, "9"); + Integer maxFragments = 3; + runner.setProperty(SelectHive_1_1QL.MAX_FRAGMENTS, maxFragments.toString()); + + runner.run(); + runner.assertAllFlowFilesTransferred(SelectHive_1_1QL.REL_SUCCESS, maxFragments); + + for (int i = 0; i < maxFragments; i++) { + mff = runner.getFlowFilesForRelationship(SelectHive_1_1QL.REL_SUCCESS).get(i); + in = new ByteArrayInputStream(mff.toByteArray()); + assertEquals(9, getNumberOfRecordsFromStream(in)); + + mff.assertAttributeExists("fragment.identifier"); + assertEquals(Integer.toString(i), mff.getAttribute("fragment.index")); + assertEquals(maxFragments.toString(), mff.getAttribute("fragment.count")); + } + + runner.clearTransferState(); + } + + private long getNumberOfRecordsFromStream(InputStream in) throws IOException { + final DatumReader datumReader = new GenericDatumReader<>(); + try (DataFileStream dataFileReader = new DataFileStream<>(in, datumReader)) { + GenericRecord record = null; + long recordsFromStream = 0; + while (dataFileReader.hasNext()) { + // Reuse record object by passing it to next(). This saves us from + // allocating and garbage collecting many objects for files with + // many items. + record = dataFileReader.next(record); + recordsFromStream += 1; + } + + return recordsFromStream; + } + } + + /** + * Simple implementation only for SelectHive_1_1QL processor testing. + */ + private class DBCPServiceSimpleImpl extends AbstractControllerService implements Hive_1_1DBCPService { + + @Override + public String getIdentifier() { + return "dbcp"; + } + + @Override + public Connection getConnection() throws ProcessException { + try { + Class.forName("org.apache.derby.jdbc.EmbeddedDriver"); + return DriverManager.getConnection("jdbc:derby:" + DB_LOCATION + ";create=true"); + } catch (final Exception e) { + throw new ProcessException("getConnection failed: " + e); + } + } + + @Override + public String getConnectionURL() { + return "jdbc:derby:" + DB_LOCATION + ";create=true"; + } + } +} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc new file mode 100644 index 0000000000..19cac6e60f --- /dev/null +++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/array_of_records.avsc @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+{
+  "namespace" : "org.apache.nifi",
+  "name" : "outer_record",
+  "type" : "record",
+  "fields" : [ {
+    "name" : "records",
+    "type" : {
+      "type" : "array",
+      "items" : {
+        "type" : "record",
+        "name" : "inner_record",
+        "fields" : [ {
+          "name" : "name",
+          "type" : "string"
+        }, {
+          "name" : "age",
+          "type" : "int"
+        } ]
+      }
+    }
+  } ]
+}
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml
new file mode 100644
index 0000000000..eefc74ecbe
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site-security.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>fs.default.name</name>
+        <value>hdfs://hive</value>
+    </property>
+    <property>
+        <name>hadoop.security.authentication</name>
+        <value>kerberos</value>
+    </property>
+    <property>
+        <name>hadoop.security.authorization</name>
+        <value>true</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml
new file mode 100644
index 0000000000..8f4a91a4ec
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/core-site.xml
@@ -0,0 +1,22 @@
+<configuration>
+    <property>
+        <name>fs.default.name</name>
+        <value>hdfs://hive</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/fake.keytab b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/fake.keytab
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml
new file mode 100644
index 0000000000..4d64c951a4
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site-security.xml
@@ -0,0 +1,30 @@
+<configuration>
+    <property>
+        <name>fs.default.name</name>
+        <value>hdfs://hive</value>
+    </property>
+    <property>
+        <name>hive.server2.authentication</name>
+        <value>KERBEROS</value>
+    </property>
+    <property>
+        <name>hadoop.security.authentication</name>
+        <value>kerberos</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml
new file mode 100644
index 0000000000..7e7f86cf28
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/hive-site.xml
@@ -0,0 +1,22 @@
+<configuration>
+    <property>
+        <name>fs.default.name</name>
+        <value>file:///</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf
new file mode 100644
index 0000000000..323da39be9
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/krb5.conf
@@ -0,0 +1,10 @@
+[libdefaults]
+  default_realm = EXAMPLE.COM
+  dns_lookup_kdc = false
+  dns_lookup_realm = false
+
+[realms]
+  EXAMPLE.COM = {
+    kdc = kerberos.example.com
+    admin_server = kerberos.example.com
+  }
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc
new file mode 100644
index 0000000000..95ef6e4fd0
--- /dev/null
+++ b/nifi-nar-bundles/nifi-hive-bundle/nifi-hive_1_1-processors/src/test/resources/user.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_number", "type": ["int", "null"]},
+     {"name": "favorite_color", "type": ["string", "null"]},
+     {"name": "scale", "type": ["double", "null"]}
+ ]
+}
diff --git a/nifi-nar-bundles/nifi-hive-bundle/pom.xml b/nifi-nar-bundles/nifi-hive-bundle/pom.xml
index ae705a3d1b..af1967f1de 100644
--- a/nifi-nar-bundles/nifi-hive-bundle/pom.xml
+++ b/nifi-nar-bundles/nifi-hive-bundle/pom.xml
@@ -31,6 +31,8 @@
         <module>nifi-hive-services-api-nar</module>
         <module>nifi-hive-processors</module>
         <module>nifi-hive-nar</module>
+        <module>nifi-hive_1_1-processors</module>
+        <module>nifi-hive_1_1-nar</module>
         <module>nifi-hive3-processors</module>
         <module>nifi-hive3-nar</module>
@@ -47,6 +49,8 @@
+        <hive11.version>1.1.1</hive11.version>
+        <hive11.hadoop.version>2.6.2</hive11.hadoop.version>
         <hive.version>1.2.1</hive.version>
         <hive.hadoop.version>2.6.2</hive.hadoop.version>
         <hive3.version>3.1.0</hive3.version>